• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
18 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
19 
20 #include <deque>
21 #include <utility>
22 #include <vector>
23 
24 #include "base/arena_containers.h"
25 #include "base/logging.h"
26 #include "constants_arm.h"
27 #include "utils/arm/managed_register_arm.h"
28 #include "utils/arm/assembler_arm.h"
29 #include "utils/array_ref.h"
30 #include "offsets.h"
31 
32 namespace art {
33 namespace arm {
34 
35 class Thumb2Assembler FINAL : public ArmAssembler {
36  public:
37   explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
ArmAssembler(arena)38       : ArmAssembler(arena),
39         can_relocate_branches_(can_relocate_branches),
40         force_32bit_(false),
41         it_cond_index_(kNoItCondition),
42         next_condition_(AL),
43         fixups_(arena->Adapter(kArenaAllocAssembler)),
44         fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
45         literals_(arena->Adapter(kArenaAllocAssembler)),
46         jump_tables_(arena->Adapter(kArenaAllocAssembler)),
47         last_position_adjustment_(0u),
48         last_old_position_(0u),
49         last_fixup_id_(0u) {
50     cfi().DelayEmittingAdvancePCs();
51   }
52 
~Thumb2Assembler()53   virtual ~Thumb2Assembler() {
54   }
55 
IsThumb()56   bool IsThumb() const OVERRIDE {
57     return true;
58   }
59 
IsForced32Bit()60   bool IsForced32Bit() const {
61     return force_32bit_;
62   }
63 
CanRelocateBranches()64   bool CanRelocateBranches() const {
65     return can_relocate_branches_;
66   }
67 
68   void FinalizeCode() OVERRIDE;
69 
70   // Data-processing instructions.
71   virtual void and_(Register rd, Register rn, const ShifterOperand& so,
72                     Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
73 
74   virtual void eor(Register rd, Register rn, const ShifterOperand& so,
75                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
76 
77   virtual void sub(Register rd, Register rn, const ShifterOperand& so,
78                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
79 
80   virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
81                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
82 
83   virtual void add(Register rd, Register rn, const ShifterOperand& so,
84                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
85 
86   virtual void adc(Register rd, Register rn, const ShifterOperand& so,
87                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
88 
89   virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
90                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
91 
92   virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
93                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
94 
95   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
96 
97   void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
98 
99   void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
100 
101   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
102 
103   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
104                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
105 
106   virtual void orn(Register rd, Register rn, const ShifterOperand& so,
107                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
108 
109   virtual void mov(Register rd, const ShifterOperand& so,
110                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
111 
112   virtual void bic(Register rd, Register rn, const ShifterOperand& so,
113                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
114 
115   virtual void mvn(Register rd, const ShifterOperand& so,
116                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
117 
118   // Miscellaneous data-processing instructions.
119   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
120   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
121   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
122   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
123   void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
124   void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
125   void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
126 
127   // Multiply instructions.
128   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
129   void mla(Register rd, Register rn, Register rm, Register ra,
130            Condition cond = AL) OVERRIDE;
131   void mls(Register rd, Register rn, Register rm, Register ra,
132            Condition cond = AL) OVERRIDE;
133   void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
134              Condition cond = AL) OVERRIDE;
135   void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
136              Condition cond = AL) OVERRIDE;
137 
138   void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
139   void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
140 
141   // Bit field extract instructions.
142   void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
143   void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
144 
145   // Load/store instructions.
146   void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
147   void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
148 
149   void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
150   void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
151 
152   void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
153   void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
154 
155   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
156   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
157 
158   // Load/store register dual instructions using registers `rd` and `rd` + 1.
159   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
160   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
161 
162   // Load/store register dual instructions using registers `rd` and `rd2`.
163   // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
164   // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1.
165   void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
166   void strd(Register rd, Register rd2, const Address& ad, Condition cond);
167 
168 
169   void ldm(BlockAddressMode am, Register base,
170            RegList regs, Condition cond = AL) OVERRIDE;
171   void stm(BlockAddressMode am, Register base,
172            RegList regs, Condition cond = AL) OVERRIDE;
173 
174   void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
175   void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
176 
177   void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
178   void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);
179 
180   void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
181   void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
182 
183   // Miscellaneous instructions.
184   void clrex(Condition cond = AL) OVERRIDE;
185   void nop(Condition cond = AL) OVERRIDE;
186 
187   void bkpt(uint16_t imm16) OVERRIDE;
188   void svc(uint32_t imm24) OVERRIDE;
189 
190   // If-then
191   void it(Condition firstcond, ItState i1 = kItOmitted,
192         ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;
193 
194   void cbz(Register rn, Label* target) OVERRIDE;
195   void cbnz(Register rn, Label* target) OVERRIDE;
196 
197   // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
198   void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
199   void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
200   void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
201   void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
202   void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
203   void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
204   void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
205   void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
206 
207   // Returns false if the immediate cannot be encoded.
208   bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
209   bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
210 
211   void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
212   void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
213   void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
214   void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
215 
216   void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
217   void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
218   void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
219   void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
220   void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
221   void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
222   void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
223   void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
224   void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
225   void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
226   void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
227   void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
228 
229   void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
230   void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
231   void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
232   void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
233   void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
234   void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
235 
236   void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
237   void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
238   void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
239   void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
240   void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
241   void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
242   void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
243   void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
244   void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
245   void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
246 
247   void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
248   void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
249   void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
250   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
251   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
252 
253   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
254   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
255   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
256   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
257 
258   // Branch instructions.
259   void b(Label* label, Condition cond = AL);
260   void bl(Label* label, Condition cond = AL);
261   void blx(Label* label);
262   void blx(Register rm, Condition cond = AL) OVERRIDE;
263   void bx(Register rm, Condition cond = AL) OVERRIDE;
264 
265   virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
266                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
267   virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
268                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
269   virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
270                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
271   virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
272                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
273   virtual void Rrx(Register rd, Register rm,
274                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
275 
276   virtual void Lsl(Register rd, Register rm, Register rn,
277                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
278   virtual void Lsr(Register rd, Register rm, Register rn,
279                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
280   virtual void Asr(Register rd, Register rm, Register rn,
281                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
282   virtual void Ror(Register rd, Register rm, Register rn,
283                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
284 
285   void Push(Register rd, Condition cond = AL) OVERRIDE;
286   void Pop(Register rd, Condition cond = AL) OVERRIDE;
287 
288   void PushList(RegList regs, Condition cond = AL) OVERRIDE;
289   void PopList(RegList regs, Condition cond = AL) OVERRIDE;
290 
291   void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
292 
293   void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
294   void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
295 
296   // Memory barriers.
297   void dmb(DmbOptions flavor) OVERRIDE;
298 
299   // Get the final position of a label after local fixup based on the old position
300   // recorded before FinalizeCode().
301   uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;
302 
303   using ArmAssembler::NewLiteral;  // Make the helper template visible.
304 
305   Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
306   void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
307   void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
308   void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
309   void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;
310 
311   // Add signed constant value to rd. May clobber IP.
312   void AddConstant(Register rd, Register rn, int32_t value,
313                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
314 
315   void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
316 
317   // Load and Store. May clobber IP.
318   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
319   void MarkExceptionHandler(Label* label) OVERRIDE;
320   void LoadFromOffset(LoadOperandType type,
321                       Register reg,
322                       Register base,
323                       int32_t offset,
324                       Condition cond = AL) OVERRIDE;
325   void StoreToOffset(StoreOperandType type,
326                      Register reg,
327                      Register base,
328                      int32_t offset,
329                      Condition cond = AL) OVERRIDE;
330   void LoadSFromOffset(SRegister reg,
331                        Register base,
332                        int32_t offset,
333                        Condition cond = AL) OVERRIDE;
334   void StoreSToOffset(SRegister reg,
335                       Register base,
336                       int32_t offset,
337                       Condition cond = AL) OVERRIDE;
338   void LoadDFromOffset(DRegister reg,
339                        Register base,
340                        int32_t offset,
341                        Condition cond = AL) OVERRIDE;
342   void StoreDToOffset(DRegister reg,
343                       Register base,
344                       int32_t offset,
345                       Condition cond = AL) OVERRIDE;
346 
347   bool ShifterOperandCanHold(Register rd,
348                              Register rn,
349                              Opcode opcode,
350                              uint32_t immediate,
351                              SetCc set_cc,
352                              ShifterOperand* shifter_op) OVERRIDE;
353   using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
354 
355   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
356 
357 
358   static bool IsInstructionForExceptionHandling(uintptr_t pc);
359 
360   // Emit data (e.g. encoded instruction or immediate) to the.
361   // instruction stream.
362   void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
363   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
364   void Bind(Label* label) OVERRIDE;
365 
366   void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
367 
368   // Force the assembler to generate 32 bit instructions.
Force32Bit()369   void Force32Bit() {
370     force_32bit_ = true;
371   }
372 
373   // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
374   // will generate a fixup.
375   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
376   // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
377   void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
378 
379  private:
380   typedef uint16_t FixupId;
381 
382   // Fixup: branches and literal pool references.
383   //
384   // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
385   // depends on both the type of branch and the offset to which it is branching. The 16-bit
386   // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
387   // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be
388   // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
389   // of instructions to make up for the limited range of load literal instructions (up to
390   // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
391   // size before hand, so we assume it is the smallest available size and determine the final
392   // code offsets and sizes and emit code in FinalizeCode().
393   //
394   // To handle this, we keep a record of every branch and literal pool load in the program.
395   // The actual instruction encoding for these is delayed until we know the final size of
396   // every instruction. When we bind a label to a branch we don't know the final location yet
397   // as some preceding instructions may need to be expanded, so we record a non-final offset.
398   // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
399   // range. With each expansion, we need to update dependent Fixups, i.e. insntructios with
400   // target on the other side of the expanded insn, as their offsets change and this may
401   // trigger further expansion.
402   //
403   // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
404   // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
405   // to it, using the fixup ids as links. The first link is stored in the label's position
406   // (the label is linked but not bound), the following links are stored in the code buffer,
407   // in the placeholder where we will eventually emit the actual code.
408 
409   class Fixup {
410    public:
411     // Branch type.
412     enum Type : uint8_t {
413       kConditional,               // B<cond>.
414       kUnconditional,             // B.
415       kUnconditionalLink,         // BL.
416       kUnconditionalLinkX,        // BLX.
417       kCompareAndBranchXZero,     // cbz/cbnz.
418       kLoadLiteralNarrow,         // Load narrrow integer literal.
419       kLoadLiteralWide,           // Load wide integer literal.
420       kLoadLiteralAddr,           // Load address of literal (used for jump table).
421       kLoadFPLiteralSingle,       // Load FP literal single.
422       kLoadFPLiteralDouble,       // Load FP literal double.
423     };
424 
425     // Calculated size of branch instruction based on type and offset.
426     enum Size : uint8_t {
427       // Branch variants.
428       kBranch16Bit,
429       kBranch32Bit,
430       // NOTE: We don't support branches which would require multiple instructions, i.e.
431       // conditinoal branches beyond +-1MiB and unconditional branches beyond +-16MiB.
432 
433       // CBZ/CBNZ variants.
434       kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
435       kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
436       kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
437 
438       // Load integer literal variants.
439       // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
440       kLiteral1KiB,
441       // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
442       kLiteral4KiB,
443       // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
444       kLiteral64KiB,
445       // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
446       kLiteral1MiB,
447       // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
448       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
449       kLiteralFar,
450 
451       // Load literal base addr.
452       // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
453       kLiteralAddr1KiB,
454       // ADR rX, label; 4KiB offset. 4 bytes.
455       kLiteralAddr4KiB,
456       // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
457       kLiteralAddr64KiB,
458       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
459       kLiteralAddrFar,
460 
461       // Load long or FP literal variants.
462       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
463       kLongOrFPLiteral1KiB,
464       // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
465       kLongOrFPLiteral256KiB,
466       // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
467       kLongOrFPLiteralFar,
468     };
469 
470     // Unresolved branch possibly with a condition.
471     static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
472                         Condition cond = AL) {
473       DCHECK(type == kConditional || type == kUnconditional ||
474              type == kUnconditionalLink || type == kUnconditionalLinkX);
475       DCHECK(size == kBranch16Bit || size == kBranch32Bit);
476       DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
477       return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
478                    cond, type, size, location);
479     }
480 
481     // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
CompareAndBranch(uint32_t location,Register rn,Condition cond)482     static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
483       DCHECK(cond == EQ || cond == NE);
484       return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
485                    cond, kCompareAndBranchXZero, kCbxz16Bit, location);
486     }
487 
488     // Load narrow literal.
LoadNarrowLiteral(uint32_t location,Register rt,Size size)489     static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
490       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
491              size == kLiteral1MiB || size == kLiteralFar);
492       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
493       return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
494                    AL, kLoadLiteralNarrow, size, location);
495     }
496 
497     // Load wide literal.
498     static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
499                                  Size size = kLongOrFPLiteral1KiB) {
500       DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
501              size == kLongOrFPLiteralFar);
502       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
503       return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
504                    AL, kLoadLiteralWide, size, location);
505     }
506 
507     // Load FP single literal.
508     static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
509                                    Size size = kLongOrFPLiteral1KiB) {
510       DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
511              size == kLongOrFPLiteralFar);
512       return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
513                    AL, kLoadFPLiteralSingle, size, location);
514     }
515 
516     // Load FP double literal.
517     static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
518                                    Size size = kLongOrFPLiteral1KiB) {
519       DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
520              size == kLongOrFPLiteralFar);
521       return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
522                    AL, kLoadFPLiteralDouble, size, location);
523     }
524 
LoadLiteralAddress(uint32_t location,Register rt,Size size)525     static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
526       DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
527              size == kLiteralAddrFar);
528       DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
529       return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
530                    AL, kLoadLiteralAddr, size, location);
531     }
532 
GetType()533     Type GetType() const {
534       return type_;
535     }
536 
IsLoadLiteral()537     bool IsLoadLiteral() const {
538       return GetType() >= kLoadLiteralNarrow;
539     }
540 
541     // Returns whether the Fixup can expand from the original size.
CanExpand()542     bool CanExpand() const {
543       switch (GetOriginalSize()) {
544         case kBranch32Bit:
545         case kCbxz48Bit:
546         case kLiteralFar:
547         case kLiteralAddrFar:
548         case kLongOrFPLiteralFar:
549           return false;
550         default:
551           return true;
552       }
553     }
554 
GetOriginalSize()555     Size GetOriginalSize() const {
556       return original_size_;
557     }
558 
GetSize()559     Size GetSize() const {
560       return size_;
561     }
562 
563     uint32_t GetOriginalSizeInBytes() const;
564 
565     uint32_t GetSizeInBytes() const;
566 
GetLocation()567     uint32_t GetLocation() const {
568       return location_;
569     }
570 
GetAdjustment()571     uint32_t GetAdjustment() const {
572       return adjustment_;
573     }
574 
575     // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
576     static void PrepareDependents(Thumb2Assembler* assembler);
577 
Dependents(const Thumb2Assembler & assembler)578     ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
579       return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
580                                                                            dependents_count_);
581     }
582 
583     // Resolve a branch when the target is known.
Resolve(uint32_t target)584     void Resolve(uint32_t target) {
585       DCHECK_EQ(target_, kUnresolved);
586       DCHECK_NE(target, kUnresolved);
587       target_ = target;
588     }
589 
590     // Check if the current size is OK for current location_, target_ and adjustment_.
591     // If not, increase the size. Return the size increase, 0 if unchanged.
592     // If the target if after this Fixup, also add the difference to adjustment_,
593     // so that we don't need to consider forward Fixups as their own dependencies.
594     uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);
595 
596     // Increase adjustments. This is called for dependents of a Fixup when its size changes.
IncreaseAdjustment(uint32_t increase)597     void IncreaseAdjustment(uint32_t increase) {
598       adjustment_ += increase;
599     }
600 
601     // Finalize the branch with an adjustment to the location. Both location and target are updated.
Finalize(uint32_t location_adjustment)602     void Finalize(uint32_t location_adjustment) {
603       DCHECK_NE(target_, kUnresolved);
604       location_ += location_adjustment;
605       target_ += location_adjustment;
606     }
607 
608     // Emit the branch instruction into the assembler buffer.  This does the
609     // encoding into the thumb instruction.
610     void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
611 
612    private:
Fixup(Register rn,Register rt2,SRegister sd,DRegister dd,Condition cond,Type type,Size size,uint32_t location)613     Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
614           Condition cond, Type type, Size size, uint32_t location)
615         : rn_(rn),
616           rt2_(rt2),
617           sd_(sd),
618           dd_(dd),
619           cond_(cond),
620           type_(type),
621           original_size_(size), size_(size),
622           location_(location),
623           target_(kUnresolved),
624           adjustment_(0u),
625           dependents_count_(0u),
626           dependents_start_(0u) {
627     }
628 
629     static size_t SizeInBytes(Size size);
630 
631     // The size of padding added before the literal pool.
632     static size_t LiteralPoolPaddingSize(uint32_t current_code_size);
633 
634     // Returns the offset from the PC-using insn to the target.
635     int32_t GetOffset(uint32_t current_code_size) const;
636 
637     size_t IncreaseSize(Size new_size);
638 
639     int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;
640 
641     template <typename Function>
642     static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);
643 
644     static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.
645 
646     const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
647     Register rt2_;        // For kLoadLiteralWide.
648     SRegister sd_;        // For kLoadFPLiteralSingle.
649     DRegister dd_;        // For kLoadFPLiteralDouble.
650     const Condition cond_;
651     const Type type_;
652     Size original_size_;
653     Size size_;
654     uint32_t location_;     // Offset into assembler buffer in bytes.
655     uint32_t target_;       // Offset into assembler buffer in bytes.
656     uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
657     // Fixups that require adjustment when current size changes are stored in a single
658     // array in the assembler and we store only the start index and count here.
659     uint32_t dependents_count_;
660     uint32_t dependents_start_;
661   };
662 
663   // Emit a single 32 or 16 bit data processing instruction.
664   void EmitDataProcessing(Condition cond,
665                           Opcode opcode,
666                           SetCc set_cc,
667                           Register rn,
668                           Register rd,
669                           const ShifterOperand& so);
670 
671   // Emit a single 32 bit miscellaneous instruction.
672   void Emit32Miscellaneous(uint8_t op1,
673                            uint8_t op2,
674                            uint32_t rest_encoding);
675 
676   // Emit reverse byte instructions: rev, rev16, revsh.
677   void EmitReverseBytes(Register rd, Register rm, uint32_t op);
678 
679   // Emit a single 16 bit miscellaneous instruction.
680   void Emit16Miscellaneous(uint32_t rest_encoding);
681 
682   // Must the instruction be 32 bits or can it possibly be encoded
683   // in 16 bits?
684   bool Is32BitDataProcessing(Condition cond,
685                              Opcode opcode,
686                              SetCc set_cc,
687                              Register rn,
688                              Register rd,
689                              const ShifterOperand& so);
690 
691   // Emit a 32 bit data processing instruction.
692   void Emit32BitDataProcessing(Condition cond,
693                                Opcode opcode,
694                                SetCc set_cc,
695                                Register rn,
696                                Register rd,
697                                const ShifterOperand& so);
698 
699   // Emit a 16 bit data processing instruction.
700   void Emit16BitDataProcessing(Condition cond,
701                                Opcode opcode,
702                                SetCc set_cc,
703                                Register rn,
704                                Register rd,
705                                const ShifterOperand& so);
706 
707   void Emit16BitAddSub(Condition cond,
708                        Opcode opcode,
709                        SetCc set_cc,
710                        Register rn,
711                        Register rd,
712                        const ShifterOperand& so);
713 
714   uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);
715 
716   void EmitLoadStore(Condition cond,
717                      bool load,
718                      bool byte,
719                      bool half,
720                      bool is_signed,
721                      Register rd,
722                      const Address& ad);
723 
724   void EmitMemOpAddressMode3(Condition cond,
725                              int32_t mode,
726                              Register rd,
727                              const Address& ad);
728 
729   void EmitMultiMemOp(Condition cond,
730                       BlockAddressMode am,
731                       bool load,
732                       Register base,
733                       RegList regs);
734 
735   void EmitMulOp(Condition cond,
736                  int32_t opcode,
737                  Register rd,
738                  Register rn,
739                  Register rm,
740                  Register rs);
741 
742   void EmitVFPsss(Condition cond,
743                   int32_t opcode,
744                   SRegister sd,
745                   SRegister sn,
746                   SRegister sm);
747 
748   void EmitVFPddd(Condition cond,
749                   int32_t opcode,
750                   DRegister dd,
751                   DRegister dn,
752                   DRegister dm);
753 
754   void EmitVFPsd(Condition cond,
755                  int32_t opcode,
756                  SRegister sd,
757                  DRegister dm);
758 
759   void EmitVFPds(Condition cond,
760                  int32_t opcode,
761                  DRegister dd,
762                  SRegister sm);
763 
764   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
765 
766   void EmitBranch(Condition cond, Label* label, bool link, bool x);
767   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
768   static int DecodeBranchOffset(int32_t inst);
769   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
770                  Condition cond = AL, SetCc set_cc = kCcDontCare);
771   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
772                  Condition cond = AL, SetCc set_cc = kCcDontCare);
773 
774   static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
775   static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
776   bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
777                                int32_t offset,
778                                /*out*/ int32_t* add_to_base,
779                                /*out*/ int32_t* offset_for_load_store);
780   int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
781                                 Register temp,
782                                 Register base,
783                                 int32_t offset,
784                                 Condition cond);
785 
  // Whether the assembler can relocate branches. If false, unresolved branches will be
  // emitted as 32-bit instructions.
788   bool can_relocate_branches_;
789 
790   // Force the assembler to use 32 bit thumb2 instructions.
791   bool force_32bit_;
792 
793   // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
794   Condition it_conditions_[4];
795   uint8_t it_cond_index_;
796   Condition next_condition_;
797 
798   void SetItCondition(ItState s, Condition cond, uint8_t index);
799 
CheckCondition(Condition cond)800   void CheckCondition(Condition cond) {
801     CHECK_EQ(cond, next_condition_);
802 
803     // Move to the next condition if there is one.
804     if (it_cond_index_ < 3) {
805       ++it_cond_index_;
806       next_condition_ = it_conditions_[it_cond_index_];
807     } else {
808       next_condition_ = AL;
809     }
810   }
811 
CheckConditionLastIt(Condition cond)812   void CheckConditionLastIt(Condition cond) {
813     if (it_cond_index_ < 3) {
814       // Check that the next condition is AL.  This means that the
815       // current condition is the last in the IT block.
816       CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
817     }
818     CheckCondition(cond);
819   }
820 
AddFixup(Fixup fixup)821   FixupId AddFixup(Fixup fixup) {
822     FixupId fixup_id = static_cast<FixupId>(fixups_.size());
823     fixups_.push_back(fixup);
824     // For iterating using FixupId, we need the next id to be representable.
825     DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
826     return fixup_id;
827   }
828 
GetFixup(FixupId fixup_id)829   Fixup* GetFixup(FixupId fixup_id) {
830     DCHECK_LT(fixup_id, fixups_.size());
831     return &fixups_[fixup_id];
832   }
833 
834   void BindLabel(Label* label, uint32_t bound_pc);
835   uint32_t BindLiterals();
836   void BindJumpTables(uint32_t code_size);
837   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
838                            std::deque<FixupId>* fixups_to_recalculate);
839   uint32_t AdjustFixups();
840   void EmitFixups(uint32_t adjusted_code_size);
841   void EmitLiterals();
842   void EmitJumpTables();
843   void PatchCFI();
844 
845   static int16_t BEncoding16(int32_t offset, Condition cond);
846   static int32_t BEncoding32(int32_t offset, Condition cond);
847   static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
848   static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
849   static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
850   static int32_t MovwEncoding32(Register rd, int32_t value);
851   static int32_t MovtEncoding32(Register rd, int32_t value);
852   static int32_t MovModImmEncoding32(Register rd, int32_t value);
853   static int16_t LdrLitEncoding16(Register rt, int32_t offset);
854   static int32_t LdrLitEncoding32(Register rt, int32_t offset);
855   static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
856   static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
857   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
858   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
859   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
860   static int16_t AdrEncoding16(Register rd, int32_t offset);
861   static int32_t AdrEncoding32(Register rd, int32_t offset);
862 
863   ArenaVector<Fixup> fixups_;
864   ArenaVector<FixupId> fixup_dependents_;
865 
866   // Use std::deque<> for literal labels to allow insertions at the end
867   // without invalidating pointers and references to existing elements.
868   ArenaDeque<Literal> literals_;
869 
870   // Jump table list.
871   ArenaDeque<JumpTable> jump_tables_;
872 
873   // Data for AdjustedPosition(), see the description there.
874   uint32_t last_position_adjustment_;
875   uint32_t last_old_position_;
876   FixupId last_fixup_id_;
877 };
878 
879 }  // namespace arm
880 }  // namespace art
881 
882 #endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
883