/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_

#include <deque>
#include <utility>
#include <vector>

#include "base/arena_containers.h"
#include "base/logging.h"
#include "constants_arm.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/arm/assembler_arm.h"
#include "utils/array_ref.h"
#include "offsets.h"

namespace art {
namespace arm {

class Thumb2Assembler FINAL : public ArmAssembler {
 public:
  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
      : ArmAssembler(arena),
        can_relocate_branches_(can_relocate_branches),
        force_32bit_(false),
        it_cond_index_(kNoItCondition),
        next_condition_(AL),
        fixups_(arena->Adapter(kArenaAllocAssembler)),
        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
        literals_(arena->Adapter(kArenaAllocAssembler)),
        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0u),
        last_old_position_(0u),
        last_fixup_id_(0u) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Thumb2Assembler() {
  }

  bool IsThumb() const OVERRIDE {
    return true;
  }

  bool IsForced32Bit() const {
    return force_32bit_;
  }

  bool CanRelocateBranches() const {
    return can_relocate_branches_;
  }

  void FinalizeCode() OVERRIDE;
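
  // Illustrative note (not part of the original header): the data-processing methods
  // below take a SetCc argument. Assuming the kCcSet/kCcKeep/kCcDontCare values
  // declared in assembler_arm.h, a rough sketch of the intended usage is:
  //
  //   assembler.add(R0, R1, ShifterOperand(R2));              // kCcDontCare: the
  //       // assembler may pick a 16-bit encoding even if it sets the flags.
  //   assembler.add(R0, R1, ShifterOperand(R2), AL, kCcKeep); // Flags must be
  //       // preserved; this can force a 32-bit encoding.
  //   assembler.add(R0, R1, ShifterOperand(R2), AL, kCcSet);  // ADDS: flags are set.
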
  // Data-processing instructions.
  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void add(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mov(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mvn(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  // Miscellaneous data-processing instructions.
  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  // Multiply instructions.
  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void mla(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void mls(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;
  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;

  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;

  // Bit field extract instructions.
  void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
  void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;

  // Load/store instructions.
  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd` + 1.
  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd2`.
  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
  void strd(Register rd, Register rd2, const Address& ad, Condition cond);

  void ldm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;
  void stm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
  void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);

  void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
  void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;

  // Miscellaneous instructions.
  void clrex(Condition cond = AL) OVERRIDE;
  void nop(Condition cond = AL) OVERRIDE;

  void bkpt(uint16_t imm16) OVERRIDE;
  void svc(uint32_t imm24) OVERRIDE;

  // If-then.
  void it(Condition firstcond, ItState i1 = kItOmitted,
          ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;

  void cbz(Register rn, Label* target) OVERRIDE;
  void cbnz(Register rn, Label* target) OVERRIDE;

  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  // Returns false if the immediate cannot be encoded.
  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;

  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;

  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;

  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;

  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR.

  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;

  // Branch instructions.
  void b(Label* label, Condition cond = AL);
  void bl(Label* label, Condition cond = AL);
  void blx(Label* label);
  void blx(Register rm, Condition cond = AL) OVERRIDE;
  void bx(Register rm, Condition cond = AL) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Rrx(Register rd, Register rm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void Push(Register rd, Condition cond = AL) OVERRIDE;
  void Pop(Register rd, Condition cond = AL) OVERRIDE;

  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
  void PopList(RegList regs, Condition cond = AL) OVERRIDE;

  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;

  // Memory barriers.
  void dmb(DmbOptions flavor) OVERRIDE;

  // Get the final position of a label after local fixup based on the old position
  // recorded before FinalizeCode().
  uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;

  using ArmAssembler::NewLiteral;  // Make the helper template visible.

  Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
  void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
  void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
  void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
  void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;

  // Add signed constant value to rd. May clobber IP.
  void AddConstant(Register rd, Register rn, int32_t value,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
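
  // Illustrative sketch (constants chosen for illustration): AddConstant() emits a
  // plain ADD/SUB when the constant fits an immediate encoding; otherwise it first
  // materializes the constant in a scratch register, which is why IP may be clobbered:
  //
  //   assembler.AddConstant(R0, R1, 4);           // ADD r0, r1, #4.
  //   assembler.AddConstant(R0, R1, 0x12345678);  // Constant loaded into IP first,
  //                                               // then ADD r0, r1, ip.
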
  // Load and Store. May clobber IP.
  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
  void MarkExceptionHandler(Label* label) OVERRIDE;
  void LoadFromOffset(LoadOperandType type,
                      Register reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void StoreToOffset(StoreOperandType type,
                     Register reg,
                     Register base,
                     int32_t offset,
                     Condition cond = AL) OVERRIDE;
  void LoadSFromOffset(SRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreSToOffset(SRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void LoadDFromOffset(DRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreDToOffset(DRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;

  bool ShifterOperandCanHold(Register rd,
                             Register rn,
                             Opcode opcode,
                             uint32_t immediate,
                             SetCc set_cc,
                             ShifterOperand* shifter_op) OVERRIDE;
  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.

  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;

  static bool IsInstructionForExceptionHandling(uintptr_t pc);

  // Emit data (e.g. encoded instruction or immediate) to the
  // instruction stream.
  void Emit32(int32_t value);  // Emit a 32 bit instruction in thumb format.
  void Emit16(int16_t value);  // Emit a 16 bit instruction in little endian format.
  void Bind(Label* label) OVERRIDE;

  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;

  // Force the assembler to generate 32 bit instructions.
  void Force32Bit() {
    force_32bit_ = true;
  }

  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg.
  // This will generate a fixup.
  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;

 private:
  typedef uint16_t FixupId;

  // Fixup: branches and literal pool references.
  //
  // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
  // depends on both the type of branch and the offset to which it is branching. The 16-bit
  // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
  // instruction and a 16- or 32-bit branch instruction. A load from the literal pool can
  // also be a 16-bit or 32-bit instruction and, if the method is large, we may need to use
  // a sequence of instructions to make up for the limited range of load literal instructions
  // (up to 4KiB for the 32-bit variant). When generating code for these insns we don't know
  // the size beforehand, so we assume it is the smallest available size and determine the
  // final code offsets and sizes and emit code in FinalizeCode().
  //
  // To handle this, we keep a record of every branch and literal pool load in the program.
  // The actual instruction encoding for these is delayed until we know the final size of
  // every instruction. When we bind a label to a branch we don't know the final location yet
  // as some preceding instructions may need to be expanded, so we record a non-final offset.
  // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
  // range. With each expansion, we need to update dependent Fixups, i.e. instructions with
  // a target on the other side of the expanded insn, as their offsets change and this may
  // trigger further expansion.
  //
  // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
  // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
  // to it, using the fixup ids as links. The first link is stored in the label's position
  // (the label is linked but not bound), the following links are stored in the code buffer,
  // in the placeholder where we will eventually emit the actual code.
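  //
  // A worked example (offsets are illustrative, not from real code): suppose a
  // CBZ r0, <label> is emitted and initially assumed to be 16-bit (kCbxz16Bit,
  // forward range of at most 126 bytes). If expansions of earlier fixups push the
  // label 200 bytes ahead, AdjustSizeIfNeeded() grows this Fixup to kCbxz32Bit
  // (CMP r0, #0 followed by a 16-bit Bcc, 4 bytes in total). That 2-byte growth
  // shifts all following code, so every dependent Fixup whose target lies on the
  // other side of this instruction gets its adjustment_ increased, which may in
  // turn push it out of range and trigger another expansion.
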
  class Fixup {
   public:
    // Branch type.
    enum Type : uint8_t {
      kConditional,            // B<cond>.
      kUnconditional,          // B.
      kUnconditionalLink,      // BL.
      kUnconditionalLinkX,     // BLX.
      kCompareAndBranchXZero,  // cbz/cbnz.
      kLoadLiteralNarrow,      // Load narrow integer literal.
      kLoadLiteralWide,        // Load wide integer literal.
      kLoadLiteralAddr,        // Load address of literal (used for jump table).
      kLoadFPLiteralSingle,    // Load FP literal single.
      kLoadFPLiteralDouble,    // Load FP literal double.
    };

    // Calculated size of branch instruction based on type and offset.
    enum Size : uint8_t {
      // Branch variants.
      kBranch16Bit,
      kBranch32Bit,
      // NOTE: We don't support branches which would require multiple instructions, i.e.
      // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB.

      // CBZ/CBNZ variants.
      kCbxz16Bit,  // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
      kCbxz32Bit,  // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
      kCbxz48Bit,  // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.

      // Load integer literal variants.
      // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
      kLiteral1KiB,
      // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
      kLiteral4KiB,
      // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
      kLiteral64KiB,
      // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
      kLiteral1MiB,
      // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
      kLiteralFar,

      // Load literal base addr.
      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
      kLiteralAddr1KiB,
      // ADR rX, label; 4KiB offset. 4 bytes.
      kLiteralAddr4KiB,
      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
      kLiteralAddr64KiB,
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
      kLiteralAddrFar,

      // Load long or FP literal variants.
      // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
      kLongOrFPLiteral1KiB,
      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
      kLongOrFPLiteral256KiB,
      // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
      kLongOrFPLiteralFar,
    };
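
    // Size-ladder sketch (offsets are illustrative): a narrow literal load
    // LDR r0, <literal> starts out as kLiteral1KiB (2 bytes). If the literal
    // pool ends up 2000 bytes away, the fixup grows to kLiteral4KiB (32-bit LDR,
    // 4 bytes); at 50000 bytes it needs kLiteral64KiB (8 bytes), and so on up to
    // kLiteralFar, which can reach any offset.
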
    // Unresolved branch possibly with a condition.
    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
                        Condition cond = AL) {
      DCHECK(type == kConditional || type == kUnconditional ||
             type == kUnconditionalLink || type == kUnconditionalLinkX);
      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, type, size, location);
    }

    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
      DCHECK(cond == EQ || cond == NE);
      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
    }

    // Load narrow literal.
    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
             size == kLiteral1MiB || size == kLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralNarrow, size, location);
    }

    // Load wide literal.
    static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                 Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralWide, size, location);
    }

    // Load FP single literal.
    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                   AL, kLoadFPLiteralSingle, size, location);
    }

    // Load FP double literal.
    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                   AL, kLoadFPLiteralDouble, size, location);
    }

    // Load address of a literal (used for jump tables).
    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
             size == kLiteralAddrFar);
      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralAddr, size, location);
    }

    Type GetType() const {
      return type_;
    }

    bool IsLoadLiteral() const {
      return GetType() >= kLoadLiteralNarrow;
    }
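
    // Usage sketch (simplified; the real emission code lives in assembler_thumb2.cc):
    // the emitting code creates a Fixup at the current buffer position, registers it
    // with AddFixup(), and emits a placeholder of the assumed smallest size, e.g.:
    //
    //   FixupId id = AddFixup(Fixup::Branch(buffer_.Size(), Fixup::kUnconditional));
    //   Emit16(0);  // 16-bit placeholder, patched during FinalizeCode().
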
    // Returns whether the Fixup can expand from the original size.
    bool CanExpand() const {
      switch (GetOriginalSize()) {
        case kBranch32Bit:
        case kCbxz48Bit:
        case kLiteralFar:
        case kLiteralAddrFar:
        case kLongOrFPLiteralFar:
          return false;
        default:
          return true;
      }
    }

    Size GetOriginalSize() const {
      return original_size_;
    }

    Size GetSize() const {
      return size_;
    }

    uint32_t GetOriginalSizeInBytes() const;

    uint32_t GetSizeInBytes() const;

    uint32_t GetLocation() const {
      return location_;
    }

    uint32_t GetAdjustment() const {
      return adjustment_;
    }

    // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
    static void PrepareDependents(Thumb2Assembler* assembler);

    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
                                                                           dependents_count_);
    }

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target) {
      DCHECK_EQ(target_, kUnresolved);
      DCHECK_NE(target, kUnresolved);
      target_ = target;
    }

    // Check if the current size is OK for current location_, target_ and adjustment_.
    // If not, increase the size. Return the size increase, 0 if unchanged.
    // If the target is after this Fixup, also add the difference to adjustment_,
    // so that we don't need to consider forward Fixups as their own dependencies.
    uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);

    // Increase adjustments. This is called for dependents of a Fixup when its size changes.
    void IncreaseAdjustment(uint32_t increase) {
      adjustment_ += increase;
    }

    // Finalize the branch with an adjustment to the location. Both location and target are updated.
    void Finalize(uint32_t location_adjustment) {
      DCHECK_NE(target_, kUnresolved);
      location_ += location_adjustment;
      target_ += location_adjustment;
    }

    // Emit the branch instruction into the assembler buffer. This does the
    // encoding into the thumb instruction.
    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;

   private:
    Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
          Condition cond, Type type, Size size, uint32_t location)
        : rn_(rn),
          rt2_(rt2),
          sd_(sd),
          dd_(dd),
          cond_(cond),
          type_(type),
          original_size_(size), size_(size),
          location_(location),
          target_(kUnresolved),
          adjustment_(0u),
          dependents_count_(0u),
          dependents_start_(0u) {
    }

    static size_t SizeInBytes(Size size);

    // The size of padding added before the literal pool.
    static size_t LiteralPoolPaddingSize(uint32_t current_code_size);

    // Returns the offset from the PC-using insn to the target.
    int32_t GetOffset(uint32_t current_code_size) const;

    size_t IncreaseSize(Size new_size);

    int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;

    template <typename Function>
    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);

    static constexpr uint32_t kUnresolved = 0xffffffff;  // Value for target_ for unresolved.

    const Register rn_;  // Rn for cbnz/cbz, Rt for literal loads.
    Register rt2_;       // For kLoadLiteralWide.
    SRegister sd_;       // For kLoadFPLiteralSingle.
    DRegister dd_;       // For kLoadFPLiteralDouble.
    const Condition cond_;
    const Type type_;
    Size original_size_;
    Size size_;
    uint32_t location_;    // Offset into assembler buffer in bytes.
    uint32_t target_;      // Offset into assembler buffer in bytes.
    uint32_t adjustment_;  // The number of extra bytes inserted between location_ and target_.
    // Fixups that require adjustment when current size changes are stored in a single
    // array in the assembler and we store only the start index and count here.
    uint32_t dependents_count_;
    uint32_t dependents_start_;
  };

  // Emit a single 32 or 16 bit data processing instruction.
  void EmitDataProcessing(Condition cond,
                          Opcode opcode,
                          SetCc set_cc,
                          Register rn,
                          Register rd,
                          const ShifterOperand& so);

  // Emit a single 32 bit miscellaneous instruction.
  void Emit32Miscellaneous(uint8_t op1,
                           uint8_t op2,
                           uint32_t rest_encoding);

  // Emit reverse byte instructions: rev, rev16, revsh.
  void EmitReverseBytes(Register rd, Register rm, uint32_t op);

  // Emit a single 16 bit miscellaneous instruction.
  void Emit16Miscellaneous(uint32_t rest_encoding);

  // Must the instruction be 32 bits or can it possibly be encoded
  // in 16 bits?
  bool Is32BitDataProcessing(Condition cond,
                             Opcode opcode,
                             SetCc set_cc,
                             Register rn,
                             Register rd,
                             const ShifterOperand& so);

  // Emit a 32 bit data processing instruction.
  void Emit32BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);

  // Emit a 16 bit data processing instruction.
  void Emit16BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);

  void Emit16BitAddSub(Condition cond,
                       Opcode opcode,
                       SetCc set_cc,
                       Register rn,
                       Register rd,
                       const ShifterOperand& so);

  uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);

  void EmitLoadStore(Condition cond,
                     bool load,
                     bool byte,
                     bool half,
                     bool is_signed,
                     Register rd,
                     const Address& ad);

  void EmitMemOpAddressMode3(Condition cond,
                             int32_t mode,
                             Register rd,
                             const Address& ad);

  void EmitMultiMemOp(Condition cond,
                      BlockAddressMode am,
                      bool load,
                      Register base,
                      RegList regs);

  void EmitMulOp(Condition cond,
                 int32_t opcode,
                 Register rd,
                 Register rn,
                 Register rm,
                 Register rs);

  void EmitVFPsss(Condition cond,
                  int32_t opcode,
                  SRegister sd,
                  SRegister sn,
                  SRegister sm);

  void EmitVFPddd(Condition cond,
                  int32_t opcode,
                  DRegister dd,
                  DRegister dn,
                  DRegister dm);

  void EmitVFPsd(Condition cond,
                 int32_t opcode,
                 SRegister sd,
                 DRegister dm);

  void EmitVFPds(Condition cond,
                 int32_t opcode,
                 DRegister dd,
                 SRegister sm);

  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);

  void EmitBranch(Condition cond, Label* label, bool link, bool x);
  static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
  static int DecodeBranchOffset(int32_t inst);
  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);
  void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);

  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                               int32_t offset,
                               /*out*/ int32_t* add_to_base,
                               /*out*/ int32_t* offset_for_load_store);
  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                Register temp,
                                Register base,
                                int32_t offset,
                                Condition cond);

  // Whether the assembler can relocate branches. If false, unresolved branches will be
  // emitted as 32-bit instructions.
  bool can_relocate_branches_;

  // Force the assembler to use 32 bit thumb2 instructions.
  bool force_32bit_;

  // IfThen conditions. Used to check that conditional instructions match the preceding IT.
  Condition it_conditions_[4];
  uint8_t it_cond_index_;
  Condition next_condition_;

  void SetItCondition(ItState s, Condition cond, uint8_t index);

  void CheckCondition(Condition cond) {
    CHECK_EQ(cond, next_condition_);

    // Move to the next condition if there is one.
    if (it_cond_index_ < 3) {
      ++it_cond_index_;
      next_condition_ = it_conditions_[it_cond_index_];
    } else {
      next_condition_ = AL;
    }
  }
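
  // Illustrative example of the IT tracking above (conditions chosen for
  // illustration): after it(EQ, kItThen, kItElse), the IT block covers three
  // instructions whose conditions must be EQ, EQ, NE in that order. Each
  // conditional emit calls CheckCondition(), which compares the instruction's
  // condition against next_condition_ and advances it_cond_index_; once the
  // block is exhausted, next_condition_ falls back to AL.
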
  void CheckConditionLastIt(Condition cond) {
    if (it_cond_index_ < 3) {
      // Check that the next condition is AL. This means that the
      // current condition is the last in the IT block.
      CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
    }
    CheckCondition(cond);
  }

  FixupId AddFixup(Fixup fixup) {
    FixupId fixup_id = static_cast<FixupId>(fixups_.size());
    fixups_.push_back(fixup);
    // For iterating using FixupId, we need the next id to be representable.
    DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
    return fixup_id;
  }

  Fixup* GetFixup(FixupId fixup_id) {
    DCHECK_LT(fixup_id, fixups_.size());
    return &fixups_[fixup_id];
  }

  void BindLabel(Label* label, uint32_t bound_pc);
  uint32_t BindLiterals();
  void BindJumpTables(uint32_t code_size);
  void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                           std::deque<FixupId>* fixups_to_recalculate);
  uint32_t AdjustFixups();
  void EmitFixups(uint32_t adjusted_code_size);
  void EmitLiterals();
  void EmitJumpTables();
  void PatchCFI();

  static int16_t BEncoding16(int32_t offset, Condition cond);
  static int32_t BEncoding32(int32_t offset, Condition cond);
  static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
  static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
  static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
  static int32_t MovwEncoding32(Register rd, int32_t value);
  static int32_t MovtEncoding32(Register rd, int32_t value);
  static int32_t MovModImmEncoding32(Register rd, int32_t value);
  static int16_t LdrLitEncoding16(Register rt, int32_t offset);
  static int32_t LdrLitEncoding32(Register rt, int32_t offset);
  static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
  static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
  static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
  static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
  static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
  static int16_t AdrEncoding16(Register rd, int32_t offset);
  static int32_t AdrEncoding32(Register rd, int32_t offset);

  ArenaVector<Fixup> fixups_;
  ArenaVector<FixupId> fixup_dependents_;

  // Use std::deque<> for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for GetAdjustedPosition(), see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  FixupId last_fixup_id_;
};

}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_