1 // Copyright 2022 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <cstddef> 7 #include <cstdint> 8 9 #include <xnnpack/assembler.h> 10 11 namespace xnnpack { 12 namespace aarch64 { 13 14 constexpr size_t kInstructionSizeInBytesLog2 = 2; 15 16 struct XRegister { 17 uint8_t code; 18 }; 19 20 constexpr XRegister x0{0}; 21 constexpr XRegister x1{1}; 22 constexpr XRegister x2{2}; 23 constexpr XRegister x3{3}; 24 constexpr XRegister x4{4}; 25 constexpr XRegister x5{5}; 26 constexpr XRegister x6{6}; 27 constexpr XRegister x7{7}; 28 constexpr XRegister x8{8}; 29 constexpr XRegister x9{9}; 30 constexpr XRegister x10{10}; 31 constexpr XRegister x11{11}; 32 constexpr XRegister x12{12}; 33 constexpr XRegister x13{13}; 34 constexpr XRegister x14{14}; 35 constexpr XRegister x15{15}; 36 constexpr XRegister x16{16}; 37 constexpr XRegister x17{17}; 38 constexpr XRegister x18{18}; 39 constexpr XRegister x19{19}; 40 constexpr XRegister x20{20}; 41 constexpr XRegister x21{21}; 42 constexpr XRegister x22{22}; 43 constexpr XRegister x23{23}; 44 constexpr XRegister x24{24}; 45 constexpr XRegister x25{25}; 46 constexpr XRegister x26{26}; 47 constexpr XRegister x27{27}; 48 constexpr XRegister x28{28}; 49 constexpr XRegister x29{29}; 50 constexpr XRegister x30{30}; 51 constexpr XRegister xzr{31}; 52 constexpr XRegister sp{31}; 53 54 struct VRegisterLane { 55 uint8_t code; 56 uint8_t size; 57 uint8_t lane; is_sVRegisterLane58 const bool is_s() { return size == 2; }; 59 }; 60 61 struct ScalarVRegister{ 62 uint8_t code; 63 uint8_t size; 64 65 const VRegisterLane operator[](std::size_t pos) const { 66 return VRegisterLane{code, size, static_cast<uint8_t>(pos)}; 67 } 68 }; 69 70 struct VRegister { 71 uint8_t code; 72 uint8_t size; 73 uint8_t q; 74 v8bVRegister75 VRegister v8b() const { return {code, 0, 0}; } v16bVRegister76 VRegister v16b() const { return {code, 0, 1}; } v4hVRegister77 VRegister v4h() const { return {code, 1, 0}; } v8hVRegister78 VRegister v8h() const { return {code, 1, 1}; } v2sVRegister79 VRegister v2s() const { return {code, 2, 0}; } v4sVRegister80 VRegister v4s() const { return {code, 2, 1}; } v1dVRegister81 VRegister v1d() const { return {code, 3, 0}; } v2dVRegister82 VRegister v2d() const { return {code, 3, 1}; } 83 sVRegister84 ScalarVRegister s() const { return {code, 2}; } dVRegister85 ScalarVRegister d() const { return {code, 3}; } 86 is_sVRegister87 const bool is_s() { return size == 2; }; 88 }; 89 90 constexpr VRegister v0{0}; 91 constexpr VRegister v1{1}; 92 constexpr VRegister v2{2}; 93 constexpr VRegister v3{3}; 94 constexpr VRegister v4{4}; 95 constexpr VRegister v5{5}; 96 constexpr VRegister v6{6}; 97 constexpr VRegister v7{7}; 98 constexpr VRegister v8{8}; 99 constexpr VRegister v9{9}; 100 constexpr VRegister v10{10}; 101 constexpr VRegister v11{11}; 102 constexpr VRegister v12{12}; 103 constexpr VRegister v13{13}; 104 constexpr VRegister v14{14}; 105 constexpr VRegister v15{15}; 106 constexpr VRegister v16{16}; 107 constexpr VRegister v17{17}; 108 constexpr VRegister v18{18}; 109 constexpr VRegister v19{19}; 110 constexpr VRegister v20{20}; 111 constexpr VRegister v21{21}; 112 constexpr VRegister v22{22}; 113 constexpr VRegister v23{23}; 114 constexpr VRegister v24{24}; 115 constexpr VRegister v25{25}; 116 constexpr VRegister v26{26}; 117 constexpr VRegister v27{27}; 118 constexpr VRegister v28{28}; 119 constexpr VRegister v29{29}; 120 constexpr VRegister v30{30}; 121 constexpr VRegister v31{31}; 122 123 struct VRegisterList { VRegisterListVRegisterList124 VRegisterList(VRegister vt1) 125 : vt1(vt1), length(1) {} VRegisterListVRegisterList126 VRegisterList(VRegister vt1, VRegister vt2) 127 : vt1(vt1), vt2(vt2), length(2) {} VRegisterListVRegisterList128 VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3) 129 : vt1(vt1), vt2(vt2), vt3(vt3), length(3) {} VRegisterListVRegisterList130 VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3, VRegister vt4) 131 : vt1(vt1), vt2(vt2), vt3(vt3), vt4(vt4), length(4) {} 132 133 VRegister vt1; 134 VRegister vt2; 135 VRegister vt3; 136 VRegister vt4; 137 uint8_t length; 138 }; 139 140 struct SRegister { 141 uint8_t code; 142 }; 143 144 constexpr SRegister s0{0}; 145 constexpr SRegister s1{1}; 146 constexpr SRegister s2{2}; 147 constexpr SRegister s3{3}; 148 constexpr SRegister s4{4}; 149 constexpr SRegister s5{5}; 150 constexpr SRegister s6{6}; 151 constexpr SRegister s7{7}; 152 constexpr SRegister s8{8}; 153 constexpr SRegister s9{9}; 154 constexpr SRegister s10{10}; 155 constexpr SRegister s11{11}; 156 constexpr SRegister s12{12}; 157 constexpr SRegister s13{13}; 158 constexpr SRegister s14{14}; 159 constexpr SRegister s15{15}; 160 constexpr SRegister s16{16}; 161 constexpr SRegister s17{17}; 162 constexpr SRegister s18{18}; 163 constexpr SRegister s19{19}; 164 constexpr SRegister s20{20}; 165 constexpr SRegister s21{21}; 166 constexpr SRegister s22{22}; 167 constexpr SRegister s23{23}; 168 constexpr SRegister s24{24}; 169 constexpr SRegister s25{25}; 170 constexpr SRegister s26{26}; 171 constexpr SRegister s27{27}; 172 constexpr SRegister s28{28}; 173 constexpr SRegister s29{29}; 174 constexpr SRegister s30{30}; 175 constexpr SRegister s31{31}; 176 177 struct DRegister { 178 uint8_t code; 179 }; 180 181 constexpr DRegister d0{0}; 182 constexpr DRegister d1{1}; 183 constexpr DRegister d2{2}; 184 constexpr DRegister d3{3}; 185 constexpr DRegister d4{4}; 186 constexpr DRegister d5{5}; 187 constexpr DRegister d6{6}; 188 constexpr DRegister d7{7}; 189 constexpr DRegister d8{8}; 190 constexpr DRegister d9{9}; 191 constexpr DRegister d10{10}; 192 constexpr DRegister d11{11}; 193 constexpr DRegister d12{12}; 194 constexpr DRegister d13{13}; 195 constexpr DRegister d14{14}; 196 constexpr DRegister d15{15}; 197 constexpr DRegister d16{16}; 198 constexpr DRegister d17{17}; 199 constexpr DRegister d18{18}; 200 constexpr DRegister d19{19}; 201 constexpr DRegister d20{20}; 202 constexpr DRegister d21{21}; 203 constexpr DRegister d22{22}; 204 constexpr DRegister d23{23}; 205 constexpr DRegister d24{24}; 206 constexpr DRegister d25{25}; 207 constexpr DRegister d26{26}; 208 constexpr DRegister d27{27}; 209 constexpr DRegister d28{28}; 210 constexpr DRegister d29{29}; 211 constexpr DRegister d30{30}; 212 constexpr DRegister d31{31}; 213 214 struct QRegister { 215 uint8_t code; 216 }; 217 218 constexpr QRegister q0{0}; 219 constexpr QRegister q1{1}; 220 constexpr QRegister q2{2}; 221 constexpr QRegister q3{3}; 222 constexpr QRegister q4{4}; 223 constexpr QRegister q5{5}; 224 constexpr QRegister q6{6}; 225 constexpr QRegister q7{7}; 226 constexpr QRegister q8{8}; 227 constexpr QRegister q9{9}; 228 constexpr QRegister q10{10}; 229 constexpr QRegister q11{11}; 230 constexpr QRegister q12{12}; 231 constexpr QRegister q13{13}; 232 constexpr QRegister q14{14}; 233 constexpr QRegister q15{15}; 234 constexpr QRegister q16{16}; 235 constexpr QRegister q17{17}; 236 constexpr QRegister q18{18}; 237 constexpr QRegister q19{19}; 238 constexpr QRegister q20{20}; 239 constexpr QRegister q21{21}; 240 constexpr QRegister q22{22}; 241 constexpr QRegister q23{23}; 242 constexpr QRegister q24{24}; 243 constexpr QRegister q25{25}; 244 constexpr QRegister q26{26}; 245 constexpr QRegister q27{27}; 246 constexpr QRegister q28{28}; 247 constexpr QRegister q29{29}; 248 constexpr QRegister q30{30}; 249 constexpr QRegister q31{31}; 250 251 // C1.3.3 Load/Store addressing modes 252 enum class AddressingMode { 253 kOffset, // Base plus offset: [base{, #imm}] ; [base, Xm{, LSL #imm}]. 254 kPostIndex, // Post-index: [base], #imm ; [base], Xm. 255 kPreIndex, // Pre-index: [base, #imm]! 256 }; 257 258 struct MemOperand { MemOperandMemOperand259 MemOperand(XRegister xn): base(xn), mode(AddressingMode::kOffset), offset(0) {} MemOperandMemOperand260 MemOperand(XRegister xn, int32_t offset): base(xn), mode(AddressingMode::kOffset), offset(offset) {} MemOperandMemOperand261 MemOperand(XRegister xn, int32_t offset, AddressingMode mode): base(xn), mode(mode), offset(offset) {} 262 263 // Overload postfix increment to indicate a pre-index addressing mode for load/stores. 264 MemOperand operator++(int) { 265 mode = AddressingMode::kPreIndex; 266 return *this; 267 } 268 269 XRegister base; 270 AddressingMode mode; 271 int32_t offset; 272 }; 273 274 static inline MemOperand operator,(XRegister r, int32_t offset) { 275 return MemOperand(r, offset); 276 } 277 278 // Helper struct for some syntax sugar to look like native assembly, see mem. 279 struct MemOperandHelper { 280 MemOperand operator[](MemOperand op) const { return op; } 281 MemOperand operator[](XRegister r) const { return MemOperand(r, 0); } 282 }; 283 284 // Use "mem" (and its overload of array subscript operator) to get some syntax 285 // that looks closer to native assembly when accessing memory. For example: 286 // - ldp(x0, x1, mem[rn, offset]); // offset 287 // - ldp(x0, x1, mem[rn], offset); // post-indexed 288 constexpr MemOperandHelper mem; 289 290 enum PrefetchOp { 291 kPLDL1KEEP = 0 292 }; 293 294 enum Condition : uint32_t { 295 kEQ = 0x0, 296 kNE = 0x1, 297 kCS = 0x2, 298 kCC = 0x3, 299 kMI = 0x4, 300 kPL = 0x5, 301 kVS = 0x6, 302 kVC = 0x7, 303 kHI = 0x8, 304 kLS = 0x9, 305 kGE = 0xa, 306 kLT = 0xB, 307 kGT = 0xC, 308 kLE = 0xD, 309 kAL = 0xE, 310 kHS = kCS, 311 kLO = kCC, 312 }; 313 314 enum class BranchType { 315 kConditional, 316 // For encoding, TBZ and TBNZ are treated similarly, called TBXZ here. 317 kTbxz, 318 kUnconditional, 319 }; 320 321 // Instruction to use for alignment. 322 // kNop should be used for loops, branch targets. kHlt for end of function. 323 enum class AlignInstruction { 324 kHlt, 325 kNop, 326 }; 327 328 class Assembler : public AssemblerBase { 329 public: 330 using AssemblerBase::AssemblerBase; 331 332 // Base instructions. 333 void add(XRegister xd, XRegister xn, uint16_t imm12); 334 void add(XRegister xd, XRegister xn, XRegister xm); 335 void b(Label& l); b_eq(Label & l)336 void b_eq(Label& l) { return b(kEQ, l); } b_hi(Label & l)337 void b_hi(Label& l) { return b(kHI, l); } b_hs(Label & l)338 void b_hs(Label& l) { return b(kHS, l); } b_lo(Label & l)339 void b_lo(Label& l) { return b(kLO, l); } b_ne(Label & l)340 void b_ne(Label& l) { return b(kNE, l); } 341 void cmp(XRegister xn, uint16_t imm12); 342 void cmp(XRegister xn, XRegister xm); 343 void csel(XRegister xd, XRegister xn, XRegister xm, Condition c); 344 void hlt(); 345 void ldp(XRegister xt1, XRegister xt2, MemOperand xn); 346 void ldp(XRegister xt1, XRegister xt2, MemOperand xn, int32_t imm); 347 void ldr(XRegister xt, MemOperand xn); 348 void ldr(XRegister xt, MemOperand xn, int32_t imm); 349 void mov(XRegister xd, XRegister xn); 350 void nop(); 351 void prfm(PrefetchOp prfop, MemOperand xn); 352 void ret(); 353 void stp(XRegister xt1, XRegister xt2, MemOperand xn); 354 void str(XRegister xt1, MemOperand xn); 355 void sub(XRegister xd, XRegister xn, XRegister xm); 356 void subs(XRegister xd, XRegister xn, uint16_t imm12); 357 void tbnz(XRegister xd, uint8_t bit, Label& l); 358 void tbz(XRegister xd, uint8_t bit, Label& l); 359 // Only immediates with lowest N bits set are supported. 360 void tst(XRegister xn, uint8_t imm); 361 362 // SIMD instructions 363 void dup(DRegister dd, VRegisterLane vn); 364 void fabs(VRegister vd, VRegister vn); 365 void fadd(VRegister vd, VRegister vn, VRegister vm); 366 void fmax(VRegister vd, VRegister vn, VRegister vm); 367 void fmin(VRegister vd, VRegister vn, VRegister vm); 368 void fmla(VRegister vd, VRegister vn, VRegisterLane vm); 369 void fmul(VRegister vd, VRegister vn, VRegister vm); 370 void fneg(VRegister vd, VRegister vn); 371 void ld1(VRegisterList vs, MemOperand xn, int32_t imm); 372 void ld1r(VRegisterList xs, MemOperand xn); 373 void ld2r(VRegisterList xs, MemOperand xn); 374 void ld3r(VRegisterList xs, MemOperand xn); 375 void ldp(DRegister dt1, DRegister dt2, MemOperand xn); 376 void ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm); 377 void ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm); 378 void ldr(DRegister dt, MemOperand xn, int32_t imm); 379 void ldr(QRegister qt, MemOperand xn, int32_t imm); 380 void ldr(SRegister st, MemOperand xn, int32_t imm); 381 void mov(VRegister vd, VRegister vn); 382 void movi(VRegister vd, uint8_t imm); 383 void st1(VRegisterList vs, MemOperand xn, XRegister xm); 384 void stp(DRegister dt1, DRegister dt2, MemOperand xn); 385 void stp(QRegister qt1, QRegister qt2, MemOperand xn); 386 void stp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm); 387 void str(DRegister dt, MemOperand xn, int32_t imm); 388 void str(QRegister qt, MemOperand xn, int32_t imm); 389 void str(SRegister st, MemOperand xn); 390 void str(SRegister st, MemOperand xn, int32_t imm); 391 392 // Aligns the buffer to n (must be a power of 2). 393 void align(uint8_t n, AlignInstruction instr); align(uint8_t n)394 void align(uint8_t n) { align(n, AlignInstruction::kNop); } 395 // Binds Label l to the current location in the code buffer. 396 void bind(Label& l); 397 398 private: 399 void b(Condition c, Label& l); 400 void branch_to_label(uint32_t opcode, BranchType bt, Label& l); 401 void ldr(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code); 402 void str(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code); 403 void tb_helper(uint32_t op, XRegister xd, uint8_t bit, Label& l); 404 405 }; 406 407 } // namespace aarch64 408 } // namespace xnnpack 409