// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include namespace xnnpack { namespace aarch64 { constexpr size_t kInstructionSizeInBytesLog2 = 2; struct XRegister { uint8_t code; }; constexpr XRegister x0{0}; constexpr XRegister x1{1}; constexpr XRegister x2{2}; constexpr XRegister x3{3}; constexpr XRegister x4{4}; constexpr XRegister x5{5}; constexpr XRegister x6{6}; constexpr XRegister x7{7}; constexpr XRegister x8{8}; constexpr XRegister x9{9}; constexpr XRegister x10{10}; constexpr XRegister x11{11}; constexpr XRegister x12{12}; constexpr XRegister x13{13}; constexpr XRegister x14{14}; constexpr XRegister x15{15}; constexpr XRegister x16{16}; constexpr XRegister x17{17}; constexpr XRegister x18{18}; constexpr XRegister x19{19}; constexpr XRegister x20{20}; constexpr XRegister x21{21}; constexpr XRegister x22{22}; constexpr XRegister x23{23}; constexpr XRegister x24{24}; constexpr XRegister x25{25}; constexpr XRegister x26{26}; constexpr XRegister x27{27}; constexpr XRegister x28{28}; constexpr XRegister x29{29}; constexpr XRegister x30{30}; constexpr XRegister xzr{31}; constexpr XRegister sp{31}; struct VRegisterLane { uint8_t code; uint8_t size; uint8_t lane; const bool is_s() { return size == 2; }; }; struct ScalarVRegister{ uint8_t code; uint8_t size; const VRegisterLane operator[](std::size_t pos) const { return VRegisterLane{code, size, static_cast(pos)}; } }; struct VRegister { uint8_t code; uint8_t size; uint8_t q; VRegister v8b() const { return {code, 0, 0}; } VRegister v16b() const { return {code, 0, 1}; } VRegister v4h() const { return {code, 1, 0}; } VRegister v8h() const { return {code, 1, 1}; } VRegister v2s() const { return {code, 2, 0}; } VRegister v4s() const { return {code, 2, 1}; } VRegister v1d() const { return {code, 3, 0}; } VRegister v2d() const { return {code, 3, 1}; } ScalarVRegister s() const { return {code, 2}; } ScalarVRegister d() const { return {code, 3}; } const bool is_s() { return size == 2; }; }; constexpr VRegister v0{0}; constexpr VRegister v1{1}; constexpr VRegister v2{2}; constexpr VRegister v3{3}; constexpr VRegister v4{4}; constexpr VRegister v5{5}; constexpr VRegister v6{6}; constexpr VRegister v7{7}; constexpr VRegister v8{8}; constexpr VRegister v9{9}; constexpr VRegister v10{10}; constexpr VRegister v11{11}; constexpr VRegister v12{12}; constexpr VRegister v13{13}; constexpr VRegister v14{14}; constexpr VRegister v15{15}; constexpr VRegister v16{16}; constexpr VRegister v17{17}; constexpr VRegister v18{18}; constexpr VRegister v19{19}; constexpr VRegister v20{20}; constexpr VRegister v21{21}; constexpr VRegister v22{22}; constexpr VRegister v23{23}; constexpr VRegister v24{24}; constexpr VRegister v25{25}; constexpr VRegister v26{26}; constexpr VRegister v27{27}; constexpr VRegister v28{28}; constexpr VRegister v29{29}; constexpr VRegister v30{30}; constexpr VRegister v31{31}; struct VRegisterList { VRegisterList(VRegister vt1) : vt1(vt1), length(1) {} VRegisterList(VRegister vt1, VRegister vt2) : vt1(vt1), vt2(vt2), length(2) {} VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3) : vt1(vt1), vt2(vt2), vt3(vt3), length(3) {} VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3, VRegister vt4) : vt1(vt1), vt2(vt2), vt3(vt3), vt4(vt4), length(4) {} VRegister vt1; VRegister vt2; VRegister vt3; VRegister vt4; uint8_t length; }; struct SRegister { uint8_t code; }; constexpr SRegister s0{0}; constexpr SRegister s1{1}; constexpr SRegister s2{2}; constexpr SRegister s3{3}; constexpr SRegister s4{4}; constexpr SRegister s5{5}; constexpr SRegister s6{6}; constexpr SRegister s7{7}; constexpr SRegister s8{8}; constexpr SRegister s9{9}; constexpr SRegister s10{10}; constexpr SRegister s11{11}; constexpr SRegister s12{12}; constexpr SRegister s13{13}; constexpr SRegister s14{14}; constexpr SRegister s15{15}; constexpr SRegister s16{16}; constexpr SRegister s17{17}; constexpr SRegister s18{18}; constexpr SRegister s19{19}; constexpr SRegister s20{20}; constexpr SRegister s21{21}; constexpr SRegister s22{22}; constexpr SRegister s23{23}; constexpr SRegister s24{24}; constexpr SRegister s25{25}; constexpr SRegister s26{26}; constexpr SRegister s27{27}; constexpr SRegister s28{28}; constexpr SRegister s29{29}; constexpr SRegister s30{30}; constexpr SRegister s31{31}; struct DRegister { uint8_t code; }; constexpr DRegister d0{0}; constexpr DRegister d1{1}; constexpr DRegister d2{2}; constexpr DRegister d3{3}; constexpr DRegister d4{4}; constexpr DRegister d5{5}; constexpr DRegister d6{6}; constexpr DRegister d7{7}; constexpr DRegister d8{8}; constexpr DRegister d9{9}; constexpr DRegister d10{10}; constexpr DRegister d11{11}; constexpr DRegister d12{12}; constexpr DRegister d13{13}; constexpr DRegister d14{14}; constexpr DRegister d15{15}; constexpr DRegister d16{16}; constexpr DRegister d17{17}; constexpr DRegister d18{18}; constexpr DRegister d19{19}; constexpr DRegister d20{20}; constexpr DRegister d21{21}; constexpr DRegister d22{22}; constexpr DRegister d23{23}; constexpr DRegister d24{24}; constexpr DRegister d25{25}; constexpr DRegister d26{26}; constexpr DRegister d27{27}; constexpr DRegister d28{28}; constexpr DRegister d29{29}; constexpr DRegister d30{30}; constexpr DRegister d31{31}; struct QRegister { uint8_t code; }; constexpr QRegister q0{0}; constexpr QRegister q1{1}; constexpr QRegister q2{2}; constexpr QRegister q3{3}; constexpr QRegister q4{4}; constexpr QRegister q5{5}; constexpr QRegister q6{6}; constexpr QRegister q7{7}; constexpr QRegister q8{8}; constexpr QRegister q9{9}; constexpr QRegister q10{10}; constexpr QRegister q11{11}; constexpr QRegister q12{12}; constexpr QRegister q13{13}; constexpr QRegister q14{14}; constexpr QRegister q15{15}; constexpr QRegister q16{16}; constexpr QRegister q17{17}; constexpr QRegister q18{18}; constexpr QRegister q19{19}; constexpr QRegister q20{20}; constexpr QRegister q21{21}; constexpr QRegister q22{22}; constexpr QRegister q23{23}; constexpr QRegister q24{24}; constexpr QRegister q25{25}; constexpr QRegister q26{26}; constexpr QRegister q27{27}; constexpr QRegister q28{28}; constexpr QRegister q29{29}; constexpr QRegister q30{30}; constexpr QRegister q31{31}; // C1.3.3 Load/Store addressing modes enum class AddressingMode { kOffset, // Base plus offset: [base{, #imm}] ; [base, Xm{, LSL #imm}]. kPostIndex, // Post-index: [base], #imm ; [base], Xm. kPreIndex, // Pre-index: [base, #imm]! }; struct MemOperand { MemOperand(XRegister xn): base(xn), mode(AddressingMode::kOffset), offset(0) {} MemOperand(XRegister xn, int32_t offset): base(xn), mode(AddressingMode::kOffset), offset(offset) {} MemOperand(XRegister xn, int32_t offset, AddressingMode mode): base(xn), mode(mode), offset(offset) {} // Overload postfix increment to indicate a pre-index addressing mode for load/stores. MemOperand operator++(int) { mode = AddressingMode::kPreIndex; return *this; } XRegister base; AddressingMode mode; int32_t offset; }; static inline MemOperand operator,(XRegister r, int32_t offset) { return MemOperand(r, offset); } // Helper struct for some syntax sugar to look like native assembly, see mem. struct MemOperandHelper { MemOperand operator[](MemOperand op) const { return op; } MemOperand operator[](XRegister r) const { return MemOperand(r, 0); } }; // Use "mem" (and its overload of array subscript operator) to get some syntax // that looks closer to native assembly when accessing memory. For example: // - ldp(x0, x1, mem[rn, offset]); // offset // - ldp(x0, x1, mem[rn], offset); // post-indexed constexpr MemOperandHelper mem; enum PrefetchOp { kPLDL1KEEP = 0 }; enum Condition : uint32_t { kEQ = 0x0, kNE = 0x1, kCS = 0x2, kCC = 0x3, kMI = 0x4, kPL = 0x5, kVS = 0x6, kVC = 0x7, kHI = 0x8, kLS = 0x9, kGE = 0xa, kLT = 0xB, kGT = 0xC, kLE = 0xD, kAL = 0xE, kHS = kCS, kLO = kCC, }; enum class BranchType { kConditional, // For encoding, TBZ and TBNZ are treated similarly, called TBXZ here. kTbxz, kUnconditional, }; // Instruction to use for alignment. // kNop should be used for loops, branch targets. kHlt for end of function. enum class AlignInstruction { kHlt, kNop, }; class Assembler : public AssemblerBase { public: using AssemblerBase::AssemblerBase; // Base instructions. void add(XRegister xd, XRegister xn, uint16_t imm12); void add(XRegister xd, XRegister xn, XRegister xm); void b(Label& l); void b_eq(Label& l) { return b(kEQ, l); } void b_hi(Label& l) { return b(kHI, l); } void b_hs(Label& l) { return b(kHS, l); } void b_lo(Label& l) { return b(kLO, l); } void b_ne(Label& l) { return b(kNE, l); } void cmp(XRegister xn, uint16_t imm12); void cmp(XRegister xn, XRegister xm); void csel(XRegister xd, XRegister xn, XRegister xm, Condition c); void hlt(); void ldp(XRegister xt1, XRegister xt2, MemOperand xn); void ldp(XRegister xt1, XRegister xt2, MemOperand xn, int32_t imm); void ldr(XRegister xt, MemOperand xn); void ldr(XRegister xt, MemOperand xn, int32_t imm); void mov(XRegister xd, XRegister xn); void nop(); void prfm(PrefetchOp prfop, MemOperand xn); void ret(); void stp(XRegister xt1, XRegister xt2, MemOperand xn); void str(XRegister xt1, MemOperand xn); void sub(XRegister xd, XRegister xn, XRegister xm); void subs(XRegister xd, XRegister xn, uint16_t imm12); void tbnz(XRegister xd, uint8_t bit, Label& l); void tbz(XRegister xd, uint8_t bit, Label& l); // Only immediates with lowest N bits set are supported. void tst(XRegister xn, uint8_t imm); // SIMD instructions void dup(DRegister dd, VRegisterLane vn); void fabs(VRegister vd, VRegister vn); void fadd(VRegister vd, VRegister vn, VRegister vm); void fmax(VRegister vd, VRegister vn, VRegister vm); void fmin(VRegister vd, VRegister vn, VRegister vm); void fmla(VRegister vd, VRegister vn, VRegisterLane vm); void fmul(VRegister vd, VRegister vn, VRegister vm); void fneg(VRegister vd, VRegister vn); void ld1(VRegisterList vs, MemOperand xn, int32_t imm); void ld1r(VRegisterList xs, MemOperand xn); void ld2r(VRegisterList xs, MemOperand xn); void ld3r(VRegisterList xs, MemOperand xn); void ldp(DRegister dt1, DRegister dt2, MemOperand xn); void ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm); void ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm); void ldr(DRegister dt, MemOperand xn, int32_t imm); void ldr(QRegister qt, MemOperand xn, int32_t imm); void ldr(SRegister st, MemOperand xn, int32_t imm); void mov(VRegister vd, VRegister vn); void movi(VRegister vd, uint8_t imm); void st1(VRegisterList vs, MemOperand xn, XRegister xm); void stp(DRegister dt1, DRegister dt2, MemOperand xn); void stp(QRegister qt1, QRegister qt2, MemOperand xn); void stp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm); void str(DRegister dt, MemOperand xn, int32_t imm); void str(QRegister qt, MemOperand xn, int32_t imm); void str(SRegister st, MemOperand xn); void str(SRegister st, MemOperand xn, int32_t imm); // Aligns the buffer to n (must be a power of 2). void align(uint8_t n, AlignInstruction instr); void align(uint8_t n) { align(n, AlignInstruction::kNop); } // Binds Label l to the current location in the code buffer. void bind(Label& l); private: void b(Condition c, Label& l); void branch_to_label(uint32_t opcode, BranchType bt, Label& l); void ldr(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code); void str(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code); void tb_helper(uint32_t op, XRegister xd, uint8_t bit, Label& l); }; } // namespace aarch64 } // namespace xnnpack