• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <cstddef>
7 #include <cstdint>
8 
9 #include <xnnpack/assembler.h>
10 
11 namespace xnnpack {
12 namespace aarch64 {
13 
// log2 of the size of one instruction in bytes: 1 << 2 == 4 bytes.
constexpr size_t kInstructionSizeInBytesLog2{2};
15 
// Operand type for the X (general-purpose) register names below.
// It is a plain aggregate holding only the register number.
struct XRegister {
  uint8_t code;  // Register number.
};

// Named registers x0..x30; each constant's code equals the number in its name.
constexpr XRegister x0 = {0};
constexpr XRegister x1 = {1};
constexpr XRegister x2 = {2};
constexpr XRegister x3 = {3};
constexpr XRegister x4 = {4};
constexpr XRegister x5 = {5};
constexpr XRegister x6 = {6};
constexpr XRegister x7 = {7};
constexpr XRegister x8 = {8};
constexpr XRegister x9 = {9};
constexpr XRegister x10 = {10};
constexpr XRegister x11 = {11};
constexpr XRegister x12 = {12};
constexpr XRegister x13 = {13};
constexpr XRegister x14 = {14};
constexpr XRegister x15 = {15};
constexpr XRegister x16 = {16};
constexpr XRegister x17 = {17};
constexpr XRegister x18 = {18};
constexpr XRegister x19 = {19};
constexpr XRegister x20 = {20};
constexpr XRegister x21 = {21};
constexpr XRegister x22 = {22};
constexpr XRegister x23 = {23};
constexpr XRegister x24 = {24};
constexpr XRegister x25 = {25};
constexpr XRegister x26 = {26};
constexpr XRegister x27 = {27};
constexpr XRegister x28 = {28};
constexpr XRegister x29 = {29};
constexpr XRegister x30 = {30};
// xzr and sp share register number 31.
constexpr XRegister xzr = {31};
constexpr XRegister sp = {31};
53 
// A single lane of a vector register, as produced by
// ScalarVRegister::operator[] (e.g. v0.s()[1]).
struct VRegisterLane {
  uint8_t code;  // Register number.
  uint8_t size;  // Element size selector (2 for s(), 3 for d()).
  uint8_t lane;  // Lane index within the register.

  // True when the element size selector is 2 (the value s() uses).
  // Const-qualified so it can be called on const lanes, including the
  // `const VRegisterLane` returned by ScalarVRegister::operator[].
  bool is_s() const { return size == 2; }
};

// A vector register viewed with a fixed element size; subscript it to name
// one lane.
struct ScalarVRegister {
  uint8_t code;  // Register number.
  uint8_t size;  // Element size selector (2 for s(), 3 for d()).

  // Returns lane `pos` of this register. The index is narrowed to uint8_t.
  const VRegisterLane operator[](std::size_t pos) const {
    return VRegisterLane{code, size, static_cast<uint8_t>(pos)};
  }
};

// A vector register together with an arrangement, described by `size` and `q`.
struct VRegister {
  uint8_t code;  // Register number.
  uint8_t size;  // Element size selector: 0 = b, 1 = h, 2 = s, 3 = d.
  uint8_t q;     // 0 for the shorter arrangement (8b/4h/2s/1d), 1 for the longer.

  // Arrangement views: same register number, different size/q.
  VRegister v8b() const { return {code, 0, 0}; }
  VRegister v16b() const { return {code, 0, 1}; }
  VRegister v4h() const { return {code, 1, 0}; }
  VRegister v8h() const { return {code, 1, 1}; }
  VRegister v2s() const { return {code, 2, 0}; }
  VRegister v4s() const { return {code, 2, 1}; }
  VRegister v1d() const { return {code, 3, 0}; }
  VRegister v2d() const { return {code, 3, 1}; }

  // Element-size views used to address a single lane, e.g. v0.s()[1].
  ScalarVRegister s() const { return {code, 2}; }
  ScalarVRegister d() const { return {code, 3}; }

  // True when the element size selector is 2 (v2s/v4s).
  // Const-qualified so it can be called on const registers such as the
  // constexpr v0..v31 constants.
  bool is_s() const { return size == 2; }
};
89 
90 constexpr VRegister v0{0};
91 constexpr VRegister v1{1};
92 constexpr VRegister v2{2};
93 constexpr VRegister v3{3};
94 constexpr VRegister v4{4};
95 constexpr VRegister v5{5};
96 constexpr VRegister v6{6};
97 constexpr VRegister v7{7};
98 constexpr VRegister v8{8};
99 constexpr VRegister v9{9};
100 constexpr VRegister v10{10};
101 constexpr VRegister v11{11};
102 constexpr VRegister v12{12};
103 constexpr VRegister v13{13};
104 constexpr VRegister v14{14};
105 constexpr VRegister v15{15};
106 constexpr VRegister v16{16};
107 constexpr VRegister v17{17};
108 constexpr VRegister v18{18};
109 constexpr VRegister v19{19};
110 constexpr VRegister v20{20};
111 constexpr VRegister v21{21};
112 constexpr VRegister v22{22};
113 constexpr VRegister v23{23};
114 constexpr VRegister v24{24};
115 constexpr VRegister v25{25};
116 constexpr VRegister v26{26};
117 constexpr VRegister v27{27};
118 constexpr VRegister v28{28};
119 constexpr VRegister v29{29};
120 constexpr VRegister v30{30};
121 constexpr VRegister v31{31};
122 
123 struct VRegisterList {
VRegisterListVRegisterList124   VRegisterList(VRegister vt1)
125       : vt1(vt1), length(1) {}
VRegisterListVRegisterList126   VRegisterList(VRegister vt1, VRegister vt2)
127       : vt1(vt1), vt2(vt2), length(2) {}
VRegisterListVRegisterList128   VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3)
129       : vt1(vt1), vt2(vt2), vt3(vt3), length(3) {}
VRegisterListVRegisterList130   VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3, VRegister vt4)
131       : vt1(vt1), vt2(vt2), vt3(vt3), vt4(vt4), length(4) {}
132 
133   VRegister vt1;
134   VRegister vt2;
135   VRegister vt3;
136   VRegister vt4;
137   uint8_t length;
138 };
139 
// Operand type for the S register names s0..s31.
// It is a plain aggregate holding only the register number.
struct SRegister {
  uint8_t code;  // Register number.
};

// Each constant's code equals the number in its name.
constexpr SRegister s0 = {0};
constexpr SRegister s1 = {1};
constexpr SRegister s2 = {2};
constexpr SRegister s3 = {3};
constexpr SRegister s4 = {4};
constexpr SRegister s5 = {5};
constexpr SRegister s6 = {6};
constexpr SRegister s7 = {7};
constexpr SRegister s8 = {8};
constexpr SRegister s9 = {9};
constexpr SRegister s10 = {10};
constexpr SRegister s11 = {11};
constexpr SRegister s12 = {12};
constexpr SRegister s13 = {13};
constexpr SRegister s14 = {14};
constexpr SRegister s15 = {15};
constexpr SRegister s16 = {16};
constexpr SRegister s17 = {17};
constexpr SRegister s18 = {18};
constexpr SRegister s19 = {19};
constexpr SRegister s20 = {20};
constexpr SRegister s21 = {21};
constexpr SRegister s22 = {22};
constexpr SRegister s23 = {23};
constexpr SRegister s24 = {24};
constexpr SRegister s25 = {25};
constexpr SRegister s26 = {26};
constexpr SRegister s27 = {27};
constexpr SRegister s28 = {28};
constexpr SRegister s29 = {29};
constexpr SRegister s30 = {30};
constexpr SRegister s31 = {31};
176 
// Operand type for the D register names d0..d31.
// It is a plain aggregate holding only the register number.
struct DRegister {
  uint8_t code;  // Register number.
};

// Each constant's code equals the number in its name.
constexpr DRegister d0 = {0};
constexpr DRegister d1 = {1};
constexpr DRegister d2 = {2};
constexpr DRegister d3 = {3};
constexpr DRegister d4 = {4};
constexpr DRegister d5 = {5};
constexpr DRegister d6 = {6};
constexpr DRegister d7 = {7};
constexpr DRegister d8 = {8};
constexpr DRegister d9 = {9};
constexpr DRegister d10 = {10};
constexpr DRegister d11 = {11};
constexpr DRegister d12 = {12};
constexpr DRegister d13 = {13};
constexpr DRegister d14 = {14};
constexpr DRegister d15 = {15};
constexpr DRegister d16 = {16};
constexpr DRegister d17 = {17};
constexpr DRegister d18 = {18};
constexpr DRegister d19 = {19};
constexpr DRegister d20 = {20};
constexpr DRegister d21 = {21};
constexpr DRegister d22 = {22};
constexpr DRegister d23 = {23};
constexpr DRegister d24 = {24};
constexpr DRegister d25 = {25};
constexpr DRegister d26 = {26};
constexpr DRegister d27 = {27};
constexpr DRegister d28 = {28};
constexpr DRegister d29 = {29};
constexpr DRegister d30 = {30};
constexpr DRegister d31 = {31};
213 
// Operand type for the Q register names q0..q31.
// It is a plain aggregate holding only the register number.
struct QRegister {
  uint8_t code;  // Register number.
};

// Each constant's code equals the number in its name.
constexpr QRegister q0 = {0};
constexpr QRegister q1 = {1};
constexpr QRegister q2 = {2};
constexpr QRegister q3 = {3};
constexpr QRegister q4 = {4};
constexpr QRegister q5 = {5};
constexpr QRegister q6 = {6};
constexpr QRegister q7 = {7};
constexpr QRegister q8 = {8};
constexpr QRegister q9 = {9};
constexpr QRegister q10 = {10};
constexpr QRegister q11 = {11};
constexpr QRegister q12 = {12};
constexpr QRegister q13 = {13};
constexpr QRegister q14 = {14};
constexpr QRegister q15 = {15};
constexpr QRegister q16 = {16};
constexpr QRegister q17 = {17};
constexpr QRegister q18 = {18};
constexpr QRegister q19 = {19};
constexpr QRegister q20 = {20};
constexpr QRegister q21 = {21};
constexpr QRegister q22 = {22};
constexpr QRegister q23 = {23};
constexpr QRegister q24 = {24};
constexpr QRegister q25 = {25};
constexpr QRegister q26 = {26};
constexpr QRegister q27 = {27};
constexpr QRegister q28 = {28};
constexpr QRegister q29 = {29};
constexpr QRegister q30 = {30};
constexpr QRegister q31 = {31};
250 
// Load/store addressing modes (section C1.3.3, "Load/Store addressing modes").
enum class AddressingMode {
  // Base plus offset: [base{, #imm}] ; [base, Xm{, LSL #imm}].
  kOffset,
  // Post-index: [base], #imm ; [base], Xm.
  kPostIndex,
  // Pre-index: [base, #imm]!
  kPreIndex,
};
257 
258 struct MemOperand {
MemOperandMemOperand259   MemOperand(XRegister xn): base(xn), mode(AddressingMode::kOffset), offset(0) {}
MemOperandMemOperand260   MemOperand(XRegister xn, int32_t offset): base(xn), mode(AddressingMode::kOffset), offset(offset) {}
MemOperandMemOperand261   MemOperand(XRegister xn, int32_t offset, AddressingMode mode): base(xn), mode(mode), offset(offset) {}
262 
263   // Overload postfix increment to indicate a pre-index addressing mode for load/stores.
264   MemOperand operator++(int) {
265     mode = AddressingMode::kPreIndex;
266     return *this;
267   }
268 
269   XRegister base;
270   AddressingMode mode;
271   int32_t offset;
272 };
273 
274 static inline MemOperand operator,(XRegister r, int32_t offset) {
275   return MemOperand(r, offset);
276 }
277 
278 // Helper struct for some syntax sugar to look like native assembly, see mem.
279 struct MemOperandHelper {
280   MemOperand operator[](MemOperand op) const { return op; }
281   MemOperand operator[](XRegister r) const { return MemOperand(r, 0); }
282 };
283 
284 // Use "mem" (and its overload of array subscript operator) to get some syntax
285 // that looks closer to native assembly when accessing memory. For example:
286 // - ldp(x0, x1, mem[rn, offset]); // offset
287 // - ldp(x0, x1, mem[rn], offset); // post-indexed
288 constexpr MemOperandHelper mem;
289 
// Prefetch operation selector passed to prfm(). Only PLDL1KEEP is defined.
enum PrefetchOp {
  kPLDL1KEEP = 0,
};
293 
// AArch64 condition codes; each value is the 4-bit condition encoding.
enum Condition : uint32_t {
  kEQ = 0x0,  // Equal.
  kNE = 0x1,  // Not equal.
  kCS = 0x2,  // Carry set.
  kCC = 0x3,  // Carry clear.
  kMI = 0x4,  // Minus (negative).
  kPL = 0x5,  // Plus (positive or zero).
  kVS = 0x6,  // Overflow.
  kVC = 0x7,  // No overflow.
  kHI = 0x8,  // Unsigned higher.
  kLS = 0x9,  // Unsigned lower or same.
  kGE = 0xA,  // Signed greater than or equal.
  kLT = 0xB,  // Signed less than.
  kGT = 0xC,  // Signed greater than.
  kLE = 0xD,  // Signed less than or equal.
  kAL = 0xE,  // Always.
  // Aliases sharing an encoding with the carry conditions.
  kHS = kCS,  // Unsigned higher or same.
  kLO = kCC,  // Unsigned lower.
};
313 
// Kinds of branch instruction, as distinguished when encoding a branch.
enum class BranchType {
  kConditional,
  // TBZ and TBNZ are encoded similarly, so both are grouped as "TBXZ".
  kTbxz,
  kUnconditional,
};
320 
// Which instruction to emit as padding when aligning.
// Use kNop for loops and branch targets; use kHlt at the end of a function.
enum class AlignInstruction {
  kHlt,
  kNop,
};
327 
328 class Assembler : public AssemblerBase {
329  public:
330   using AssemblerBase::AssemblerBase;
331 
332   // Base instructions.
333   void add(XRegister xd, XRegister xn, uint16_t imm12);
334   void add(XRegister xd, XRegister xn, XRegister xm);
335   void b(Label& l);
b_eq(Label & l)336   void b_eq(Label& l) { return b(kEQ, l); }
b_hi(Label & l)337   void b_hi(Label& l) { return b(kHI, l); }
b_hs(Label & l)338   void b_hs(Label& l) { return b(kHS, l); }
b_lo(Label & l)339   void b_lo(Label& l) { return b(kLO, l); }
b_ne(Label & l)340   void b_ne(Label& l) { return b(kNE, l); }
341   void cmp(XRegister xn, uint16_t imm12);
342   void cmp(XRegister xn, XRegister xm);
343   void csel(XRegister xd, XRegister xn, XRegister xm, Condition c);
344   void hlt();
345   void ldp(XRegister xt1, XRegister xt2, MemOperand xn);
346   void ldp(XRegister xt1, XRegister xt2, MemOperand xn, int32_t imm);
347   void ldr(XRegister xt, MemOperand xn);
348   void ldr(XRegister xt, MemOperand xn, int32_t imm);
349   void mov(XRegister xd, XRegister xn);
350   void nop();
351   void prfm(PrefetchOp prfop, MemOperand xn);
352   void ret();
353   void stp(XRegister xt1, XRegister xt2, MemOperand xn);
354   void str(XRegister xt1, MemOperand xn);
355   void sub(XRegister xd, XRegister xn, XRegister xm);
356   void subs(XRegister xd, XRegister xn, uint16_t imm12);
357   void tbnz(XRegister xd, uint8_t bit, Label& l);
358   void tbz(XRegister xd, uint8_t bit, Label& l);
359   // Only immediates with lowest N bits set are supported.
360   void tst(XRegister xn, uint8_t imm);
361 
362   // SIMD instructions
363   void dup(DRegister dd, VRegisterLane vn);
364   void fabs(VRegister vd, VRegister vn);
365   void fadd(VRegister vd, VRegister vn, VRegister vm);
366   void fmax(VRegister vd, VRegister vn, VRegister vm);
367   void fmin(VRegister vd, VRegister vn, VRegister vm);
368   void fmla(VRegister vd, VRegister vn, VRegisterLane vm);
369   void fmul(VRegister vd, VRegister vn, VRegister vm);
370   void fneg(VRegister vd, VRegister vn);
371   void ld1(VRegisterList vs, MemOperand xn, int32_t imm);
372   void ld1r(VRegisterList xs, MemOperand xn);
373   void ld2r(VRegisterList xs, MemOperand xn);
374   void ld3r(VRegisterList xs, MemOperand xn);
375   void ldp(DRegister dt1, DRegister dt2, MemOperand xn);
376   void ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm);
377   void ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm);
378   void ldr(DRegister dt, MemOperand xn, int32_t imm);
379   void ldr(QRegister qt, MemOperand xn, int32_t imm);
380   void ldr(SRegister st, MemOperand xn, int32_t imm);
381   void mov(VRegister vd, VRegister vn);
382   void movi(VRegister vd, uint8_t imm);
383   void st1(VRegisterList vs, MemOperand xn, XRegister xm);
384   void stp(DRegister dt1, DRegister dt2, MemOperand xn);
385   void stp(QRegister qt1, QRegister qt2, MemOperand xn);
386   void stp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm);
387   void str(DRegister dt, MemOperand xn, int32_t imm);
388   void str(QRegister qt, MemOperand xn, int32_t imm);
389   void str(SRegister st, MemOperand xn);
390   void str(SRegister st, MemOperand xn, int32_t imm);
391 
392   // Aligns the buffer to n (must be a power of 2).
393   void align(uint8_t n, AlignInstruction instr);
align(uint8_t n)394   void align(uint8_t n) { align(n, AlignInstruction::kNop); }
395   // Binds Label l to the current location in the code buffer.
396   void bind(Label& l);
397 
398  private:
399   void b(Condition c, Label& l);
400   void branch_to_label(uint32_t opcode, BranchType bt, Label& l);
401   void ldr(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code);
402   void str(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code);
403   void tb_helper(uint32_t op, XRegister xd, uint8_t bit, Label& l);
404 
405 };
406 
407 }  // namespace aarch64
408 }  // namespace xnnpack
409