// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
6 #include <cstddef>
7 #include <cstdint>
8 
9 #include <xnnpack/assembler.h>
10 
11 namespace xnnpack {
12 namespace aarch64 {
13 
// Log2 of the size of one instruction in bytes (1 << 2 == 4 bytes).
// Spelled std::size_t because <cstddef> only guarantees the std:: name.
constexpr std::size_t kInstructionSizeInBytesLog2 = 2;
15 
// Typed wrapper for an X-register operand; `code` holds the register number.
// Kept as a plain aggregate so the named registers below can be constexpr.
struct XRegister {
  uint8_t code;
};

// Named X registers, one constant per register number.
constexpr XRegister x0 = {0};
constexpr XRegister x1 = {1};
constexpr XRegister x2 = {2};
constexpr XRegister x3 = {3};
constexpr XRegister x4 = {4};
constexpr XRegister x5 = {5};
constexpr XRegister x6 = {6};
constexpr XRegister x7 = {7};
constexpr XRegister x8 = {8};
constexpr XRegister x9 = {9};
constexpr XRegister x10 = {10};
constexpr XRegister x11 = {11};
constexpr XRegister x12 = {12};
constexpr XRegister x13 = {13};
constexpr XRegister x14 = {14};
constexpr XRegister x15 = {15};
constexpr XRegister x16 = {16};
constexpr XRegister x17 = {17};
constexpr XRegister x18 = {18};
constexpr XRegister x19 = {19};
constexpr XRegister x20 = {20};
constexpr XRegister x21 = {21};
constexpr XRegister x22 = {22};
constexpr XRegister x23 = {23};
constexpr XRegister x24 = {24};
constexpr XRegister x25 = {25};
constexpr XRegister x26 = {26};
constexpr XRegister x27 = {27};
constexpr XRegister x28 = {28};
constexpr XRegister x29 = {29};
constexpr XRegister x30 = {30};
// xzr and sp deliberately share register number 31.
constexpr XRegister xzr = {31};
constexpr XRegister sp = {31};
53 
// A single lane (element) of a vector register.
struct VRegisterLane {
  uint8_t code;  // Register number.
  uint8_t size;  // Element size selector; is_s() treats 2 as the "s" size.
  uint8_t lane;  // Index of the lane within the register.

  // Returns true when the element size selector is 2.
  // Const-qualified so it can be called on const lanes; the return type is a
  // plain bool because a const-qualified by-value return has no effect.
  bool is_s() const { return size == 2; }
};
60 
61 struct ScalarVRegister{
62   uint8_t code;
63   uint8_t size;
64 
65   const VRegisterLane operator[](std::size_t pos) const {
66     return VRegisterLane{code, size, static_cast<uint8_t>(pos)};
67   }
68 };
69 
70 struct VRegister {
71   uint8_t code;
72   uint8_t size;
73   uint8_t q;
74 
v8bVRegister75   VRegister v8b() const { return {code, 0, 0}; }
v16bVRegister76   VRegister v16b() const { return {code, 0, 1}; }
v4hVRegister77   VRegister v4h() const { return {code, 1, 0}; }
v8hVRegister78   VRegister v8h() const { return {code, 1, 1}; }
v2sVRegister79   VRegister v2s() const { return {code, 2, 0}; }
v4sVRegister80   VRegister v4s() const { return {code, 2, 1}; }
v1dVRegister81   VRegister v1d() const { return {code, 3, 0}; }
v2dVRegister82   VRegister v2d() const { return {code, 3, 1}; }
83 
sVRegister84   ScalarVRegister s() const { return {code, 2}; }
dVRegister85   ScalarVRegister d() const { return {code, 3}; }
86 
is_sVRegister87   const bool is_s() { return size == 2; };
88 };
89 
90 constexpr VRegister v0{0};
91 constexpr VRegister v1{1};
92 constexpr VRegister v2{2};
93 constexpr VRegister v3{3};
94 constexpr VRegister v4{4};
95 constexpr VRegister v5{5};
96 constexpr VRegister v6{6};
97 constexpr VRegister v7{7};
98 constexpr VRegister v8{8};
99 constexpr VRegister v9{9};
100 constexpr VRegister v10{10};
101 constexpr VRegister v11{11};
102 constexpr VRegister v12{12};
103 constexpr VRegister v13{13};
104 constexpr VRegister v14{14};
105 constexpr VRegister v15{15};
106 constexpr VRegister v16{16};
107 constexpr VRegister v17{17};
108 constexpr VRegister v18{18};
109 constexpr VRegister v19{19};
110 constexpr VRegister v20{20};
111 constexpr VRegister v21{21};
112 constexpr VRegister v22{22};
113 constexpr VRegister v23{23};
114 constexpr VRegister v24{24};
115 constexpr VRegister v25{25};
116 constexpr VRegister v26{26};
117 constexpr VRegister v27{27};
118 constexpr VRegister v28{28};
119 constexpr VRegister v29{29};
120 constexpr VRegister v30{30};
121 constexpr VRegister v31{31};
122 
123 struct VRegisterList {
VRegisterListVRegisterList124   VRegisterList(VRegister vt1)
125       : vt1(vt1), length(1) {}
VRegisterListVRegisterList126   VRegisterList(VRegister vt1, VRegister vt2)
127       : vt1(vt1), vt2(vt2), length(2) {}
VRegisterListVRegisterList128   VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3)
129       : vt1(vt1), vt2(vt2), vt3(vt3), length(3) {}
VRegisterListVRegisterList130   VRegisterList(VRegister vt1, VRegister vt2, VRegister vt3, VRegister vt4)
131       : vt1(vt1), vt2(vt2), vt3(vt3), vt4(vt4), length(4) {}
132 
133   VRegister vt1;
134   VRegister vt2;
135   VRegister vt3;
136   VRegister vt4;
137   uint8_t length;
138 };
139 
// Typed wrapper for an S-register operand; `code` holds the register number.
struct SRegister {
  uint8_t code;
};

// Named S registers, one constant per register number.
constexpr SRegister s0 = {0};
constexpr SRegister s1 = {1};
constexpr SRegister s2 = {2};
constexpr SRegister s3 = {3};
constexpr SRegister s4 = {4};
constexpr SRegister s5 = {5};
constexpr SRegister s6 = {6};
constexpr SRegister s7 = {7};
constexpr SRegister s8 = {8};
constexpr SRegister s9 = {9};
constexpr SRegister s10 = {10};
constexpr SRegister s11 = {11};
constexpr SRegister s12 = {12};
constexpr SRegister s13 = {13};
constexpr SRegister s14 = {14};
constexpr SRegister s15 = {15};
constexpr SRegister s16 = {16};
constexpr SRegister s17 = {17};
constexpr SRegister s18 = {18};
constexpr SRegister s19 = {19};
constexpr SRegister s20 = {20};
constexpr SRegister s21 = {21};
constexpr SRegister s22 = {22};
constexpr SRegister s23 = {23};
constexpr SRegister s24 = {24};
constexpr SRegister s25 = {25};
constexpr SRegister s26 = {26};
constexpr SRegister s27 = {27};
constexpr SRegister s28 = {28};
constexpr SRegister s29 = {29};
constexpr SRegister s30 = {30};
constexpr SRegister s31 = {31};
176 
// Typed wrapper for a D-register operand; `code` holds the register number.
struct DRegister {
  uint8_t code;
};

// Named D registers, one constant per register number.
constexpr DRegister d0 = {0};
constexpr DRegister d1 = {1};
constexpr DRegister d2 = {2};
constexpr DRegister d3 = {3};
constexpr DRegister d4 = {4};
constexpr DRegister d5 = {5};
constexpr DRegister d6 = {6};
constexpr DRegister d7 = {7};
constexpr DRegister d8 = {8};
constexpr DRegister d9 = {9};
constexpr DRegister d10 = {10};
constexpr DRegister d11 = {11};
constexpr DRegister d12 = {12};
constexpr DRegister d13 = {13};
constexpr DRegister d14 = {14};
constexpr DRegister d15 = {15};
constexpr DRegister d16 = {16};
constexpr DRegister d17 = {17};
constexpr DRegister d18 = {18};
constexpr DRegister d19 = {19};
constexpr DRegister d20 = {20};
constexpr DRegister d21 = {21};
constexpr DRegister d22 = {22};
constexpr DRegister d23 = {23};
constexpr DRegister d24 = {24};
constexpr DRegister d25 = {25};
constexpr DRegister d26 = {26};
constexpr DRegister d27 = {27};
constexpr DRegister d28 = {28};
constexpr DRegister d29 = {29};
constexpr DRegister d30 = {30};
constexpr DRegister d31 = {31};
213 
// Typed wrapper for a Q-register operand; `code` holds the register number.
struct QRegister {
  uint8_t code;
};

// Named Q registers, one constant per register number.
constexpr QRegister q0 = {0};
constexpr QRegister q1 = {1};
constexpr QRegister q2 = {2};
constexpr QRegister q3 = {3};
constexpr QRegister q4 = {4};
constexpr QRegister q5 = {5};
constexpr QRegister q6 = {6};
constexpr QRegister q7 = {7};
constexpr QRegister q8 = {8};
constexpr QRegister q9 = {9};
constexpr QRegister q10 = {10};
constexpr QRegister q11 = {11};
constexpr QRegister q12 = {12};
constexpr QRegister q13 = {13};
constexpr QRegister q14 = {14};
constexpr QRegister q15 = {15};
constexpr QRegister q16 = {16};
constexpr QRegister q17 = {17};
constexpr QRegister q18 = {18};
constexpr QRegister q19 = {19};
constexpr QRegister q20 = {20};
constexpr QRegister q21 = {21};
constexpr QRegister q22 = {22};
constexpr QRegister q23 = {23};
constexpr QRegister q24 = {24};
constexpr QRegister q25 = {25};
constexpr QRegister q26 = {26};
constexpr QRegister q27 = {27};
constexpr QRegister q28 = {28};
constexpr QRegister q29 = {29};
constexpr QRegister q30 = {30};
constexpr QRegister q31 = {31};
250 
// Load/store addressing modes (see C1.3.3 Load/Store addressing modes).
enum class AddressingMode {
  // Base plus offset: [base{, #imm}] ; [base, Xm{, LSL #imm}].
  kOffset,
  // Post-index: [base], #imm ; [base], Xm.
  kPostIndex,
  // Pre-index: [base, #imm]!
  kPreIndex,
};
257 
258 struct MemOperand {
MemOperandMemOperand259   MemOperand(XRegister xn): base(xn), mode(AddressingMode::kOffset), offset(0) {}
MemOperandMemOperand260   MemOperand(XRegister xn, int32_t offset): base(xn), mode(AddressingMode::kOffset), offset(offset) {}
MemOperandMemOperand261   MemOperand(XRegister xn, int32_t offset, AddressingMode mode): base(xn), mode(mode), offset(offset) {}
262 
263   // Overload postfix increment to indicate a pre-index addressing mode for load/stores.
264   MemOperand operator++(int) {
265     mode = AddressingMode::kPreIndex;
266     return *this;
267   }
268 
269   XRegister base;
270   AddressingMode mode;
271   int32_t offset;
272 };
273 
274 static inline MemOperand operator,(XRegister r, int32_t offset) {
275   return MemOperand(r, offset);
276 }
277 
278 // Helper struct for some syntax sugar to look like native assembly, see mem.
279 struct MemOperandHelper {
280   MemOperand operator[](MemOperand op) const { return op; }
281   MemOperand operator[](XRegister r) const { return MemOperand(r, 0); }
282 };
283 
284 // Use "mem" (and its overload of array subscript operator) to get some syntax
285 // that looks closer to native assembly when accessing memory. For example:
286 // - ldp(x0, x1, mem[rn, offset]); // offset
287 // - ldp(x0, x1, mem[rn], offset); // post-indexed
288 constexpr MemOperandHelper mem;
289 
// Prefetch operation selector accepted by Assembler::prfm.
enum PrefetchOp {
  kPLDL1KEEP = 0,
};
293 
// Condition codes used by conditional instructions such as csel and the
// conditional branch helpers. Values are written as uniform upper-case hex.
enum Condition : uint32_t {
  kEQ = 0x0,
  kNE = 0x1,
  kCS = 0x2,
  kCC = 0x3,
  kMI = 0x4,
  kPL = 0x5,
  kVS = 0x6,
  kVC = 0x7,
  kHI = 0x8,
  kLS = 0x9,
  kGE = 0xA,
  kLT = 0xB,
  kGT = 0xC,
  kLE = 0xD,
  kAL = 0xE,
  // Aliases: kHS is the same code as kCS, kLO the same as kCC.
  kHS = kCS,
  kLO = kCC,
};
313 
// Kinds of branch distinguished when a branch to a label is encoded.
enum class BranchType {
  kConditional,
  // TBZ and TBNZ are encoded similarly, so both share this one kind (TBXZ).
  kTbxz,
  kUnconditional,
};
320 
321 class Assembler : public AssemblerBase {
322  public:
323   using AssemblerBase::AssemblerBase;
324 
325   // Base instructions.
326   void add(XRegister xd, XRegister xn, uint16_t imm12);
327   void add(XRegister xd, XRegister xn, XRegister xm);
328   void b(Label& l);
b_eq(Label & l)329   void b_eq(Label& l) { return b(kEQ, l); }
b_hi(Label & l)330   void b_hi(Label& l) { return b(kHI, l); }
b_hs(Label & l)331   void b_hs(Label& l) { return b(kHS, l); }
b_lo(Label & l)332   void b_lo(Label& l) { return b(kLO, l); }
b_ne(Label & l)333   void b_ne(Label& l) { return b(kNE, l); }
334   void cmp(XRegister xn, uint16_t imm12);
335   void cmp(XRegister xn, XRegister xm);
336   void csel(XRegister xd, XRegister xn, XRegister xm, Condition c);
337   void ldp(XRegister xt1, XRegister xt2, MemOperand xn);
338   void ldp(XRegister xt1, XRegister xt2, MemOperand xn, int32_t imm);
339   void ldr(XRegister xt, MemOperand xn);
340   void ldr(XRegister xt, MemOperand xn, int32_t imm);
341   void mov(XRegister xd, XRegister xn);
342   void prfm(PrefetchOp prfop, MemOperand xn);
343   void ret();
344   void stp(XRegister xt1, XRegister xt2, MemOperand xn);
345   void sub(XRegister xd, XRegister xn, XRegister xm);
346   void subs(XRegister xd, XRegister xn, uint16_t imm12);
347   void tbnz(XRegister xd, uint8_t bit, Label& l);
348   void tbz(XRegister xd, uint8_t bit, Label& l);
349   // Only immediates with lowest N bits set are supported.
350   void tst(XRegister xn, uint8_t imm);
351 
352   // SIMD instructions
353   void dup(DRegister dd, VRegisterLane vn);
354   void fadd(VRegister vd, VRegister vn, VRegister vm);
355   void fmax(VRegister vd, VRegister vn, VRegister vm);
356   void fmin(VRegister vd, VRegister vn, VRegister vm);
357   void fmla(VRegister vd, VRegister vn, VRegisterLane vm);
358   void ld1(VRegisterList vs, MemOperand xn, int32_t imm);
359   void ld1r(VRegisterList xs, MemOperand xn);
360   void ld2r(VRegisterList xs, MemOperand xn);
361   void ldp(DRegister dt1, DRegister dt2, MemOperand xn);
362   void ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm);
363   void ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm);
364   void ldr(DRegister dt, MemOperand xn, int32_t imm);
365   void ldr(QRegister qt, MemOperand xn, int32_t imm);
366   void ldr(SRegister st, MemOperand xn, int32_t imm);
367   void mov(VRegister vd, VRegister vn);
368   void movi(VRegister vd, uint8_t imm);
369   void st1(VRegisterList vs, MemOperand xn, XRegister xm);
370   void stp(DRegister dt1, DRegister dt2, MemOperand xn);
371   void stp(QRegister qt1, QRegister qt2, MemOperand xn);
372   void stp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm);
373   void str(DRegister dt, MemOperand xn, int32_t imm);
374   void str(QRegister qt, MemOperand xn, int32_t imm);
375   void str(SRegister st, MemOperand xn);
376   void str(SRegister st, MemOperand xn, int32_t imm);
377 
378   // Binds Label l to the current location in the code buffer.
379   void bind(Label& l);
380 
381  private:
382   void b(Condition c, Label& l);
383   void branch_to_label(uint32_t opcode, BranchType bt, Label& l);
384   void ldr(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code);
385   void str(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code);
386   void tb_helper(uint32_t op, XRegister xd, uint8_t bit, Label& l);
387 
388 };
389 
390 }  // namespace aarch64
391 }  // namespace xnnpack
392