// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <xnnpack/aarch64-assembler.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>

#include <cmath>

namespace xnnpack {
namespace aarch64 {
// Min and max values for the imm7 field of ldp/stp; the byte offset is shifted right by 3 when encoding.
constexpr int32_t kImm7Min = -512;
constexpr int32_t kImm7Max = 504;
constexpr uint32_t kImm7Mask = 0x7F;
// Max value for imm12; the byte offset is shifted right by 3 when encoding.
constexpr int32_t kImm12Max = 32760;
constexpr uint32_t kUint12Max = 4095;
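// A worked example of the scaling (a sketch, not exhaustive): an ldp with a
// byte offset of 504 encodes imm7 = 504 >> 3 = 0x3F, and an ldr with a byte
// offset of 32760 (kImm12Max) encodes imm12 = 32760 >> 3 = 4095 (kUint12Max).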

constexpr int32_t kInt9Max = 255;
constexpr int32_t kInt9Min = -256;
constexpr uint32_t kImm9Mask = 0x1FF;

// Constants used for checking branch offset bounds.
// Conditional bounds are +/-1MB.
constexpr ptrdiff_t kConditionalBranchImmMax = 1048572;
constexpr ptrdiff_t kConditionalBranchImmMin = -1048576;
// TBZ and TBNZ bounds are +/-32KB.
constexpr ptrdiff_t kTbxzImmMax = 32764;
constexpr ptrdiff_t kTbxzImmMin = -32768;
// Unconditional bounds are +/-128MB.
constexpr ptrdiff_t kUnconditionalBranchImmMax = 134217727;
constexpr ptrdiff_t kUnconditionalBranchImmMin = -134217728;

constexpr uint32_t kConditionalImmMask = 0x0007FFFF;
constexpr uint32_t kTbxzImmMask = 0x3FFF;
constexpr uint32_t kUnconditionalImmMask = 0x03FFFFFF;

template <typename Reg> inline uint32_t rd(Reg rn) { return rn.code; }
template <typename Reg> inline uint32_t rt(Reg rn) { return rn.code; }
template <typename Reg> inline uint32_t rt2(Reg rn) { return rn.code << 10; }
template <typename Reg> inline uint32_t rm(Reg rn) { return rn.code << 16; }
template <typename Reg> inline uint32_t rn(Reg rn) { return rn.code << 5; }
inline uint32_t q(VRegister vt) { return vt.q << 30; }
inline uint32_t size(VRegister vt) { return vt.size << 10; }
inline uint32_t fp_sz(VRegister vn) { return vn.is_s() ? 0 : 1 << 22; }
inline uint32_t postindex(MemOperand op) { return (op.mode == AddressingMode::kPostIndex) ? 0 : 1 << 24; }
inline uint32_t wb(MemOperand op) { return op.mode == AddressingMode::kOffset ? 0 : 1 << 23; }
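// A sketch of how these field helpers compose: for "add x0, x1, x2",
// rd(x0) | rn(x1) | rm(x2) contributes 0 | (1 << 5) | (2 << 16), which is
// OR'ed into the base opcode 0x8B000000 in add() below.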

inline bool is_same_shape(VRegister vt1, VRegister vt2) {
  return vt1.size == vt2.size && vt1.q == vt2.q;
}

template <typename Reg, typename... Regs>
inline bool is_same_shape(Reg reg1, Reg reg2, Regs... regs) {
  return is_same_shape(reg1, reg2) && is_same_shape(reg2, regs...);
}

inline bool is_same_shape(VRegisterList vs) {
  switch (vs.length) {
    case 1:
      return true;
    case 2:
      return is_same_shape(vs.vt1, vs.vt2);
    case 3:
      return is_same_shape(vs.vt1, vs.vt2, vs.vt3);
    case 4:
      return is_same_shape(vs.vt1, vs.vt2, vs.vt3, vs.vt4);
    default:
      XNN_UNREACHABLE;
  }
}

inline bool is_same_data_type(VRegister vt1, VRegisterLane vt2) {
  return vt1.size == vt2.size;
}

inline bool is_consecutive(VRegister vt1, VRegister vt2) {
  return (vt1.code + 1) % 32 == vt2.code;
}

template <typename Reg, typename... Regs>
inline bool is_consecutive(Reg reg1, Reg reg2, Regs... regs) {
  return is_consecutive(reg1, reg2) && is_consecutive(reg2, regs...);
}

inline bool is_consecutive(VRegisterList vs) {
  switch (vs.length) {
    case 1:
      return true;
    case 2:
      return is_consecutive(vs.vt1, vs.vt2);
    case 3:
      return is_consecutive(vs.vt1, vs.vt2, vs.vt3);
    case 4:
      return is_consecutive(vs.vt1, vs.vt2, vs.vt3, vs.vt4);
    default:
      XNN_UNREACHABLE;
  }
}

// Check that a branch offset fits in the immediate field for its branch type:
// 19 bits for conditional branches, 14 for TBZ/TBNZ, 26 for unconditional branches.
inline bool branch_offset_valid(ptrdiff_t offset, BranchType branch_type) {
  switch (branch_type) {
    case BranchType::kConditional:
      return offset < kConditionalBranchImmMax && offset > kConditionalBranchImmMin;
    case BranchType::kTbxz:
      return offset < kTbxzImmMax && offset > kTbxzImmMin;
    case BranchType::kUnconditional:
      return offset < kUnconditionalBranchImmMax && offset > kUnconditionalBranchImmMin;
    default:
      XNN_UNREACHABLE;
  }
  return false;
}

inline BranchType instruction_branch_type(uint32_t instr) {
  const uint32_t masked = instr & 0xFE000000;
  switch (masked) {
    case 0xB6000000:
    case 0x36000000:
      return BranchType::kTbxz;
    case 0x54000000:
      return BranchType::kConditional;
    case 0x14000000:
    case 0x16000000:
      return BranchType::kUnconditional;
    default:
      XNN_UNREACHABLE;
  }
}

inline uint32_t mask(BranchType branch_type) {
  switch (branch_type) {
    case BranchType::kConditional:
      return kConditionalImmMask;
    case BranchType::kTbxz:
      return kTbxzImmMask;
    case BranchType::kUnconditional:
      return kUnconditionalImmMask;
    default:
      XNN_UNREACHABLE;
  }
}

inline uint8_t shift(BranchType branch_type) {
  switch (branch_type) {
    case BranchType::kConditional:
      return 5;
    case BranchType::kTbxz:
      return 5;
    case BranchType::kUnconditional:
      return 0;
    default:
      XNN_UNREACHABLE;
  }
}

inline uint32_t branch_imm(ptrdiff_t offset, BranchType bt) {
  return ((offset >> kInstructionSizeInBytesLog2) & mask(bt)) << shift(bt);
}
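// A worked example (a sketch, assuming the 4-byte instruction size, i.e.
// kInstructionSizeInBytesLog2 == 2): a conditional branch back over two
// instructions has a byte offset of -8, so branch_imm() yields
// ((-8 >> 2) & 0x0007FFFF) << 5, i.e. imm19 = 0x7FFFE placed at bit 5.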
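// Places the lane index of vl into the H:L fields of the by-element encoding:
// for S lanes, L is bit 21 and H is bit 11; for D lanes only H is used.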
inline uint32_t hl(VRegisterLane vl) {
  if (vl.is_s()) {
    return (vl.lane & 1) << 21 | ((vl.lane & 2) << 10);
  } else {
    return (vl.lane & 1) << 11;
  }
}

inline bool lane_index_valid(uint8_t q, uint8_t size, uint8_t lane) {
  // The logic here is something like:
  // if (q && size == 0) {
  //   return lane < 16;
  // } else if (q && size == 1) {
  //   return lane < 8;
  // } else if (q && size == 2) {
  //   return lane < 4;
  // } else if (q && size == 3) {
  //   return lane < 2;
  // }
  // then repeat for !q with the maximum lane count halved,
  // translated into this formula.
  return lane < ((q + 1) << (3 - size));
}
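// For example: S lanes on a 128-bit register (q = 1, size = 2) give
// lane < ((1 + 1) << (3 - 2)) = 4, the four single-precision lanes of a
// Q register.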

inline uint8_t load_store_opcode(uint8_t register_length) {
  switch (register_length) {
    case 1:
      return 0x7;
    case 2:
      return 0xA;
    case 3:
      return 0x6;
    case 4:
      return 0x2;
    default:
      XNN_UNREACHABLE;
  }
}
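// These values appear to follow the A64 LD1/ST1 (multiple structures) opcode
// field: 0x7 for one register, 0xA for two, 0x6 for three, 0x2 for four.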

inline bool imm7_offset_valid(int32_t imm, XRegister) {
  return imm >= kImm7Min && imm <= kImm7Max && (imm & 0x7) == 0;
}

inline bool imm7_offset_valid(int32_t imm, DRegister) {
  return imm >= kImm7Min && imm <= kImm7Max && (imm & 0x7) == 0;
}

inline bool imm7_offset_valid(int32_t imm, QRegister) {
  return imm >= (kImm7Min * 2) && imm <= (kImm7Max * 2) && (imm & 0xF) == 0;
}

// Base instructions.

void Assembler::add(XRegister xd, XRegister xn, uint16_t imm12) {
  // The instruction supports larger immediates via its optional left shift by
  // 12, but that is unused in our kernels.
  if (imm12 > kUint12Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x91000000 | imm12 << 10 | rn(xn) | rd(xd));
}

void Assembler::add(XRegister xd, XRegister xn, XRegister xm) {
  emit32(0x8B000000 | rd(xd) | rn(xn) | rm(xm));
}

void Assembler::b(Label& l) {
  return branch_to_label(0x14000000, BranchType::kUnconditional, l);
}

void Assembler::cmp(XRegister xn, uint16_t imm12) {
  if (imm12 > kUint12Max) {
    error_ = Error::kInvalidOperand;
    return;
  }
  emit32(0xF100001F | imm12 << 10 | rn(xn));
}

void Assembler::cmp(XRegister xn, XRegister xm) {
  emit32(0xEB00001F | rm(xm) | rn(xn));
}

void Assembler::csel(XRegister xd, XRegister xn, XRegister xm, Condition c) {
  emit32(0x9A800000 | rm(xm) | c << 12 | rn(xn) | rd(xd));
}

void Assembler::ldp(XRegister xt1, XRegister xt2, MemOperand xn) {
  if (!imm7_offset_valid(xn.offset, xt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (xn.offset >> 3) & kImm7Mask;

  emit32(0xA8400000 | postindex(xn) | wb(xn) | offset << 15 | rt2(xt2) | rn(xn.base) | xt1.code);
}

void Assembler::ldp(XRegister xt1, XRegister xt2, MemOperand xn, int32_t imm) {
  if (xn.offset != 0) {
    error_ = Error::kInvalidOperand;
    return;
  }
  return ldp(xt1, xt2, {xn.base, imm, AddressingMode::kPostIndex});
}

void Assembler::ldr(XRegister xt, MemOperand xn) {
  const int32_t imm = xn.offset;
  if (xn.mode != AddressingMode::kOffset || imm < 0 || imm > (kUint12Max << 3) || (imm & 7) != 0) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0xF9400000 | imm >> 3 << 10 | rn(xn.base) | xt.code);
}

void Assembler::ldr(XRegister xt, MemOperand xn, int32_t imm) {
  if (imm < kInt9Min || imm > kInt9Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0xF8400400 | (imm & kImm9Mask) << 12 | rn(xn.base) | rt(xt));
}

void Assembler::mov(XRegister xd, XRegister xn) {
  emit32(0xAA0003E0 | rm(xn) | rd(xd));
}

void Assembler::prfm(PrefetchOp prfop, MemOperand xn) {
  if (xn.offset < 0 || xn.offset > kImm12Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0xF9800000 | xn.offset >> 3 << 10 | rn(xn.base) | prfop);
}

void Assembler::ret() {
  emit32(0xD65F0000 | rn(x30));
}

void Assembler::stp(XRegister xt1, XRegister xt2, MemOperand xn) {
  if (!imm7_offset_valid(xn.offset, xt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (xn.offset >> 3) & kImm7Mask;
  emit32(0xA9000000 | wb(xn) | offset << 15 | rt2(xt2) | rn(xn.base) | rt(xt1));
}

void Assembler::sub(XRegister xd, XRegister xn, XRegister xm) {
  emit32(0xCB000000 | rm(xm) | rn(xn) | rd(xd));
}

void Assembler::subs(XRegister xd, XRegister xn, uint16_t imm12) {
  if (imm12 > kUint12Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0xF1000000 | imm12 << 10 | rn(xn) | rd(xd));
}

void Assembler::tbnz(XRegister xd, uint8_t bit, Label& l) {
  return tb_helper(0x37000000, xd, bit, l);
}

void Assembler::tbz(XRegister xd, uint8_t bit, Label& l) {
  return tb_helper(0x36000000, xd, bit, l);
}

void Assembler::tst(XRegister xn, uint8_t imm) {
  // Encoding the immediate is quite complicated; we only support values of the
  // form 2^n - 1, which is all the assembly microkernels use.
  uint32_t imm_po2 = imm + 1;
  if (!is_po2(imm_po2)) {
    error_ = Error::kUnimplemented;
    return;
  }

  const uint32_t imm_s = (ctz(imm_po2) - 1) << 10;
  emit32(0xF240001F | imm_s | rn(xn));
}
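// For example: tst(x0, 3) tests the low two bits: imm_po2 = 4, so
// imms = ctz(4) - 1 = 1, the bitmask-immediate encoding of 0b11.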

// SIMD instructions.

void Assembler::dup(DRegister dd, VRegisterLane vn) {
  if (vn.size != 3 || vn.lane > 1) {
    error_ = Error::kInvalidOperand;
    return;
  }
  const uint8_t imm5 = 0b1000 | (vn.lane & 1) << 4;
  emit32(0x5E000400 | imm5 << 16 | rn(vn) | rd(dd));
}

void Assembler::fadd(VRegister vd, VRegister vn, VRegister vm) {
  if (!is_same_shape(vd, vn, vm)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x0E20D400 | q(vd) | fp_sz(vn) | rm(vm) | rn(vn) | rd(vd));
}

void Assembler::fmax(VRegister vd, VRegister vn, VRegister vm) {
  if (!is_same_shape(vd, vn, vm)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x0E20F400 | q(vd) | fp_sz(vn) | rm(vm) | rn(vn) | rd(vd));
}

void Assembler::fmin(VRegister vd, VRegister vn, VRegister vm) {
  if (!is_same_shape(vd, vn, vm)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x0EA0F400 | q(vd) | fp_sz(vn) | rm(vm) | rn(vn) | rd(vd));
}

void Assembler::fmla(VRegister vd, VRegister vn, VRegisterLane vm) {
  if (!is_same_shape(vd, vn) || !is_same_data_type(vd, vm)) {
    error_ = Error::kInvalidOperand;
    return;
  }
  if (!lane_index_valid(vd.q, vm.size, vm.lane)) {
    error_ = Error::kInvalidLaneIndex;
    return;
  }

  emit32(0x0F801000 | q(vd) | fp_sz(vd) | hl(vm) | rm(vm) | rn(vn) | rd(vd));
}

void Assembler::ld1(VRegisterList vs, MemOperand xn, int32_t imm) {
  VRegister vt = vs.vt1;

  if (!is_same_shape(vs) || !is_consecutive(vs)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  // imm must match the number of bytes loaded.
  if ((vt.q + 1) * 8 * vs.length != imm) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint8_t opcode = load_store_opcode(vs.length);

  emit32(0x0CDF0000 | q(vt) | opcode << 12 | size(vt) | rn(xn.base) | rt(vt));
}
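// For example (a sketch): loading two 128-bit (q = 1) registers requires
// imm == (1 + 1) * 8 * 2 = 32, the number of bytes transferred.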

void Assembler::ld1r(VRegisterList xs, MemOperand xn) {
  if (xs.length != 1 || xn.offset != 0) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x0D40C000 | q(xs.vt1) | size(xs.vt1) | rn(xn.base) | xs.vt1.code);
}

void Assembler::ld2r(VRegisterList xs, MemOperand xn) {
  if (xs.length != 2 || !is_same_shape(xs.vt1, xs.vt2) || xn.offset != 0) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x0D60C000 | q(xs.vt1) | size(xs.vt1) | rn(xn.base) | xs.vt1.code);
}

void Assembler::ldp(DRegister dt1, DRegister dt2, MemOperand xn) {
  if (!imm7_offset_valid(xn.offset, dt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (xn.offset >> 3) & kImm7Mask;
  emit32(0x6C400000 | postindex(xn) | wb(xn) | offset << 15 | rt2(dt2) | rn(xn.base) | rt(dt1));
}

void Assembler::ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm) {
  return ldp(dt1, dt2, {xn.base, imm, AddressingMode::kPostIndex});
}

void Assembler::ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm) {
  if (!imm7_offset_valid(imm, qt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }
  const uint32_t offset = (imm >> 4) & kImm7Mask;

  emit32(0xACC00000 | offset << 15 | rt2(qt2) | rn(xn.base) | qt1.code);
}

void Assembler::ldr(DRegister dt, MemOperand xn, int32_t imm) {
  return ldr(/*size=*/3, /*opc=*/1, xn, imm, dt.code);
}

void Assembler::ldr(QRegister qt, MemOperand xn, int32_t imm) {
  return ldr(/*size=*/0, /*opc=*/3, xn, imm, qt.code);
}

void Assembler::ldr(SRegister st, MemOperand xn, int32_t imm) {
  return ldr(/*size=*/2, /*opc=*/1, xn, imm, st.code);
}

void Assembler::mov(VRegister vd, VRegister vn) {
  if (!is_same_shape(vd, vn)) {
    error_ = Error::kInvalidOperand;
    return;
  }
  emit32(0x0EA01C00 | q(vd) | rm(vn) | rn(vn) | rd(vd));
}

void Assembler::movi(VRegister vd, uint8_t imm) {
  if (imm != 0) {
    error_ = Error::kUnimplemented;
    return;
  }

  uint32_t cmode = 0;
  switch (vd.size) {
    case 0:
      cmode = 0xE;
      break;
    case 1:
      cmode = 0x8;
      break;
    case 2:
      cmode = 0x0;
      break;
    default:
      error_ = Error::kUnimplemented;
      return;
  }

  emit32(0x0F000400 | q(vd) | cmode << 12 | vd.code);
}
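// The cmode values above appear to select the MOVI element size: 0xE for the
// 8-bit form, 0x8 for the 16-bit shifted form, 0x0 for the 32-bit shifted form.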

void Assembler::st1(VRegisterList vs, MemOperand xn, XRegister xm) {
  if (!is_same_shape(vs) || !is_consecutive(vs)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  VRegister vt = vs.vt1;

  const uint8_t opcode = load_store_opcode(vs.length);
  emit32(0x0C800000 | q(vt) | rm(xm) | opcode << 12 | size(vt) | rn(xn.base) | rt(vt));
}

void Assembler::stp(DRegister dt1, DRegister dt2, MemOperand xn) {
  if (!imm7_offset_valid(xn.offset, dt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (xn.offset >> 3) & kImm7Mask;
  emit32(0x6D000000 | wb(xn) | offset << 15 | rt2(dt2) | rn(xn.base) | rt(dt1));
}

void Assembler::stp(QRegister qt1, QRegister qt2, MemOperand xn) {
  if (!imm7_offset_valid(xn.offset, qt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (xn.offset >> 4) & kImm7Mask;
  emit32(0xAD000000 | wb(xn) | offset << 15 | rt2(qt2) | rn(xn.base) | rt(qt1));
}

void Assembler::stp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm) {
  if (!imm7_offset_valid(imm, qt1)) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t offset = (imm >> 4) & kImm7Mask;
  emit32(0xAC800000 | offset << 15 | rt2(qt2) | rn(xn.base) | rt(qt1));
}

void Assembler::str(DRegister dt, MemOperand xn, int32_t imm) {
  return str(/*size=*/3, /*opc=*/0, xn, imm, dt.code);
}

void Assembler::str(QRegister qt, MemOperand xn, int32_t imm) {
  return str(/*size=*/0, /*opc=*/2, xn, imm, qt.code);
}

void Assembler::str(SRegister st, MemOperand xn) {
  const int32_t imm = xn.offset;
  if (imm < 0 || imm > (kUint12Max << 2) || (imm & 0x3) != 0) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0xBD000000 | imm >> 2 << 10 | rn(xn.base) | rt(st));
}

void Assembler::str(SRegister st, MemOperand xn, int32_t imm) {
  return str(/*size=*/2, /*opc=*/0, xn, imm, st.code);
}

void Assembler::bind(Label& l) {
  if (l.bound) {
    error_ = Error::kLabelAlreadyBound;
    return;
  }

  l.bound = true;
  l.offset = cursor_;

  // Patch all users.
  for (size_t i = 0; i < l.num_users; i++) {
    byte* user = l.users[i];
    const ptrdiff_t offset = l.offset - user;
    uint32_t* instr = reinterpret_cast<uint32_t*>(user);

    const BranchType bt = instruction_branch_type(*instr);
    if (!branch_offset_valid(offset, bt)) {
      error_ = Error::kLabelOffsetOutOfBounds;
      return;
    }

    *instr |= branch_imm(offset, bt);
  }
}
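// Typical usage (a sketch): a forward branch emits its opcode with a zero
// immediate and records itself as a user; bind() later patches in the offset.
//
//   Label l;
//   b(l);      // unbound: recorded as a user of l
//   ...
//   bind(l);   // patches the earlier b() with the now-known offset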

void Assembler::b(Condition c, Label& l) {
  return branch_to_label(0x54000000 | c, BranchType::kConditional, l);
}

void Assembler::branch_to_label(uint32_t opcode, BranchType bt, Label& l) {
  if (l.bound) {
    const ptrdiff_t offset = l.offset - cursor_;
    if (!branch_offset_valid(offset, bt)) {
      error_ = Error::kLabelOffsetOutOfBounds;
      return;
    }
    emit32(opcode | branch_imm(offset, bt));
  } else {
    if (!l.add_use(cursor_)) {
      error_ = Error::kLabelHasTooManyUsers;
      return;
    }
    emit32(opcode);
  }
}

void Assembler::ldr(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code) {
  if (xn.mode != AddressingMode::kOffset || xn.offset != 0 || imm < kInt9Min || imm > kInt9Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x3C400400 | size << 30 | opc << 22 | (imm & kImm9Mask) << 12 | rn(xn.base) | rt_code);
}

void Assembler::str(uint32_t size, uint32_t opc, MemOperand xn, int32_t imm, uint8_t rt_code) {
  if (imm < kInt9Min || imm > kInt9Max) {
    error_ = Error::kInvalidOperand;
    return;
  }

  emit32(0x3C000400 | size << 30 | opc << 22 | (imm & kImm9Mask) << 12 | rn(xn.base) | rt_code);
}

void Assembler::tb_helper(uint32_t op, XRegister xd, uint8_t bit, Label& l) {
  if (bit > 63) {
    error_ = Error::kInvalidOperand;
    return;
  }

  const uint32_t bit_pos = (bit & 0x20) >> 5 << 31 | (bit & 0x1F) << 19;
  return branch_to_label(op | bit_pos | xd.code, BranchType::kTbxz, l);
}
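// For example: tbz(x0, 37, l) places b5 = 1 at bit 31 and b40 = 0b00101 at
// bits 19-23, matching the TBZ/TBNZ b5:b40 bit-number encoding.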

}  // namespace aarch64
}  // namespace xnnpack