1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include "xnnpack/aarch32-assembler.h"
7 #include "xnnpack/assembler.h"
8 #include "xnnpack/math.h"
9
10 #include <cmath>
11 #include <cstddef>
12
13 namespace xnnpack {
14 namespace aarch32 {
// Largest byte offset accepted for vldr/vstr. The instruction holds an imm8,
// and the offset is shifted right by 2 when it is encoded.
constexpr int32_t kUint10Max = (1 << 10) - 1;
// Largest offset magnitude that fits in the imm12 field of ldr/str. The sign
// of the offset is carried by a separate bit.
constexpr int32_t kUint12Max = (1 << 12) - 1;

// Reading PC yields the address of the current instruction + 8 (i.e. two
// instructions ahead).
constexpr ptrdiff_t kPCDelta = 2 * 4;
// Limits of a signed 24-bit value, used for checking branch offset bounds.
constexpr ptrdiff_t kInt24Max = 8388607;
constexpr ptrdiff_t kInt24Min = -8388608;

// Returns true if a branch offset is valid, i.e. it fits in a signed 24-bit
// value. Both limits are inclusive: kInt24Min and kInt24Max themselves fit in
// 24 bits and must be accepted.
bool branch_offset_valid(ptrdiff_t offset) {
  return offset >= kInt24Min && offset <= kInt24Max;
}
30
invalid_register_list(DRegisterList regs)31 bool invalid_register_list(DRegisterList regs) {
32 return regs.length == 0 || regs.length > 16 || regs.start.code + regs.length > 32;
33 }
34
invalid_register_list(SRegisterList regs)35 bool invalid_register_list(SRegisterList regs) {
36 return regs.length == 0 || regs.start.code + regs.length > 32;
37 }
38
encode(SRegister r,uint32_t single_bit_pos,uint32_t four_bits_pos)39 uint32_t encode(SRegister r, uint32_t single_bit_pos, uint32_t four_bits_pos) {
40 return r.d() << single_bit_pos | r.vd() << four_bits_pos;
41 }
42
encode(DRegister r,uint32_t single_bit_pos,uint32_t four_bits_pos)43 uint32_t encode(DRegister r, uint32_t single_bit_pos, uint32_t four_bits_pos) {
44 return r.d() << single_bit_pos | r.vd() << four_bits_pos;
45 }
46
encode(DRegisterLane r,uint32_t single_bit_pos,uint32_t four_bits_pos)47 uint32_t encode(DRegisterLane r, uint32_t single_bit_pos, uint32_t four_bits_pos) {
48 return r.d() << single_bit_pos | r.vd() << four_bits_pos;
49 }
50
encode(QRegister r,uint32_t single_bit_pos,uint32_t four_bits_pos)51 uint32_t encode(QRegister r, uint32_t single_bit_pos, uint32_t four_bits_pos) {
52 return r.d() << single_bit_pos | r.vd() << four_bits_pos;
53 }
54
encode(SRegisterList regs,uint32_t single_bit_pos,uint32_t four_bits_pos)55 uint32_t encode(SRegisterList regs, uint32_t single_bit_pos, uint32_t four_bits_pos) {
56 const SRegister r = regs.start;
57 return r.d() << single_bit_pos | r.vd() << four_bits_pos | regs.length;
58 }
59
encode(DRegisterList regs,uint32_t single_bit_pos,uint32_t four_bits_pos)60 uint32_t encode(DRegisterList regs, uint32_t single_bit_pos, uint32_t four_bits_pos) {
61 const DRegister r = regs.start;
62 return r.d() << single_bit_pos | r.vd() << four_bits_pos | regs.length * 2;
63 }
64
encode_mem_puw(MemOperand op)65 uint32_t encode_mem_puw(MemOperand op) {
66 return op.p() << 24 | op.u() << 23 | op.w() << 21 | op.base().code << 16;
67 }
68
69 // Return value of 0 is invalid, indicates error.
encode_regs_length_to_type(DRegisterList regs)70 uint32_t encode_regs_length_to_type(DRegisterList regs) {
71 switch (regs.length) {
72 case 1:
73 return 0x7;
74 case 2:
75 return 0xA;
76 case 3:
77 return 0x6;
78 case 4:
79 return 0x2;
80 }
81 return 0;
82 }
83
add(CoreRegister rd,CoreRegister rn,CoreRegister rm)84 void Assembler::add(CoreRegister rd, CoreRegister rn, CoreRegister rm) {
85 emit32(kAL | 0x8 << 20 | rn.code << 16 | rd.code << 12 | rm.code);
86 }
87
add(CoreRegister rd,CoreRegister rn,uint8_t imm)88 void Assembler::add(CoreRegister rd, CoreRegister rn, uint8_t imm) {
89 // Rotation = 0, since imm is limited to 8 bits and fits in encoding.
90 emit32(kAL | 0x28 << 20 | rn.code << 16 | rd.code << 12 | imm);
91 }
92
adds(CoreRegister rd,CoreRegister rn,uint8_t imm)93 void Assembler::adds(CoreRegister rd, CoreRegister rn, uint8_t imm) {
94 // Rotation = 0, since imm is limited to 8 bits and fits in encoding.
95 emit32(kAL | 0x29 << 20 | rn.code << 16 | rd.code << 12 | imm);
96 }
97
and_(CoreRegister rd,CoreRegister rn,uint8_t imm)98 void Assembler::and_(CoreRegister rd, CoreRegister rn, uint8_t imm) {
99 // Rotation = 0, since imm is limited to 8 bits and fits in encoding.
100 emit32(kAL | 1 << 25 | rn.code << 16 | rd.code << 12 | imm);
101 }
102
b(Condition c,Label & l)103 void Assembler::b(Condition c, Label& l) {
104 if (l.bound) {
105 // Offset is relative to after this b instruction + kPCDelta.
106 const ptrdiff_t offset = l.offset - cursor_ - kPCDelta;
107 if (!branch_offset_valid(offset)) {
108 error_ = Error::kLabelOffsetOutOfBounds;
109 return;
110 }
111
112 // No need to shift by 2 since our offset is already in terms of uint32_t.
113 emit32(c | 0xA << 24 | ((offset >> kInstructionSizeInBytesLog2) & 0x00FFFFFF));
114 } else {
115 if (!l.add_use(cursor_)) {
116 error_ = Error::kLabelHasTooManyUsers;
117 return;
118 }
119 // Emit 0 offset first, will patch it up when label is bound later.
120 emit32(c | 0xA << 24);
121 }
122 }
123
bind(Label & l)124 void Assembler::bind(Label& l) {
125 if (l.bound) {
126 error_ = Error::kLabelAlreadyBound;
127 return;
128 }
129
130 l.bound = true;
131 l.offset = cursor_;
132
133 // Patch all users.
134 for (size_t i = 0; i < l.num_users; i++) {
135 byte* user = l.users[i];
136 const ptrdiff_t offset = l.offset - user - kPCDelta;
137 uint32_t* instr = reinterpret_cast<uint32_t*>(user);
138
139 if (!branch_offset_valid(offset)) {
140 error_ = Error::kLabelOffsetOutOfBounds;
141 return;
142 }
143
144 *instr |= (offset >> kInstructionSizeInBytesLog2) & 0x00FFFFFF;
145 }
146 }
147
bic(CoreRegister rd,CoreRegister rn,uint8_t imm)148 void Assembler::bic(CoreRegister rd, CoreRegister rn, uint8_t imm) {
149 emit32(kAL | 0x03C00000 | rn.code << 16 | rd.code << 12 | imm);
150 }
151
bx(CoreRegister rm)152 void Assembler::bx(CoreRegister rm) {
153 emit32(kAL | 0x12fff10 | rm.code);
154 }
155
cmp(CoreRegister rn,uint8_t imm)156 void Assembler::cmp(CoreRegister rn, uint8_t imm) {
157 emit32(kAL | 0x35 << 20 | rn.code << 16 | imm);
158 }
159
cmp(CoreRegister rn,CoreRegister rm)160 void Assembler::cmp(CoreRegister rn, CoreRegister rm) {
161 emit32(kAL | 0x01500000 | rn.code << 16 | rm.code);
162 }
163
ldr(CoreRegister rt,MemOperand op,int32_t offset)164 void Assembler::ldr(CoreRegister rt, MemOperand op, int32_t offset) {
165 ldr(rt, MemOperand(op.base(), offset, AddressingMode::kPostIndexed));
166 }
167
ldr(CoreRegister rt,MemOperand op)168 void Assembler::ldr(CoreRegister rt, MemOperand op) {
169 const int32_t offset = op.offset();
170 if (std::abs(offset) > kUint12Max) {
171 error_ = Error::kInvalidOperand;
172 return;
173 }
174
175 emit32(kAL | 0x41 << 20 | encode_mem_puw(op) | rt.code << 12 | offset);
176 }
177
ldrd(CoreRegister rt,CoreRegister rt2,MemOperand op)178 void Assembler::ldrd(CoreRegister rt, CoreRegister rt2, MemOperand op) {
179 const int32_t offset = op.offset();
180 if ((std::abs(op.offset()) > UINT8_MAX) || (rt.code + 1 != rt2.code)) {
181 error_ = Error::kInvalidOperand;
182 return;
183 }
184 const uint32_t offset_top = (offset & 0xF0) << 4;
185 const uint32_t offset_bot = (offset & 0xF);
186
187 emit32(kAL | 0x004000D0 | encode_mem_puw(op) | rt.code << 12 | offset_top | offset_bot);
188 }
189
mov(CoreRegister rd,CoreRegister rm)190 void Assembler::mov(CoreRegister rd, CoreRegister rm) {
191 mov(kAL, rd, rm);
192 }
193
mov(Condition c,CoreRegister Rd,CoreRegister Rm)194 void Assembler::mov(Condition c, CoreRegister Rd, CoreRegister Rm) {
195 emit32(c | 0x1A << 20 | Rd.code << 12 | Rm.code);
196 }
197
nop()198 void Assembler::nop() {
199 emit32(kAL | 0x0320F000);
200 }
201
pld(MemOperand op)202 void Assembler::pld(MemOperand op) {
203 emit32(0xF550F000 | op.u() << 23 | op.base().code << 16 | op.offset());
204 }
205
pop(CoreRegisterList regs)206 void Assembler::pop(CoreRegisterList regs) {
207 if (!regs.has_more_than_one_register()) {
208 // TODO(zhin): there is a different valid encoding for single register.
209 error_ = Error::kInvalidOperand;
210 return;
211 }
212
213 emit32(kAL | 0x8BD << 16 | regs.list);
214 }
215
push(CoreRegisterList regs)216 void Assembler::push(CoreRegisterList regs) {
217 if (!regs.has_more_than_one_register()) {
218 // TODO(zhin): there is a different valid encoding for single register.
219 error_ = Error::kInvalidOperand;
220 return;
221 }
222
223 emit32(kAL | 0x92D << 16 | regs.list);
224 }
225
str(CoreRegister rt,MemOperand op)226 void Assembler::str(CoreRegister rt, MemOperand op) {
227 const int32_t offset = op.offset();
228 if (std::abs(offset) > kUint12Max) {
229 error_ = Error::kInvalidOperand;
230 return;
231 }
232 emit32(kAL | 1 << 26 | encode_mem_puw(op) | rt.code << 12 | offset);
233 }
234
sub(CoreRegister rd,CoreRegister rn,uint8_t imm)235 void Assembler::sub(CoreRegister rd, CoreRegister rn, uint8_t imm) {
236 emit32(kAL | 0x24 << 20 | rn.code << 16 | rd.code << 12 | imm);
237 }
238
sub(CoreRegister rd,CoreRegister rn,CoreRegister rm)239 void Assembler::sub(CoreRegister rd, CoreRegister rn, CoreRegister rm) {
240 emit32(kAL | 0x4 << 20 | rn.code << 16 | rd.code << 12 | rm.code);
241 }
242
subs(CoreRegister rd,CoreRegister rn,uint8_t imm)243 void Assembler::subs(CoreRegister rd, CoreRegister rn, uint8_t imm) {
244 // Rotation = 0, since imm is limited to 8 bits and fits in encoding.
245 emit32(kAL | 0x25 << 20 | rn.code << 16 | rd.code << 12 | imm);
246 }
247
tst(CoreRegister rn,uint8_t imm)248 void Assembler::tst(CoreRegister rn, uint8_t imm) {
249 // Rotation = 0, since imm is limited to 8 bits and fits in encoding.
250 emit32(kAL | 0x31 << 20 | rn.code << 16 | imm);
251 }
252
vcmpe_f32(SRegister sd,SRegister sm)253 void Assembler::vcmpe_f32(SRegister sd, SRegister sm) {
254 emit32(kAL | 0x0EB40AC0 | encode(sd, 22, 12) | encode(sm, 5, 0));
255 }
256
vcvt_f32_s32(QRegister qd,QRegister qm)257 void Assembler::vcvt_f32_s32(QRegister qd, QRegister qm) {
258 emit32(0xF3BB0640 | encode(qd, 22, 12) | encode(qm, 5, 0));
259 }
260
vcvt_s32_f32(QRegister qd,QRegister qm)261 void Assembler::vcvt_s32_f32(QRegister qd, QRegister qm) {
262 emit32(0xF3BB0740 | encode(qd, 22, 12) | encode(qm, 5, 0));
263 }
264
vcvtn_s32_f32(QRegister qd,QRegister qm)265 void Assembler::vcvtn_s32_f32(QRegister qd, QRegister qm) {
266 emit32(0xF3BB0140 | encode(qd, 22, 12) | encode(qm, 5, 0));
267 }
268
vdup(DataSize size,QRegister qd,DRegisterLane dm)269 void Assembler::vdup(DataSize size, QRegister qd, DRegisterLane dm) {
270 uint8_t imm4 = 0;
271 switch (size) {
272 case k8:
273 if (dm.lane > 7) {
274 error_ = Error::kInvalidLaneIndex;
275 return;
276 }
277 imm4 = 1 | ((dm.lane & 0x7) << 1);
278 break;
279 case k16:
280 if (dm.lane > 3) {
281 error_ = Error::kInvalidLaneIndex;
282 return;
283 }
284 imm4 = 2 | ((dm.lane & 0x3) << 2);
285 break;
286 case k32:
287 if (dm.lane > 1) {
288 error_ = Error::kInvalidLaneIndex;
289 return;
290 }
291 imm4 = 4 | ((dm.lane & 0x1) << 3);
292 break;
293 }
294 emit32(0xF3B00C40 | imm4 << 16 | encode(qd, 22, 12) | encode(dm, 5, 0));
295 }
296
vext_8(QRegister qd,QRegister qn,QRegister qm,uint8_t imm4)297 void Assembler::vext_8(QRegister qd, QRegister qn, QRegister qm, uint8_t imm4) {
298 if (imm4 > 15) {
299 error_ = Error::kInvalidOperand;
300 return;
301 }
302 emit32(0xF2B00040 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0) | imm4 << 8);
303 }
304
vld1(DataSize size,DRegisterList regs,MemOperand op)305 void Assembler::vld1(DataSize size, DRegisterList regs, MemOperand op) {
306 const uint8_t rm = op.mode() == AddressingMode::kPostIndexed ? 0xD : 0xF;
307 vld1(size, regs, op, CoreRegister{rm});
308 }
309
vld1(DataSize size,DRegisterList regs,MemOperand op,CoreRegister rm)310 void Assembler::vld1(DataSize size, DRegisterList regs, MemOperand op, CoreRegister rm) {
311 const uint8_t type = encode_regs_length_to_type(regs);
312 if (!type) {
313 error_ = Error::kInvalidRegisterListLength;
314 return;
315 }
316
317 emit32(0xF4200000 | encode(regs.start, 22, 12) | op.base().code << 16 | type << 8 | size << 6 | rm.code);
318 }
319
vld1_32(DRegisterLane dd,MemOperand op)320 void Assembler::vld1_32(DRegisterLane dd, MemOperand op) {
321 if (dd.lane > 1) {
322 error_ = Error::kInvalidLaneIndex;
323 return;
324 }
325 const uint32_t rm = op.mode() == AddressingMode::kPostIndexed ? 0xD : 0xF;
326 emit32(kAL | 0xF4A00800 | dd.lane << 7 | encode(dd, 22, 12) | op.base().code << 16 | rm);
327 }
328
vld1r_32(DRegisterList regs,MemOperand op)329 void Assembler::vld1r_32(DRegisterList regs, MemOperand op) {
330 if ((op.mode() == AddressingMode::kOffset && op.offset() != 0) || regs.length > 2) {
331 error_ = Error::kInvalidOperand;
332 return;
333 }
334
335 const uint32_t rm = op.mode() == AddressingMode::kPostIndexed ? 0xD : 0xF;
336 emit32(0xF4A00C80 | encode(regs.start, 22, 12) | op.base().code << 16 | (regs.length - 1) << 5 | rm);
337 }
338
vldm(MemOperand rn,SRegisterList regs)339 void Assembler::vldm(MemOperand rn, SRegisterList regs) {
340 if (invalid_register_list(regs)) {
341 error_ = Error::kInvalidRegisterListLength;
342 return;
343 }
344 uint32_t w = (rn.mode() == AddressingMode::kOffset ? 0 : 1) << 21;
345 emit32(kAL | 0x0C900A00 | w | rn.base().code << 16 | encode(regs, 22, 12));
346 }
347
vldm(MemOperand rn,DRegisterList regs)348 void Assembler::vldm(MemOperand rn, DRegisterList regs) {
349 if (invalid_register_list(regs)) {
350 error_ = Error::kInvalidRegisterListLength;
351 return;
352 }
353 uint32_t w = (rn.mode() == AddressingMode::kOffset ? 0 : 1) << 21;
354 emit32(kAL | 0x0C900B00 | w | rn.base().code << 16 | encode(regs, 22, 12));
355 }
356
vldr(SRegister sd,MemOperand op)357 void Assembler::vldr(SRegister sd, MemOperand op) {
358 const uint32_t offset = std::abs(op.offset());
359 if (op.mode() != AddressingMode::kOffset || offset > kUint10Max || offset % 4 != 0) {
360 error_ = Error::kInvalidOperand;
361 return;
362 }
363
364 emit32(kAL | 0x0D100A00 | op.u() << 23 | encode(sd, 22, 12) | op.base().code << 16 | offset >> 2);
365 }
366
vldr(DRegister dd,MemOperand op)367 void Assembler::vldr(DRegister dd, MemOperand op) {
368 const uint32_t offset = std::abs(op.offset());
369 if (op.mode() != AddressingMode::kOffset || offset > kUint10Max || offset % 4 != 0) {
370 error_ = Error::kInvalidOperand;
371 return;
372 }
373
374 emit32(kAL | 0x0D100B00 | op.u() << 23 | encode(dd, 22, 12) | op.base().code << 16 | offset >> 2);
375 }
376
vmax_f32(QRegister qd,QRegister qn,QRegister qm)377 void Assembler::vmax_f32(QRegister qd, QRegister qn, QRegister qm) {
378 emit32(0xF2000F40 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
379 }
380
vmax_s8(QRegister qd,QRegister qn,QRegister qm)381 void Assembler::vmax_s8(QRegister qd, QRegister qn, QRegister qm) {
382 emit32(0xF2000640 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
383 }
384
vmin_f32(QRegister qd,QRegister qn,QRegister qm)385 void Assembler::vmin_f32(QRegister qd, QRegister qn, QRegister qm) {
386 emit32(0xF2200F40 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
387 }
388
vmin_s8(QRegister qd,QRegister qn,QRegister qm)389 void Assembler::vmin_s8(QRegister qd, QRegister qn, QRegister qm) {
390 emit32(0xF2000650 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
391 }
392
vmla_f32(SRegister sd,SRegister sn,SRegister sm)393 void Assembler::vmla_f32(SRegister sd, SRegister sn, SRegister sm) {
394 emit32(kAL | 0x0E000A00 | encode(sd, 22, 12) | encode (sn, 7, 16) | encode(sm, 5, 0));
395 }
396
vmla_f32(QRegister qd,QRegister qn,DRegisterLane dm)397 void Assembler::vmla_f32(QRegister qd, QRegister qn, DRegisterLane dm) {
398 if (dm.lane > 1) {
399 error_ = Error::kInvalidLaneIndex;
400 return;
401 }
402 emit32(0xF3A00140 | encode(qd, 22, 12) | encode(qn, 7, 16) | dm.lane << 5 | dm.code);
403 }
404
vmlal_s16(QRegister qd,DRegister dn,DRegisterLane dm)405 void Assembler::vmlal_s16(QRegister qd, DRegister dn, DRegisterLane dm) {
406 if (dm.lane > 3) {
407 error_ = Error::kInvalidLaneIndex;
408 return;
409 }
410 if (dm.code > 7) {
411 error_ = Error::kInvalidOperand;
412 return;
413 }
414
415 uint8_t lane_top = dm.lane >> 1;
416 uint8_t lane_bot = dm.lane & 1;
417 emit32(0xF2900240 | encode(qd, 22, 12) | encode(dn, 7, 16) | lane_top << 5 | lane_bot << 3 | dm.code);
418 }
419
vmov(SRegister sd,SRegister sm)420 void Assembler::vmov(SRegister sd, SRegister sm) {
421 emit32(kAL | 0x0EB00A40 | encode(sd, 22, 12) | encode(sm, 5, 0));
422 }
423
vmov(DRegister dm,CoreRegister rt,CoreRegister rt2)424 void Assembler::vmov(DRegister dm, CoreRegister rt, CoreRegister rt2) {
425 emit32(kAL | 0x0C400B10 | rt2.code << 16 | rt.code << 12 | encode(dm, 5, 0));
426 }
427
vmov(DRegister dd,DRegister dm)428 void Assembler::vmov(DRegister dd, DRegister dm) {
429 emit32(0xF2600110 | encode(dd, 22, 12) | encode(dm, 7, 16) | encode(dm, 5, 0));
430 }
431
vmov(QRegister qd,QRegister qm)432 void Assembler::vmov(QRegister qd, QRegister qm) {
433 emit32(0xF2200150 | encode(qd, 22, 12) | encode(qm, 7, 16) | encode(qm, 5, 0));
434 }
435
vmov_f32(Condition c,SRegister sd,SRegister sm)436 void Assembler::vmov_f32(Condition c, SRegister sd, SRegister sm) {
437 emit32(c | 0x0EB00A40 | encode(sd, 22, 12) | encode(sm, 5, 0));
438 }
439
vmov_f64(DRegister dd,DRegister dm)440 void Assembler::vmov_f64(DRegister dd, DRegister dm) {
441 emit32(kAL | 0x0EB00B40 | encode(dd, 22, 12) | encode(dm, 5, 0));
442 }
443
vmovl_s8(QRegister qd,DRegister dm)444 void Assembler::vmovl_s8(QRegister qd, DRegister dm) {
445 emit32(0xF2880A10 | encode(qd, 22, 12) | encode(dm, 5, 0));
446 }
447
vmrs(CoreRegister rt,SpecialFPRegister spec_reg)448 void Assembler::vmrs(CoreRegister rt, SpecialFPRegister spec_reg) {
449 emit32(kAL | 0x0EF00A10 | static_cast<uint32_t>(spec_reg) << 16 | rt.code << 12);
450 }
451
vmul_f32(QRegister qd,QRegister qn,QRegister qm)452 void Assembler::vmul_f32(QRegister qd, QRegister qn, QRegister qm) {
453 emit32(0xF3000D50 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
454 }
455
vpop(DRegisterList regs)456 void Assembler::vpop(DRegisterList regs) {
457 if (invalid_register_list(regs)) {
458 error_ = Error::kInvalidRegisterListLength;
459 return;
460 }
461 emit32(kAL | encode(regs, 22, 12) | 0xCBD << 16 | 0xB << 8);
462 }
463
vpush(DRegisterList regs)464 void Assembler::vpush(DRegisterList regs) {
465 if (invalid_register_list(regs)) {
466 error_ = Error::kInvalidRegisterListLength;
467 return;
468 }
469 emit32(kAL | encode(regs, 22, 12) | 0xD2D << 16 | 0xB << 8);
470 }
471
vpush(SRegisterList regs)472 void Assembler::vpush(SRegisterList regs) {
473 if (invalid_register_list(regs)) {
474 error_ = Error::kInvalidRegisterListLength;
475 return;
476 }
477 emit32(kAL | encode(regs, 22, 12) | 0xD2D << 16 | 0xA << 8);
478 }
479
vqadd_s16(QRegister qd,QRegister qn,QRegister qm)480 void Assembler::vqadd_s16(QRegister qd, QRegister qn, QRegister qm) {
481 emit32(0xF2100050 | encode(qd, 22, 12) | encode(qn, 7, 16) | encode(qm, 5, 0));
482 }
483
vqdmulh_s32(QRegister qd,QRegister qn,DRegisterLane dm)484 void Assembler::vqdmulh_s32(QRegister qd, QRegister qn, DRegisterLane dm) {
485 if (dm.code > 15) {
486 error_ = Error::kInvalidOperand;
487 return;
488 }
489 if (dm.lane > 1) {
490 error_ = Error::kInvalidLaneIndex;
491 return;
492 }
493 emit32(0xF3A00C40 | encode(qd, 22, 12) | encode(qn, 7, 16) | dm.lane << 5 | dm.code);
494 }
495
vqmovn_s16(DRegister dd,QRegister qm)496 void Assembler::vqmovn_s16(DRegister dd, QRegister qm) {
497 emit32(0xF3B20280 | encode(dd, 22, 12) | encode(qm, 5, 0));
498 }
499
vqmovn_s32(DRegister dd,QRegister qm)500 void Assembler::vqmovn_s32(DRegister dd, QRegister qm) {
501 emit32(0xF3B60280 | encode(dd, 22, 12) | encode(qm, 5, 0));
502 }
503
vqshl_s32(QRegister qd,QRegister qm,QRegister qn)504 void Assembler::vqshl_s32(QRegister qd, QRegister qm, QRegister qn) {
505 emit32(0xF2200450 | encode(qd, 22, 12) | encode(qm, 5, 0) | encode(qn, 7, 16));
506 }
507
vrshl_s32(QRegister qd,QRegister qm,QRegister qn)508 void Assembler::vrshl_s32(QRegister qd, QRegister qm, QRegister qn) {
509 emit32(0xF2200540 | encode(qd, 22, 12) | encode(qm, 5, 0) | encode(qn, 7, 16));
510 }
511
vsdot_s8(QRegister qd,QRegister qn,DRegisterLane dm)512 void Assembler::vsdot_s8(QRegister qd, QRegister qn, DRegisterLane dm) {
513 if (dm.lane > 1) {
514 error_ = Error::kInvalidLaneIndex;
515 return;
516 }
517 emit32(0xFE200D40 | encode(qd, 22, 12) | encode(qn, 7, 16) | dm.lane << 5 | dm.code);
518 }
519
vst1(DataSize size,DRegisterList regs,MemOperand op)520 void Assembler::vst1(DataSize size, DRegisterList regs, MemOperand op) {
521 const uint8_t type = encode_regs_length_to_type(regs);
522 if (!type) {
523 error_ = Error::kInvalidRegisterListLength;
524 return;
525 }
526
527 const uint32_t rm = op.mode() == AddressingMode::kPostIndexed ? 0xD : 0xF;
528 emit32(0xF4000000 | encode(regs.start, 22, 12) | op.base().code << 16 | type << 8 | size << 6 | rm);
529 }
530
vst1(DataSize size,DRegisterList regs,MemOperand op,CoreRegister rm)531 void Assembler::vst1(DataSize size, DRegisterList regs, MemOperand op, CoreRegister rm) {
532 if (rm.code == 0b1101 || rm.code == 0b1111) {
533 error_ = Error::kInvalidOperand;
534 return;
535 }
536
537 const uint8_t type = encode_regs_length_to_type(regs);
538 if (!type) {
539 error_ = Error::kInvalidRegisterListLength;
540 return;
541 }
542
543 emit32(0xF4000000 | encode(regs.start, 22, 12) | op.base().code << 16 | type << 8 | size << 6 | rm.code);
544 }
545
vst1(DataSize size,DRegisterLane dd,MemOperand op)546 void Assembler::vst1(DataSize size, DRegisterLane dd, MemOperand op) {
547 if ((size == k8 && dd.lane > 7) || (size == k16 && dd.lane > 3) || (size == k32 && dd.lane > 1)) {
548 error_ = Error::kInvalidLaneIndex;
549 return;
550 }
551
552 const uint8_t shift = size == k8 ? 5 : size == k16 ? 6 : 7;
553 const uint32_t rm = op.mode() == AddressingMode::kPostIndexed ? 0xD : 0xF;
554 emit32(0xF4800000 | encode(dd, 22, 12) | op.base().code << 16 | size << 10 | dd.lane << shift | rm);
555 }
556
vstm(MemOperand rn,DRegisterList regs)557 void Assembler::vstm(MemOperand rn, DRegisterList regs) {
558 if (invalid_register_list(regs)) {
559 error_ = Error::kInvalidRegisterListLength;
560 return;
561 }
562 uint32_t w = (rn.mode() == AddressingMode::kOffset ? 0 : 1) << 21;
563 emit32(kAL | 0x0C800B00 | w | rn.base().code << 16 | encode(regs.start, 22, 12) | regs.length << 1);
564 }
565
vstr(SRegister rn,MemOperand op)566 void Assembler::vstr(SRegister rn, MemOperand op) {
567 const uint32_t offset = std::abs(op.offset());
568 if (op.mode() != AddressingMode::kOffset || offset > kUint10Max || offset % 4 != 0) {
569 error_ = Error::kInvalidOperand;
570 return;
571 }
572 emit32(kAL | 0x0D000A00 | op.u() << 23 | op.base().code << 16 | encode(rn, 22, 12) | offset >> 2);
573 }
574
align(uint8_t n)575 void Assembler::align(uint8_t n) {
576 if (!is_po2(n) || (n % kInstructionSizeInBytes != 0)) {
577 error_ = Error::kInvalidOperand;
578 return;
579 }
580
581 uintptr_t cursor = reinterpret_cast<uintptr_t>(cursor_);
582 const uintptr_t target = round_up_po2(cursor, n);
583 while (cursor < target) {
584 nop();
585 cursor += kInstructionSizeInBytes;
586 }
587 }
588
589 } // namespace aarch32
590 } // namespace xnnpack
591