/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */

#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
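
/* Illustrative sketch (not called by the driver): QPU_SET_FIELD() shifts a
 * value into a field and asserts that it fits, QPU_GET_FIELD() reads it
 * back, and QPU_UPDATE_FIELD() rewrites the field in an existing
 * instruction word. UNUSED (from util/macros.h) keeps -Wunused-function
 * quiet; the function name is made up for this example.
 */
static UNUSED uint64_t
v3d_qpu_field_usage_example(void)
{
        uint64_t inst = QPU_SET_FIELD(5, V3D_QPU_WADDR_A);
        assert(QPU_GET_FIELD(inst, V3D_QPU_WADDR_A) == 5);
        return QPU_UPDATE_FIELD(inst, 6, V3D_QPU_WADDR_A);
}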

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC    phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are
         * reserved.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        static const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}
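
/* Sketch of the intended invariant (illustrative, not called anywhere):
 * unpacking a valid encoding and re-packing it yields an encoding that
 * decodes to the same signal. Note the index itself may change for aliased
 * entries (e.g. SMIMM appears at both 15 and 31 on v3.3), since packing
 * picks the first table match.
 */
static UNUSED bool
v3d_qpu_sig_roundtrip_example(const struct v3d_device_info *devinfo,
                              uint32_t packed)
{
        struct v3d_qpu_sig sig, sig2;
        uint32_t repacked;

        return (v3d_qpu_sig_unpack(devinfo, packed, &sig) &&
                v3d_qpu_sig_pack(devinfo, &sig, &repacked) &&
                v3d_qpu_sig_unpack(devinfo, repacked, &sig2) &&
                memcmp(&sig, &sig2, sizeof(sig)) == 0);
}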

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
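
/* The float entries above are raw IEEE-754 single-precision bit patterns.
 * For a power of two 2^n the mantissa is zero, so the pattern is simply the
 * biased exponent (n + 127) in bits 30:23, e.g.:
 *
 *    0x3f800000 == (uint32_t)(0 + 127) << 23    (2.0^0 == 1.0f)
 *    0x43000000 == (uint32_t)(7 + 127) << 23    (2.0^7 == 128.0f)
 */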

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
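
/* For example, following the table above: -1 is stored as 0xffffffff and
 * packs to index 31, while 1.0f (bit pattern 0x3f800000) packs to index 40.
 */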

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
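
/* Layout of the packed condition field as decoded above (derived from the
 * branches rather than quoted from the TRM): 0 means no flags, 1-3 an add
 * pushf, 4-15 an add updatef, 0x10 is reserved, 0x11-0x13 a mul pushf,
 * 0x14-0x1f a mul updatef, 0x20-0x2f an add cond plus a mul pushf,
 * 0x30-0x3f a mul cond plus an add pushf, and 0x40-0xff a mul cond
 * combined with either an add cond or an add updatef.
 */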

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
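        /* Each entry pairs a legal combination of add/mul cond/pushf/updatef
         * flags with the high bits that select that combination's encoding
         * block; the low bits are filled in from the individual flag values
         * below.
         */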
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
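
/* Sketch (illustrative, not called anywhere): for any flag combination the
 * table accepts, packing and unpacking should be mutually inverse. Assumes
 * struct v3d_qpu_flags has no padding that memcmp() could trip over.
 */
static UNUSED bool
v3d_qpu_flags_roundtrip_example(const struct v3d_device_info *devinfo,
                                const struct v3d_qpu_flags *flags)
{
        uint32_t packed;
        struct v3d_qpu_flags unpacked;

        return (v3d_qpu_flags_pack(devinfo, flags, &packed) &&
                v3d_qpu_flags_unpack(devinfo, packed, &unpacked) &&
                memcmp(flags, &unpacked, sizeof(unpacked)) == 0);
}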

/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
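
/* MUX_MASK(bot, top) builds a bitmask of accepted mux encodings: e.g.
 * MUX_MASK(0, 2) == 0x07 and ANYMUX == 0xff. The lookup below tests a
 * candidate with (mask & (1 << mux)).
 */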

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D
         * versions starting from X.
         * first_ver == X, last_ver == Y if it's the same for all V3D
         * versions in the range X through Y.
         */
        uint8_t first_ver;
        uint8_t last_ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1,  1,  ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2,  2,  ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3,  3,  ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4,  8,  ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9,  9,  ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
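
/* A sketch of how FMOV's packing is spread over the encoding, as decoded in
 * v3d_qpu_mul_unpack() and re-encoded in v3d_qpu_mul_pack() below: opcode
 * bit 0 and mux_b bit 2 together select the 2-bit output pack, while mux_b
 * bits 0-1 select the input unpack.
 */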

/* Returns true if op_desc should be filtered out based on devinfo->ver
 * against op_desc->first_ver and op_desc->last_ver. See the notes about
 * first_ver/last_ver in the struct opcode_desc comments.
 */
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *op_desc)
{
        return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
               (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver);
}

static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *opcodes,
                          size_t num_opcodes, uint32_t opcode,
                          uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc))
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);
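        /* (The replicated clusters 249-251 and 253-255 alias 245-247; the
         * pack/unpack decode below still reads the original opcode bits.)
         */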

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                          map_op, mux_a, mux_b);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}

static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
                         const struct opcode_desc *opcodes, size_t num_opcodes,
                         uint8_t op)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (op_desc->op != op)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc))
                        continue;

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        instr->branch.offset = 0;

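        /* The 32-bit offset is reassembled from two fields: ADDR_LOW carries
         * bits 23:3 (instructions are 8-byte aligned) and ADDR_HIGH carries
         * bits 31:24.
         */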
        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
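        /* A non-zero mul opcode identifies an ALU instruction. Otherwise
         * the instruction is a branch when the sig field is in the 16-23
         * range ((sig & 24) == 16); anything else doesn't decode.
         */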
        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo,
                                                           packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
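
/* End-to-end sketch (illustrative, not part of the driver): decode a raw
 * 64-bit instruction and re-encode it through the public entry points,
 * then check that the re-encoded bits decode to the same instruction.
 * Aliased encodings may repack to a different but equivalent bit pattern,
 * so we compare the decoded structs (zeroed first, since unpacking only
 * writes the fields relevant to the instruction type).
 */
static UNUSED bool
v3d_qpu_instr_roundtrip_example(const struct v3d_device_info *devinfo,
                                uint64_t bits)
{
        struct v3d_qpu_instr instr, instr2;
        uint64_t repacked;

        memset(&instr, 0, sizeof(instr));
        memset(&instr2, 0, sizeof(instr2));

        return (v3d_qpu_instr_unpack(devinfo, bits, &instr) &&
                v3d_qpu_instr_pack(devinfo, &instr, &repacked) &&
                v3d_qpu_instr_unpack(devinfo, repacked, &instr2) &&
                memcmp(&instr, &instr2, sizeof(instr)) == 0);
}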