1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30
#ifndef QPU_MASK
/* Builds a 64-bit mask covering bits [high..low] of an instruction word,
 * inclusive on both ends.
 */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts value into place for the named field and asserts that no bits
 * fall outside the field's mask.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

/* Extracts the named field from a packed instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

/* Replaces the named field in inst with value, leaving other bits alone. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46
/* Bit positions of the fields in the 64-bit packed QPU instruction word.
 * ALU and branch instructions overlay different fields on the same bits
 * (e.g. the branch address low bits overlap the waddr/sig fields), which
 * is why some ranges below overlap.
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

/* Single-bit "magic write" flags for the mul and add ALU destinations. */
#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

/* Shorthand designated initializers used to keep the signal decode tables
 * below readable: each expands to setting one flag of struct v3d_qpu_sig.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
121
/* Decode table mapping the 5-bit packed signal field to signal flags for
 * V3D 3.3.  Indices left zero-initialized (other than [0]) are reserved
 * encodings; the unpack helper rejects them by comparing against entry 0.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                              },
        [1]  = { THRSW, },
        [2]  = {                       LDUNIF },
        [3]  = { THRSW,                LDUNIF },
        [4]  = {        LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = {        LDTMU,         LDUNIF },
        [7]  = { THRSW, LDTMU,         LDUNIF },
        [8]  = {        LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = {        LDVARY,        LDUNIF },
        [11] = { THRSW, LDVARY,        LDUNIF },
        [12] = {        LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = {        LDTLB, },
        [17] = {        LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = {        LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = {        LDVPM,         LDUNIF },
        [27] = { THRSW, LDVPM,         LDUNIF },
        [28] = {        LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};
154
/* Signal decode table for V3D 4.0: relative to 3.3, the LDVPM encodings
 * are gone, WRTMUC encodings appear at 18-21, and 31 becomes SMIMM+LDTMU.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                              },
        [1]  = { THRSW, },
        [2]  = {                       LDUNIF },
        [3]  = { THRSW,                LDUNIF },
        [4]  = {        LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = {        LDTMU,         LDUNIF },
        [7]  = { THRSW, LDTMU,         LDUNIF },
        [8]  = {        LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = {        LDVARY,        LDUNIF },
        [11] = { THRSW, LDVARY,        LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = {        LDTLB, },
        [17] = {        LDTLBU, },
        [18] = {                       WRTMUC },
        [19] = { THRSW,                WRTMUC },
        [20] = {        LDVARY,        WRTMUC },
        [21] = { THRSW, LDVARY,        WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};
183
/* Signal decode table for V3D 4.1+: adds LDUNIFRF (12-13) and
 * LDUNIFA/LDUNIFARF (24-25) on top of the 4.0 encodings.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC   phys    R5 */
        [0]  = {                       },
        [1]  = { THRSW, },
        [2]  = {               LDUNIF },
        [3]  = { THRSW,        LDUNIF },
        [4]  = {        LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = {        LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = {        LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = {        LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = {        LDTLB, },
        [17] = {        LDTLBU, },
        [18] = {               WRTMUC },
        [19] = { THRSW,        WRTMUC },
        [20] = {        LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};
215
216 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)217 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218 uint32_t packed_sig,
219 struct v3d_qpu_sig *sig)
220 {
221 if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222 return false;
223
224 if (devinfo->ver >= 41)
225 *sig = v41_sig_map[packed_sig];
226 else if (devinfo->ver == 40)
227 *sig = v40_sig_map[packed_sig];
228 else
229 *sig = v33_sig_map[packed_sig];
230
231 /* Signals with zeroed unpacked contents after element 0 are reserved. */
232 return (packed_sig == 0 ||
233 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234 }
235
236 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)237 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238 const struct v3d_qpu_sig *sig,
239 uint32_t *packed_sig)
240 {
241 static const struct v3d_qpu_sig *map;
242
243 if (devinfo->ver >= 41)
244 map = v41_sig_map;
245 else if (devinfo->ver == 40)
246 map = v40_sig_map;
247 else
248 map = v33_sig_map;
249
250 for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252 *packed_sig = i;
253 return true;
254 }
255 }
256
257 return false;
258 }
/* Reinterprets the bits of a float as an unsigned integer ("float to
 * unsigned int").  Uses memcpy for the bit copy, which any compiler
 * lowers to a plain register move.
 */
static inline unsigned
fui(float f)
{
        unsigned ui;
        memcpy(&ui, &f, sizeof(ui));
        return ui;
}
266
/* The 48 values representable as a QPU small immediate, indexed by their
 * packed encoding: 0-15 are the integers 0..15, 16-31 are -16..-1
 * (stored as their 32-bit two's-complement bit patterns), and 32-47 are
 * the float bit patterns for powers of two from 2^-8 through 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
293
294 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)295 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
296 uint32_t packed_small_immediate,
297 uint32_t *small_immediate)
298 {
299 if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
300 return false;
301
302 *small_immediate = small_immediates[packed_small_immediate];
303 return true;
304 }
305
306 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)307 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
308 uint32_t value,
309 uint32_t *packed_small_immediate)
310 {
311 STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
312
313 for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
314 if (small_immediates[i] == value) {
315 *packed_small_immediate = i;
316 return true;
317 }
318 }
319
320 return false;
321 }
322
/* Decodes the 7-bit packed condition/flags field into the six possible
 * condition slots (add/mul condition, pulled flags, updated flags).  The
 * encoding is positional: which high bits are set selects which
 * combination of slots the low bits describe.  Returns false only for
 * the single reserved encoding 0x10.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Condition selects for the 2-bit sub-fields used by the
         * combined encodings below.
         */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "no conditions, no flag updates" and fill in only
         * what the encoding specifies.
         */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* 0: nothing at all. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 1-3: add pushed flags only. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 4-15: add updated flags; encodings start at 4, so
                 * rebase onto the UF enum which starts at ANDZ.
                 */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* Reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul pushed flags only. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul updated flags, same rebasing as auf. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x2X: add condition in bits 3:2 plus mul pushed flags
                 * in bits 1:0.
                 */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x3X: mul condition in bits 3:2 plus add pushed flags
                 * in bits 1:0.
                 */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* Bit 6 set: mul condition in bits 5:4, and either an add
                 * condition (bits 3:2 == 0 selects via bits 1:0) or add
                 * updated flags in bits 3:0.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
371
/* Encodes a struct v3d_qpu_flags into the 7-bit packed condition field.
 * Only certain combinations of the six slots are representable; the
 * table below lists each legal combination with the fixed high bits of
 * its encoding, and the slot values are then OR'd into the low bits.
 * Returns false for combinations the hardware cannot express.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
        /* Bitset describing which of the six condition slots are in use. */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Build the bitset of populated slots to match against the table. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* PF_NONE is 0, so unused pushed-flag slots OR in nothing. */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Updated-flag encodings start at 4 (see unpack). */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        /* The mul condition lands at bits 5:4 in the
                         * bit-6 encodings, bits 3:2 otherwise.
                         */
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
445
/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */

/* Bitmask with one bit per mux value, covering mux values bot..top
 * inclusive (mux fields are 3 bits, so values 0-7).
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)

/* One row of the opcode decode tables: an inclusive range of raw opcode
 * values, masks of which mux_a/mux_b encodings select this row, and the
 * resulting logical opcode.
 */
struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         * starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         * on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
469
/* Decode table for the add-ALU opcode field.  Rows may overlap (e.g.
 * FADD/FADDNF share a range); v3d_qpu_add_unpack disambiguates those by
 * operand order or waddr after the table lookup.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: one-argument ops distinguished by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: zero-argument ops distinguished by mux_b and mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        /* Opcode 188: IN/OUT variants share rows; unpack picks the OUT
         * form when the magic-write bit is set.
         */
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
573
/* Decode table for the mul-ALU opcode field (6 bits, so ranges stay
 * below 64).  FMOV at opcode 14 and the low mux_b values of 15 differ in
 * output-pack bits, which v3d_qpu_mul_unpack reconstructs.
 */
static const struct opcode_desc mul_ops[] = {
        { 1,  1,  ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2,  2,  ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3,  3,  ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4,  8,  ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9,  9,  ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
587
588 /* Returns true if op_desc should be filtered out based on devinfo->ver
589 * against op_desc->first_ver and op_desc->last_ver. Check notes about
590 * first_ver/last_ver on struct opcode_desc comments.
591 */
592 static bool
opcode_invalid_in_version(const struct v3d_device_info * devinfo,const struct opcode_desc * op_desc)593 opcode_invalid_in_version(const struct v3d_device_info *devinfo,
594 const struct opcode_desc *op_desc)
595 {
596 return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
597 (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver);
598 }
599
600 static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b)601 lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
602 const struct opcode_desc *opcodes,
603 size_t num_opcodes, uint32_t opcode,
604 uint32_t mux_a, uint32_t mux_b)
605 {
606 for (int i = 0; i < num_opcodes; i++) {
607 const struct opcode_desc *op_desc = &opcodes[i];
608
609 if (opcode < op_desc->opcode_first ||
610 opcode > op_desc->opcode_last)
611 continue;
612
613 if (opcode_invalid_in_version(devinfo, op_desc))
614 continue;
615
616 if (!(op_desc->mux_b_mask & (1 << mux_b)))
617 continue;
618
619 if (!(op_desc->mux_a_mask & (1 << mux_a)))
620 continue;
621
622 return op_desc;
623 }
624
625 return NULL;
626 }
627
628 static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)629 v3d_qpu_float32_unpack_unpack(uint32_t packed,
630 enum v3d_qpu_input_unpack *unpacked)
631 {
632 switch (packed) {
633 case 0:
634 *unpacked = V3D_QPU_UNPACK_ABS;
635 return true;
636 case 1:
637 *unpacked = V3D_QPU_UNPACK_NONE;
638 return true;
639 case 2:
640 *unpacked = V3D_QPU_UNPACK_L;
641 return true;
642 case 3:
643 *unpacked = V3D_QPU_UNPACK_H;
644 return true;
645 default:
646 return false;
647 }
648 }
649
650 static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)651 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
652 uint32_t *packed)
653 {
654 switch (unpacked) {
655 case V3D_QPU_UNPACK_ABS:
656 *packed = 0;
657 return true;
658 case V3D_QPU_UNPACK_NONE:
659 *packed = 1;
660 return true;
661 case V3D_QPU_UNPACK_L:
662 *packed = 2;
663 return true;
664 case V3D_QPU_UNPACK_H:
665 *packed = 3;
666 return true;
667 default:
668 return false;
669 }
670 }
671
672 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)673 v3d_qpu_float16_unpack_unpack(uint32_t packed,
674 enum v3d_qpu_input_unpack *unpacked)
675 {
676 switch (packed) {
677 case 0:
678 *unpacked = V3D_QPU_UNPACK_NONE;
679 return true;
680 case 1:
681 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
682 return true;
683 case 2:
684 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
685 return true;
686 case 3:
687 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
688 return true;
689 case 4:
690 *unpacked = V3D_QPU_UNPACK_SWAP_16;
691 return true;
692 default:
693 return false;
694 }
695 }
696
697 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)698 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
699 uint32_t *packed)
700 {
701 switch (unpacked) {
702 case V3D_QPU_UNPACK_NONE:
703 *packed = 0;
704 return true;
705 case V3D_QPU_UNPACK_REPLICATE_32F_16:
706 *packed = 1;
707 return true;
708 case V3D_QPU_UNPACK_REPLICATE_L_16:
709 *packed = 2;
710 return true;
711 case V3D_QPU_UNPACK_REPLICATE_H_16:
712 *packed = 3;
713 return true;
714 case V3D_QPU_UNPACK_SWAP_16:
715 *packed = 4;
716 return true;
717 default:
718 return false;
719 }
720 }
721
722 static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)723 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
724 uint32_t *packed)
725 {
726 switch (unpacked) {
727 case V3D_QPU_PACK_NONE:
728 *packed = 0;
729 return true;
730 case V3D_QPU_PACK_L:
731 *packed = 1;
732 return true;
733 case V3D_QPU_PACK_H:
734 *packed = 2;
735 return true;
736 default:
737 return false;
738 }
739 }
740
/* Decodes the add-ALU half of a packed instruction into instr->alu.add:
 * logical opcode, operand muxes, write address, magic-write flag, and
 * output-pack/input-unpack modes.  Returns false for undecodable
 * encodings.
 */
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        /* Fold the replicated ranges 249-251 and 253-255 onto the
         * canonical 245-247 rows before the table lookup; the pack bits
         * are recovered from the raw op below.
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                          map_op, mux_a, mux_b);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The three STVPM variants share one opcode and are told
                 * apart by the write address.
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Recover pack/unpack modes from the raw opcode bits, per op class. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* Two-source float ops: op bits 5:4 are the output pack
                 * (except VFPACK, which has none), bits 3:2 and 1:0 the
                 * a/b input unpacks.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* One-source float ops: mux_b's low bits carry the output
                 * pack, op bits 3:2 the a input unpack.
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions: no output pack, a unpack in
                 * op bits 3:2.
                 */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* f16 vector ops: 3-bit f16 unpack in op bits 2:0. */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                /* For the LDVPM*_IN ops the MA bit selects the _OUT
                 * variant instead of a magic write.
                 */
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
895
/* Decodes the mul-ALU half of a packed instruction into instr->alu.mul:
 * logical opcode, operand muxes, write address, magic-write flag, and
 * pack/unpack modes.  Returns false for undecodable encodings.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        /* Recover pack/unpack modes from the raw opcode/mux bits. */
        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* FMUL spans opcodes 16-63: bits 5:4 encode output pack
                 * offset by 1, bits 3:2 and 1:0 the a/b input unpacks.
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* FMOV: output pack is split across op bit 0 (high bit)
                 * and mux_b bit 2 (low bit); input unpack is mux_b 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                /* VFMUL spans opcodes 4-8: f16 unpack is the opcode
                 * offset from 4, wrapped into 3 bits.
                 */
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
968
969 static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint8_t op)970 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
971 const struct opcode_desc *opcodes, size_t num_opcodes,
972 uint8_t op)
973 {
974 for (int i = 0; i < num_opcodes; i++) {
975 const struct opcode_desc *op_desc = &opcodes[i];
976
977 if (op_desc->op != op)
978 continue;
979
980 if (opcode_invalid_in_version(devinfo, op_desc))
981 continue;
982
983 return op_desc;
984 }
985
986 return NULL;
987 }
988
/* Packs the add-ALU half of an instruction — opcode, input muxes, pack/unpack
 * modifiers, and write address — by OR-ing the encoded fields into
 * *packed_instr.  Returns false if the requested combination has no encoding.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        /* The VPM store ops are distinguished by the waddr value, so the
         * encoded waddr is fixed per-op and the magic-write bit is forced
         * off regardless of instr->alu.add.magic_write.
         */
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        /* The _OUT VPM load variants are encoded as the _IN opcode with the
         * magic-write bit set.
         */
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* Float ops fold the output pack into opcode bits 4+, and
                 * the two input unpacks into bits 3:2 and 1:0.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        /* Swap the operands (and their unpacks) so the
                         * ordering matches the op being encoded.
                         */
                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK has no encoding for ABS unpack on either input. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Unlike the float-op case above, the unpack bits replace
                 * (rather than OR into) single bits of the base opcode.
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source float ops carry the output pack in the
                 * low bits of mux_b.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* NOTE(review): unpack encoding 0 appears to be unavailable
                 * for these opcodes (presumably it selects a different
                 * instruction in this encoding space) — confirm against the
                 * QPU encoding tables.
                 */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions take no output pack. */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Same restriction as the FFLOOR group: encoding 0 is not
                 * representable here.
                 */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Vector-float min/max: no output pack, no b unpack; the a
                 * unpack (f16 flavor) is OR-ed into the opcode.
                 */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* All remaining ops (other than NOP) have no pack/unpack
                 * encodings at all.
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1201
/* Packs the mul-ALU half of an instruction — opcode, input muxes, pack/unpack
 * modifiers, and write address — by OR-ing the encoded fields into
 * *packed_instr.  Returns false if the requested combination has no encoding.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* FMOV splits the 2-bit output pack: the high bit goes into
                 * the opcode's low bit, the low bit into mux_b bit 2, with
                 * the a unpack in mux_b's low bits.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                /* Vector-float multiply: no output pack, no b unpack. */
                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 has its own dedicated opcode; the other f16
                 * unpack modes are folded into the opcode with an offset.
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
1308
1309 static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1310 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1311 uint64_t packed_instr,
1312 struct v3d_qpu_instr *instr)
1313 {
1314 instr->type = V3D_QPU_INSTR_TYPE_ALU;
1315
1316 if (!v3d_qpu_sig_unpack(devinfo,
1317 QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
1318 &instr->sig))
1319 return false;
1320
1321 uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
1322 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1323 instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
1324 instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
1325
1326 instr->flags.ac = V3D_QPU_COND_NONE;
1327 instr->flags.mc = V3D_QPU_COND_NONE;
1328 instr->flags.apf = V3D_QPU_PF_NONE;
1329 instr->flags.mpf = V3D_QPU_PF_NONE;
1330 instr->flags.auf = V3D_QPU_UF_NONE;
1331 instr->flags.muf = V3D_QPU_UF_NONE;
1332 } else {
1333 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1334 return false;
1335 }
1336
1337 instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
1338 instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
1339
1340 if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1341 return false;
1342
1343 if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1344 return false;
1345
1346 return true;
1347 }
1348
1349 static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1350 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1351 uint64_t packed_instr,
1352 struct v3d_qpu_instr *instr)
1353 {
1354 instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1355
1356 uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
1357 if (cond == 0)
1358 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1359 else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1360 V3D_QPU_BRANCH_COND_ALLNA)
1361 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1362 else
1363 return false;
1364
1365 uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
1366 if (msfign == 3)
1367 return false;
1368 instr->branch.msfign = msfign;
1369
1370 instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
1371
1372 instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
1373 if (instr->branch.ub) {
1374 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1375 V3D_QPU_BRANCH_BDU);
1376 }
1377
1378 instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1379 V3D_QPU_RADDR_A);
1380
1381 instr->branch.offset = 0;
1382
1383 instr->branch.offset +=
1384 QPU_GET_FIELD(packed_instr,
1385 V3D_QPU_BRANCH_ADDR_LOW) << 3;
1386
1387 instr->branch.offset +=
1388 QPU_GET_FIELD(packed_instr,
1389 V3D_QPU_BRANCH_ADDR_HIGH) << 24;
1390
1391 return true;
1392 }
1393
1394 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1395 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1396 uint64_t packed_instr,
1397 struct v3d_qpu_instr *instr)
1398 {
1399 if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
1400 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1401 } else {
1402 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
1403
1404 if ((sig & 24) == 16) {
1405 return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1406 instr);
1407 } else {
1408 return false;
1409 }
1410 }
1411 }
1412
1413 static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1414 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1415 const struct v3d_qpu_instr *instr,
1416 uint64_t *packed_instr)
1417 {
1418 uint32_t sig;
1419 if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1420 return false;
1421 *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
1422
1423 if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1424 *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
1425 *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
1426
1427 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1428 return false;
1429 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1430 return false;
1431
1432 uint32_t flags;
1433 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1434 if (instr->flags.ac != V3D_QPU_COND_NONE ||
1435 instr->flags.mc != V3D_QPU_COND_NONE ||
1436 instr->flags.apf != V3D_QPU_PF_NONE ||
1437 instr->flags.mpf != V3D_QPU_PF_NONE ||
1438 instr->flags.auf != V3D_QPU_UF_NONE ||
1439 instr->flags.muf != V3D_QPU_UF_NONE) {
1440 return false;
1441 }
1442
1443 flags = instr->sig_addr;
1444 if (instr->sig_magic)
1445 flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
1446 } else {
1447 if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1448 return false;
1449 }
1450
1451 *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
1452 } else {
1453 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1454 return false;
1455 }
1456
1457 return true;
1458 }
1459
1460 static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1461 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1462 const struct v3d_qpu_instr *instr,
1463 uint64_t *packed_instr)
1464 {
1465 *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
1466
1467 if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1468 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1469 V3D_QPU_BRANCH_COND_A0),
1470 V3D_QPU_BRANCH_COND);
1471 }
1472
1473 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1474 V3D_QPU_BRANCH_MSFIGN);
1475
1476 *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1477 V3D_QPU_BRANCH_BDI);
1478
1479 if (instr->branch.ub) {
1480 *packed_instr |= V3D_QPU_BRANCH_UB;
1481 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1482 V3D_QPU_BRANCH_BDU);
1483 }
1484
1485 switch (instr->branch.bdi) {
1486 case V3D_QPU_BRANCH_DEST_ABS:
1487 case V3D_QPU_BRANCH_DEST_REL:
1488 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1489 V3D_QPU_BRANCH_MSFIGN);
1490
1491 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1492 ~0xff000000) >> 3,
1493 V3D_QPU_BRANCH_ADDR_LOW);
1494
1495 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1496 V3D_QPU_BRANCH_ADDR_HIGH);
1497 break;
1498 default:
1499 break;
1500 }
1501
1502 if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
1503 instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
1504 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1505 V3D_QPU_RADDR_A);
1506 }
1507
1508 return true;
1509 }
1510
1511 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1512 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1513 const struct v3d_qpu_instr *instr,
1514 uint64_t *packed_instr)
1515 {
1516 *packed_instr = 0;
1517
1518 switch (instr->type) {
1519 case V3D_QPU_INSTR_TYPE_ALU:
1520 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1521 case V3D_QPU_INSTR_TYPE_BRANCH:
1522 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1523 default:
1524 return false;
1525 }
1526 }
1527