/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
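
/* For illustration: the field macros combine a value with a field's
 * _SHIFT/_MASK pair, and a get after a set round-trips.  For example, with
 * the VC5_QPU_SIG field below (bits 57:53):
 *
 *     uint64_t inst = QPU_SET_FIELD(16, VC5_QPU_SIG);
 *     assert(QPU_GET_FIELD(inst, VC5_QPU_SIG) == 16);
 *     inst = QPU_UPDATE_FIELD(inst, 2, VC5_QPU_SIG);
 *     assert(QPU_GET_FIELD(inst, VC5_QPU_SIG) == 2);
 *
 * QPU_SET_FIELD() asserts that the value fits in the field, while
 * QPU_UPDATE_FIELD() additionally clears the field's old contents first.
 */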

#define VC5_QPU_OP_MUL_SHIFT 58
#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT 53
#define VC5_QPU_SIG_MASK QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT 46
#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6)

#define VC5_QPU_MM QPU_MASK(45, 45)
#define VC5_QPU_MA QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT 38
#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT 32
#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT 32
#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT 24
#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT 21
#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21
#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT 18
#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT 15
#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT 15
#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT 12
#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT 12
#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT 6
#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT 0
#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /* MISC R3 R4 R5 */
        [0] = { },
        [1] = { THRSW, },
        [2] = { LDUNIF },
        [3] = { THRSW, LDUNIF },
        [4] = { LDTMU, },
        [5] = { THRSW, LDTMU, },
        [6] = { LDTMU, LDUNIF },
        [7] = { THRSW, LDTMU, LDUNIF },
        [8] = { LDVARY, },
        [9] = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /* MISC R3 R4 R5 */
        [0] = { },
        [1] = { THRSW, },
        [2] = { LDUNIF },
        [3] = { THRSW, LDUNIF },
        [4] = { LDTMU, },
        [5] = { THRSW, LDTMU, },
        [6] = { LDTMU, LDUNIF },
        [7] = { THRSW, LDTMU, LDUNIF },
        [8] = { LDVARY, },
        [9] = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /* MISC phys R5 */
        [0] = { },
        [1] = { THRSW, },
        [2] = { LDUNIF },
        [3] = { THRSW, LDUNIF },
        [4] = { LDTMU, },
        [5] = { THRSW, LDTMU, },
        [6] = { LDTMU, LDUNIF },
        [7] = { THRSW, LDTMU, LDUNIF },
        [8] = { LDVARY, },
        [9] = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA, },
        [25] = { LDUNIFARF, },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are reserved. */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static inline unsigned
fui(float f)
{
        union { float f; unsigned ui; } fi;
        fi.f = f;
        return fi.ui;
}
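
/* For illustration: fui() returns the IEEE-754 bit pattern of a float, so
 * fui(1.0f) == 0x3f800000 and fui(0.25f) == 0x3e800000, matching the
 * power-of-two entries in the small_immediates[] table below.
 */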

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
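
/* For illustration: v3d_qpu_small_imm_pack() maps a 32-bit value to its
 * index in small_immediates[], so 7 packs to 7, 0xfffffff0 (-16) packs to
 * 16, and 0x3f800000 (1.0f) packs to 40, while v3d_qpu_small_imm_unpack()
 * is the inverse.  Values outside the table simply fail to pack as small
 * immediates.
 */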

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0, 0 },
                { APF, 0 },
                { AUF, 0 },
                { MPF, (1 << 4) },
                { MUF, (1 << 4) },
                { AC, (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC, (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC, (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
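
/* For illustration: packing a condition of just "ifa" on the add ALU
 * (cond->ac == V3D_QPU_COND_IFA, everything else NONE) selects the
 * { AC, (1 << 5) } row above, yielding a packed value of 0x20; unpacking
 * 0x20 takes the packed_cond >> 4 == 0x2 branch of v3d_qpu_flags_unpack()
 * and recovers the same flags.
 */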

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field and, in the case of 0- or
 * 1-arg opcodes, the mux_a/mux_b fields as well.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_PATCHID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
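
/* For illustration: FMOV appears twice above because its output pack field
 * straddles the opcode/mux_b boundary.  The high bit of the pack selects
 * opcode 14 vs. 15 and the low bit lands in mux_b bit 2, which is why
 * v3d_qpu_mul_unpack() below reconstructs output_pack as
 * ((op & 1) << 1) + ((mux_b >> 2) & 1).
 */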

static const struct opcode_desc *
lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}
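
/* For illustration: lookup_opcode(add_ops, ARRAY_SIZE(add_ops), 186, 0, 1)
 * returns the V3D_QPU_A_NEG row, since opcode 186 falls in its
 * [opcode_first, opcode_last] range and mux_b == 1 matches its mux_b_mask
 * of 1 << 1 (the earlier V3D_QPU_A_NOT row is skipped because its mux_b_mask
 * is 1 << 0).
 */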

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
                instr->alu.add.output_pack = (op >> 4) & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & VC5_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);

                /* Branch instructions are encoded with a zero mul opcode and
                 * a signal field of the form 0b10xxx (16-23).
                 */
                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);
                break;

        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
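
/* Usage sketch (illustrative, assuming a populated v3d_device_info): a
 * packed 64-bit instruction can be decoded and re-encoded.  The re-encoding
 * is equivalent but not always bit-identical, since e.g. FMIN/FMAX and
 * FADD/FADDNF repack with a canonical operand ordering and the replicated
 * add-opcode clusters collapse to the first opcode in their range.
 *
 *     struct v3d_qpu_instr instr;
 *     uint64_t repacked;
 *
 *     if (v3d_qpu_instr_unpack(devinfo, packed, &instr) &&
 *         v3d_qpu_instr_pack(devinfo, &instr, &repacked)) {
 *             ... repacked now holds an equivalent encoding of packed ...
 *     }
 */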