/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
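
/* A minimal usage sketch (values chosen for illustration): packing a write
 * address into the add-ALU waddr field and reading it back. V3D_QPU_WADDR_A
 * below covers bits 37:32, so the intermediate QPU_SET_FIELD() result is
 * 42ull << 32.
 *
 *    uint64_t inst = 0;
 *    inst = QPU_UPDATE_FIELD(inst, 42, V3D_QPU_WADDR_A);
 *    assert(QPU_GET_FIELD(inst, V3D_QPU_WADDR_A) == 42);
 */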

#define V3D_QPU_OP_MUL_SHIFT 58
#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT 53
#define V3D_QPU_SIG_MASK QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT 46
#define V3D_QPU_COND_MASK QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6)

#define V3D_QPU_MM QPU_MASK(45, 45)
#define V3D_QPU_MA QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT 38
#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT 32
#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT 32
#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT 24
#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT 21
#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21
#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT 18
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)

#define V3D_QPU_RADDR_C_SHIFT 18
#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)

#define V3D_QPU_ADD_B_SHIFT 15
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT 15
#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT 12
#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT 12
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)

#define V3D_QPU_RADDR_D_SHIFT 12
#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)

#define V3D_QPU_RADDR_A_SHIFT 6
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT 0
#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
#define SMIMM_A .small_imm_a = true
#define SMIMM_B .small_imm_b = true
#define SMIMM_C .small_imm_c = true
#define SMIMM_D .small_imm_d = true

static const struct v3d_qpu_sig v3d42_sig_map[] = {
        /* MISC phys R5 */
        [0] = { },
        [1] = { THRSW, },
        [2] = { LDUNIF },
        [3] = { THRSW, LDUNIF },
        [4] = { LDTMU, },
        [5] = { THRSW, LDTMU, },
        [6] = { LDTMU, LDUNIF },
        [7] = { THRSW, LDTMU, LDUNIF },
        [8] = { LDVARY, },
        [9] = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM_B, LDVARY },
        [15] = { SMIMM_B, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM_B, LDTMU, },
};


static const struct v3d_qpu_sig v3d71_sig_map[] = {
        /* MISC phys RF0 */
        [0] = { },
        [1] = { THRSW, },
        [2] = { LDUNIF },
        [3] = { THRSW, LDUNIF },
        [4] = { LDTMU, },
        [5] = { THRSW, LDTMU, },
        [6] = { LDTMU, LDUNIF },
        [7] = { THRSW, LDTMU, LDUNIF },
        [8] = { LDVARY, },
        [9] = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM_A, },
        [15] = { SMIMM_B, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        /* 23 reserved */
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        [26] = { LDTMU, WRTMUC },
        [27] = { THRSW, LDTMU, WRTMUC },
        /* 28-29 reserved */
        [30] = { SMIMM_C, },
        [31] = { SMIMM_D, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v3d42_sig_map))
                return false;

        if (devinfo->ver >= 71)
                *sig = v3d71_sig_map[packed_sig];
        else
                *sig = v3d42_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are
         * reserved.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v3d42_sig_map[0], sizeof(*sig)) != 0);
}
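
/* Example of the reserved-slot check above (a sketch): on v3d42, packed_sig
 * 26 falls in the reserved 26-30 range, so v3d42_sig_map[26] is the
 * implicitly zero-initialized struct. It then compares equal to entry [0]
 * while packed_sig != 0, and we correctly return false.
 */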

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 71)
                map = v3d71_sig_map;
        else
                map = v3d42_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v3d42_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
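
/* Layout sketch: indices 0-15 are the unsigned integers 0..15, indices 16-31
 * are the two's complement values -16..-1, and indices 32-47 are the float
 * powers of two 2^-8..2^7 as IEEE-754 bit patterns. For example (assuming a
 * valid devinfo in scope), packing 1.0f should land on index 40:
 *
 *    uint32_t packed;
 *    if (v3d_qpu_small_imm_pack(devinfo, 0x3f800000, &packed))
 *            assert(packed == 40);
 */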

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
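
/* Worked example for the decode above: packed_cond 0x24 has
 * (packed_cond >> 4) == 0x2, so it carries an add-ALU condition:
 * ac = ((0x24 >> 2) & 0x3) + V3D_QPU_COND_IFA == V3D_QPU_COND_IFB, and
 * mpf = 0x24 & 0x3 == V3D_QPU_PF_NONE.
 */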

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0, 0 },
                { APF, 0 },
                { AUF, 0 },
                { MPF, (1 << 4) },
                { MUF, (1 << 4) },
                { AC, (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC, (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC, (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
 * well.
 */
#define OP_MASK(val) BITFIELD64_BIT(val)
#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, top - bot + 1)
#define ANYMUX OP_RANGE(0, 7)
#define ANYOPMASK OP_RANGE(0, 63)
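
/* e.g. OP_MASK(5) expands to bit 5 (0x20) and OP_RANGE(4, 6) to bits 4..6
 * (0x70), so ANYMUX covers mux values 0-7 and ANYOPMASK covers raddr values
 * 0-63.
 */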

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;

        union {
                struct {
                        uint8_t b_mask;
                        uint8_t a_mask;
                } mux;
                uint64_t raddr_mask;
        };

        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D
         * versions starting from X.
         * first_ver == X, last_ver == Y if it's the same for all V3D
         * versions in the range X through Y.
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
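
/* Reading an entry (for example, from the table below): { 186, 186,
 * .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP } means
 * "opcode field 186 with mux_b == 5 and any mux_a decodes to RECIP", with
 * first_ver/last_ver left at 0, i.e. valid on every version this table
 * serves.
 */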

static const struct opcode_desc v3d42_add_ops[] = {
        /* FADD turns into FADDNF depending on the order of mux_a/mux_b. */
        { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
        { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
        { 53, 55, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 56, 56, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
        { 57, 59, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 60, 60, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
        { 61, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 64, 111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
        /* FMIN turns into FMAX depending on the order of mux_a/mux_b. */
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
        { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
        { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },

        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },

        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc v3d42_mul_ops[] = {
        { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
        { 15, 15, .mux.b_mask = OP_RANGE(0, 3), .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
        { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
        { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },

        { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
};

/* Note that it would have been possible to define all the add/mul opcodes
 * in just one table, using first_ver/last_ver. But since v3d71 changed so
 * many of them, separate tables are tidier. Keeping them separate also
 * keeps each table smaller, which matters because right now we do a linear
 * search on them.
 *
 * Just in case we ever merge the tables, we define first_ver as 71 for the
 * opcodes that changed on v3d71.
 */
static const struct opcode_desc v3d71_add_ops[] = {
        /* FADD turns into FADDNF depending on the order of raddr_a/raddr_b. */
        { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
        { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
        { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
        { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 60, 60, .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
        { 61, 63, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 64, 111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
        { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
        { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
        { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
        { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
        { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
        { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
        { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
        { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
        /* FMIN turns into FMAX depending on the raddr_a/b order. */
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
        { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },

        { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
        { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
        { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
        { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
        { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },

        { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
        { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
        { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
        { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
        { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
        { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
        { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
        { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },

        { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
        { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
        { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
        { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
        { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
        { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
        { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
        { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
        { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
        { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
        { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
        { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },

        { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
        { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },

        { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },

        { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
        { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
        { 188, 188, .raddr_mask = OP_MASK(38), V3D_QPU_A_BALLOT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(39), V3D_QPU_A_BCASTF, 71 },
        { 188, 188, .raddr_mask = OP_MASK(40), V3D_QPU_A_ALLEQ, 71 },
        { 188, 188, .raddr_mask = OP_MASK(41), V3D_QPU_A_ALLFEQ, 71 },

        { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },

        /* The stvpms are distinguished by the waddr field. */
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71 },
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71 },
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71 },

        { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },

        { 245, 245, .raddr_mask = OP_MASK(3), V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(7), V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },

        { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },

        { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },

        { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },

        { 246, 246, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },

        { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },

        { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
        { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },

        { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },

        { 249, 249, .raddr_mask = OP_MASK(3), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(7), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },

        { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
        { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },

        { 252, 252, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROTQ, 71 },
        { 253, 253, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROT, 71 },
        { 254, 254, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHUFFLE, 71 },
};

static const struct opcode_desc v3d71_mul_ops[] = {
        /* For V3D 7.1, the second mask field is ignored. */
        { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
        { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
        { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
        { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
        { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
        { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },

        { 14, 14, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_M_FMOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(3), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(7), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
        { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },

        { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },

        { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
};

/* Returns true if op_desc should be filtered out based on devinfo->ver
 * versus op_desc->first_ver and op_desc->last_ver. See the notes about
 * first_ver/last_ver in the struct opcode_desc comment.
 */
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
                          const uint8_t first_ver,
                          const uint8_t last_ver)
{
        return (first_ver != 0 && devinfo->ver < first_ver) ||
               (last_ver != 0 && devinfo->ver > last_ver);
}

/* Note that we pass mux_a, mux_b and raddr as parameters even though,
 * depending on devinfo->ver, some of them are ignored. We do it this way
 * just to avoid having two nearly identical lookup_opcode methods.
 */
static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *opcodes,
                          size_t num_opcodes, uint32_t opcode,
                          uint32_t mux_a, uint32_t mux_b,
                          uint32_t raddr)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
                        continue;

                if (devinfo->ver < 71) {
                        if (!(op_desc->mux.b_mask & (1 << mux_b)))
                                continue;

                        if (!(op_desc->mux.a_mask & (1 << mux_a)))
                                continue;
                } else {
                        if (!(op_desc->raddr_mask & ((uint64_t)1 << raddr)))
                                continue;
                }

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(const struct v3d_device_info *devinfo,
                              uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        case 4:
                *unpacked = V3D71_QPU_UNPACK_SAT;
                return devinfo->ver >= 71;
        case 5:
                *unpacked = V3D71_QPU_UNPACK_NSAT;
                return devinfo->ver >= 71;
        case 6:
                *unpacked = V3D71_QPU_UNPACK_MAX0;
                return devinfo->ver >= 71;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(const struct v3d_device_info *devinfo,
                            enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        case V3D71_QPU_UNPACK_SAT:
                *packed = 4;
                return devinfo->ver >= 71;
        case V3D71_QPU_UNPACK_NSAT:
                *packed = 5;
                return devinfo->ver >= 71;
        case V3D71_QPU_UNPACK_MAX0:
                *packed = 6;
                return devinfo->ver >= 71;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_unpack(uint32_t packed,
                            enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_UL;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_UH;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_IL;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_IH;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_UL:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_UH:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_IL:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_IH:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack flags
         * folded in, so map them back to the canonical opcode before the
         * table lookup.
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, v3d42_add_ops,
                                          ARRAY_SIZE(v3d42_add_ops),
                                          map_op, mux_a, mux_b, 0);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }
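
        /* e.g. if the packed a/b unpack bits are (1, 0) and mux_a == mux_b,
         * then 1 * 8 + mux_a > 0 * 8 + mux_b, so an encoding matched as
         * FMIN is read back as FMAX, and one matched as FADD as FADDNF.
         */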

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.add.b.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a.mux = mux_a;
        instr->alu.add.b.mux = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
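        /* The MA bit normally requests a magic waddr write; for the LDVPM*
         * loads it instead selects the _OUT variant of the opcode.
         */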
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}

static bool
v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
        uint32_t map_op = op;

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo,
                                          v3d71_add_ops,
                                          ARRAY_SIZE(v3d71_add_ops),
                                          map_op, 0, 0,
                                          raddr_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
         * operands.
         */
        if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
            instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and raddr
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
                    instr->alu.add.op != V3D_QPU_A_FCMP) {
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                } else {
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.add.b.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = raddr_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo,
                                                   (raddr_b >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                unreachable("pending v3d71 update");
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;

        case V3D_QPU_A_MOV:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
                                                 &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FMOV:
                instr->alu.add.output_pack = raddr_b & 0x3;

                /* The mul ALU FMOV has one additional variant. */
                int32_t unpack = (raddr_b >> 2) & 0x7;
                if (unpack == 7)
                        return false;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, unpack,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a.raddr = raddr_a;
        instr->alu.add.b.raddr = raddr_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        if (devinfo->ver >= 71)
                return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
        else
                return v3d42_qpu_add_unpack(devinfo, packed_inst, instr);
}

static bool
v3d42_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo,
                                                  v3d42_mul_ops,
                                                  ARRAY_SIZE(v3d42_mul_ops),
                                                  op, mux_a, mux_b, 0);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
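                /* FMUL opcodes start at 16, so (op >> 4) is 1..3 here and
                 * the -1 rebases it onto V3D_QPU_PACK_NONE/L/H.
                 */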
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.mul.b.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(devinfo, mux_b & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a.mux = mux_a;
        instr->alu.mul.b.mux = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}

static bool
v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
        uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo,
                                                  v3d71_mul_ops,
                                                  ARRAY_SIZE(v3d71_mul_ops),
                                                  op, 0, 0,
                                                  raddr_d);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.mul.b.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = raddr_d & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo,
                                                   (raddr_d >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                unreachable("pending v3d71 update");
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;

                break;

        case V3D_QPU_M_MOV:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
                                                 &instr->alu.mul.a.unpack)) {
                        return false;
                }
                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a.raddr = raddr_c;
        instr->alu.mul.b.raddr = raddr_d;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        if (devinfo->ver >= 71)
                return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
        else
                return v3d42_qpu_mul_unpack(devinfo, packed_inst, instr);
}

static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
                         const struct opcode_desc *opcodes, size_t num_opcodes,
                         uint8_t op)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (op_desc->op != op)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
                        continue;

                return op_desc;
        }

        return NULL;
}
1547
1548 static bool
v3d42_qpu_add_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1549 v3d42_qpu_add_pack(const struct v3d_device_info *devinfo,
1550 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1551 {
1552 uint32_t waddr = instr->alu.add.waddr;
1553 uint32_t mux_a = instr->alu.add.a.mux;
1554 uint32_t mux_b = instr->alu.add.b.mux;
1555 int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1556 const struct opcode_desc *desc =
1557 lookup_opcode_from_instr(devinfo, v3d42_add_ops,
1558 ARRAY_SIZE(v3d42_add_ops),
1559 instr->alu.add.op);
1560
1561 if (!desc)
1562 return false;
1563
1564 uint32_t opcode = desc->opcode_first;
1565
1566 /* If an operation doesn't use an arg, its mux values may be used to
1567 * identify the operation type.
1568 */
1569 if (nsrc < 2)
1570 mux_b = ffs(desc->mux.b_mask) - 1;
1571
1572 if (nsrc < 1)
1573 mux_a = ffs(desc->mux.a_mask) - 1;
1574
1575 bool no_magic_write = false;
1576
1577 switch (instr->alu.add.op) {
1578 case V3D_QPU_A_STVPMV:
1579 waddr = 0;
1580 no_magic_write = true;
1581 break;
1582 case V3D_QPU_A_STVPMD:
1583 waddr = 1;
1584 no_magic_write = true;
1585 break;
1586 case V3D_QPU_A_STVPMP:
1587 waddr = 2;
1588 no_magic_write = true;
1589 break;
1590
1591 case V3D_QPU_A_LDVPMV_IN:
1592 case V3D_QPU_A_LDVPMD_IN:
1593 case V3D_QPU_A_LDVPMP:
1594 case V3D_QPU_A_LDVPMG_IN:
1595 assert(!instr->alu.add.magic_write);
1596 break;
1597
1598 case V3D_QPU_A_LDVPMV_OUT:
1599 case V3D_QPU_A_LDVPMD_OUT:
1600 case V3D_QPU_A_LDVPMG_OUT:
1601 assert(!instr->alu.add.magic_write);
1602 *packed_instr |= V3D_QPU_MA;
1603 break;
1604
1605 default:
1606 break;
1607 }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
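                /* For example (a sketch, not from the original source): with
                 * a_unpack == 1 and mux_a == 3, operand A's key is
                 * 1 * 8 + 3 == 11. After the swap below, FMIN/FADD always
                 * carry the smaller (unpack, mux) key in slot A and
                 * FMAX/FADDNF the larger, which is what lets a decoder tell
                 * each pair of opcodes apart.
                 */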
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(0x3 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}

static bool
v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t raddr_a = instr->alu.add.a.raddr;
        uint32_t raddr_b = instr->alu.add.b.raddr;

        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d71_add_ops,
                                         ARRAY_SIZE(v3d71_add_ops),
                                         instr->alu.add.op);
        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its raddr values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                raddr_b = ffsll(desc->raddr_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.op != V3D_QPU_A_FCMP) {
                        if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                                       &output_pack)) {
                                return false;
                        }
                        opcode |= output_pack << 4;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }
                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering =
                        instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
                        instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
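                /* A sketch of why those weights work (illustrative): raddr
                 * is a 6-bit value (0..63) and the 2-bit unpack code is
                 * scaled by 64, so the three terms never alias; the
                 * small-immediate flag dominates, then the unpack mode, then
                 * the raddr. The comparison is therefore a strict total
                 * order on the two operand encodings.
                 */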
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = raddr_a;
                        raddr_a = raddr_b;
                        raddr_b = temp;

                        /* If we are swapping raddr_a/b we also need to swap
                         * small_imm_a/b.
                         */
                        if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
                                assert(instr->sig.small_imm_a !=
                                       instr->sig.small_imm_b);
                                struct v3d_qpu_sig new_sig = instr->sig;
                                new_sig.small_imm_a = !instr->sig.small_imm_a;
                                new_sig.small_imm_b = !instr->sig.small_imm_b;
                                uint32_t sig;
                                if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
                                        return false;
                                *packed_instr &= ~V3D_QPU_SIG_MASK;
                                *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
                        }
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;

                raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        case V3D_QPU_A_MOV: {
                uint32_t packed;

                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
                                               &packed)) {
                        return false;
                }

                raddr_b |= packed << 2;
                break;
        }

        case V3D_QPU_A_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_b = packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                raddr_b |= packed << 2;
                break;
        }

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
        *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}

static bool
v3d42_qpu_mul_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a.mux;
        uint32_t mux_b = instr->alu.mul.b.mux;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d42_mul_ops,
                                         ARRAY_SIZE(v3d42_mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;
        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux.b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux.a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.b.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;
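
                /* A worked example of the split above (illustrative): the
                 * 2-bit float32 pack code is spread across two fields, its
                 * high bit going into bit 0 of the opcode and its low bit
                 * into bit 2 of mux_b. E.g. packed == 0x3 sets the opcode's
                 * low bit and makes mux_b 0x4 before the unpack bits are
                 * ORed in below.
                 */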

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}

static bool
v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t raddr_c = instr->alu.mul.a.raddr;
        uint32_t raddr_d = instr->alu.mul.b.raddr;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d71_mul_ops,
                                         ARRAY_SIZE(v3d71_mul_ops),
                                         instr->alu.mul.op);
        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;
        /* Some opcodes have a single valid value for their raddr_d, so set
         * that here. If raddr_d determines packing, it will be set below.
         */
        if (nsrc < 2)
                raddr_d = ffsll(desc->raddr_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.b.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_d |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                raddr_d |= packed << 2;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                unreachable("pending v3d71 update");
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        case V3D_QPU_M_MOV: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
                                               &packed)) {
                        return false;
                }

                raddr_d |= packed << 2;
                break;
        }

        default:
                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
        *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        if (devinfo->ver >= 71)
                return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
        else
                return v3d42_qpu_add_pack(devinfo, instr, packed_instr);
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        if (devinfo->ver >= 71)
                return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
        else
                return v3d42_qpu_mul_pack(devinfo, instr, packed_instr);
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
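
        /* When a signal writes an address, the 7-bit cond field is reused to
         * carry that destination instead of flags: bit 6 is the magic flag
         * and bits 5:0 the waddr. A worked example (illustrative):
         * packed_cond == 0x45 decodes to sig_magic set and sig_addr == 5.
         */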
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        if (devinfo->ver < 71) {
                /*
                 * For v3d71 this will be set on add/mul unpack, as the
                 * raddrs are now part of v3d_qpu_input
                 */
                instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
                instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
        }

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;
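
        /* A worked view of the reassembly above (derived from the field
         * masks): ADDR_LOW is a 21-bit field providing offset bits 23:3 and
         * ADDR_HIGH an 8-bit field providing bits 31:24, so branch offsets
         * are always multiples of 8, i.e. aligned to the 64-bit instruction
         * size.
         */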

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);

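                /* The 5-bit signal field discriminates branches: checking
                 * (sig & 24) == 16 accepts signal codes 16..23 (binary
                 * 10xxx); the pack side below always emits 16.
                 */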
                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (devinfo->ver < 71) {
                        /*
                         * For v3d71 this will be set on add/mul pack, as the
                         * raddrs are now part of v3d_qpu_input
                         */
                        *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                        *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
                }

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
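
/* A minimal usage sketch (illustrative, not part of the original file):
 * pack an instruction and decode it back, assuming a populated
 * v3d_device_info "devinfo" and a valid "instr". Modulo struct padding,
 * a successful pack/unpack pair should round-trip.
 *
 *     uint64_t packed;
 *     if (v3d_qpu_instr_pack(devinfo, &instr, &packed)) {
 *             struct v3d_qpu_instr decoded = { 0 };
 *             if (v3d_qpu_instr_unpack(devinfo, packed, &decoded))
 *                     assert(decoded.type == instr.type);
 *     }
 */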