1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30
31 #ifndef QPU_MASK
32 #define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
33 /* Using the GNU statement expression extension */
34 #define QPU_SET_FIELD(value, field) \
35 ({ \
36 uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
37 assert((fieldval & ~ field ## _MASK) == 0); \
38 fieldval & field ## _MASK; \
39 })
40
41 #define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
42
43 #define QPU_UPDATE_FIELD(inst, value, field) \
44 (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
45 #endif /* QPU_MASK */
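/* Helpers for manipulating bit fields within the 64-bit packed instruction:
 * e.g. QPU_GET_FIELD(packed_inst, V3D_QPU_SIG) extracts the signal bits using
 * the V3D_QPU_SIG_SHIFT/_MASK pair defined below, and QPU_UPDATE_FIELD()
 * rewrites a field in place.
 */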
46
47 #define V3D_QPU_OP_MUL_SHIFT 58
48 #define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58)
49
50 #define V3D_QPU_SIG_SHIFT 53
51 #define V3D_QPU_SIG_MASK QPU_MASK(57, 53)
52
53 #define V3D_QPU_COND_SHIFT 46
54 #define V3D_QPU_COND_MASK QPU_MASK(52, 46)
55 #define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
56
57 #define V3D_QPU_MM QPU_MASK(45, 45)
58 #define V3D_QPU_MA QPU_MASK(44, 44)
59
60 #define V3D_QPU_WADDR_M_SHIFT 38
61 #define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)
62
63 #define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35
64 #define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
65
66 #define V3D_QPU_WADDR_A_SHIFT 32
67 #define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)
68
69 #define V3D_QPU_BRANCH_COND_SHIFT 32
70 #define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
71
72 #define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24
73 #define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
74
75 #define V3D_QPU_OP_ADD_SHIFT 24
76 #define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24)
77
78 #define V3D_QPU_MUL_B_SHIFT 21
79 #define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21)
80
81 #define V3D_QPU_BRANCH_MSFIGN_SHIFT 21
82 #define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
83
84 #define V3D_QPU_MUL_A_SHIFT 18
85 #define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
86
87 #define V3D_QPU_RADDR_C_SHIFT 18
88 #define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)
89
90 #define V3D_QPU_ADD_B_SHIFT 15
91 #define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
92
93 #define V3D_QPU_BRANCH_BDU_SHIFT 15
94 #define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
95
96 #define V3D_QPU_BRANCH_UB QPU_MASK(14, 14)
97
98 #define V3D_QPU_ADD_A_SHIFT 12
99 #define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12)
100
101 #define V3D_QPU_BRANCH_BDI_SHIFT 12
102 #define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
103
104 #define V3D_QPU_RADDR_D_SHIFT 12
105 #define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)
106
107 #define V3D_QPU_RADDR_A_SHIFT 6
108 #define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
109
110 #define V3D_QPU_RADDR_B_SHIFT 0
111 #define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0)
112
113 #define THRSW .thrsw = true
114 #define LDUNIF .ldunif = true
115 #define LDUNIFRF .ldunifrf = true
116 #define LDUNIFA .ldunifa = true
117 #define LDUNIFARF .ldunifarf = true
118 #define LDTMU .ldtmu = true
119 #define LDVARY .ldvary = true
120 #define LDVPM .ldvpm = true
121 #define LDTLB .ldtlb = true
122 #define LDTLBU .ldtlbu = true
123 #define UCB .ucb = true
124 #define ROT .rotate = true
125 #define WRTMUC .wrtmuc = true
126 #define SMIMM_A .small_imm_a = true
127 #define SMIMM_B .small_imm_b = true
128 #define SMIMM_C .small_imm_c = true
129 #define SMIMM_D .small_imm_d = true
130
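/* Each of the following tables maps the 5-bit packed signal field to the set
 * of signal flags it represents, one table per HW generation range. Packed
 * values missing from a table decode to an all-zero struct, which
 * v3d_qpu_sig_unpack() below treats as a reserved encoding.
 */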
131 static const struct v3d_qpu_sig v33_sig_map[] = {
132 /* MISC R3 R4 R5 */
133 [0] = { },
134 [1] = { THRSW, },
135 [2] = { LDUNIF },
136 [3] = { THRSW, LDUNIF },
137 [4] = { LDTMU, },
138 [5] = { THRSW, LDTMU, },
139 [6] = { LDTMU, LDUNIF },
140 [7] = { THRSW, LDTMU, LDUNIF },
141 [8] = { LDVARY, },
142 [9] = { THRSW, LDVARY, },
143 [10] = { LDVARY, LDUNIF },
144 [11] = { THRSW, LDVARY, LDUNIF },
145 [12] = { LDVARY, LDTMU, },
146 [13] = { THRSW, LDVARY, LDTMU, },
147 [14] = { SMIMM_B, LDVARY, },
148 [15] = { SMIMM_B, },
149 [16] = { LDTLB, },
150 [17] = { LDTLBU, },
151 /* 18-21 reserved */
152 [22] = { UCB, },
153 [23] = { ROT, },
154 [24] = { LDVPM, },
155 [25] = { THRSW, LDVPM, },
156 [26] = { LDVPM, LDUNIF },
157 [27] = { THRSW, LDVPM, LDUNIF },
158 [28] = { LDVPM, LDTMU, },
159 [29] = { THRSW, LDVPM, LDTMU, },
160 [30] = { SMIMM_B, LDVPM, },
161 [31] = { SMIMM_B, },
162 };
163
164 static const struct v3d_qpu_sig v40_sig_map[] = {
165 /* MISC R3 R4 R5 */
166 [0] = { },
167 [1] = { THRSW, },
168 [2] = { LDUNIF },
169 [3] = { THRSW, LDUNIF },
170 [4] = { LDTMU, },
171 [5] = { THRSW, LDTMU, },
172 [6] = { LDTMU, LDUNIF },
173 [7] = { THRSW, LDTMU, LDUNIF },
174 [8] = { LDVARY, },
175 [9] = { THRSW, LDVARY, },
176 [10] = { LDVARY, LDUNIF },
177 [11] = { THRSW, LDVARY, LDUNIF },
178 /* 12-13 reserved */
179 [14] = { SMIMM_B, LDVARY, },
180 [15] = { SMIMM_B, },
181 [16] = { LDTLB, },
182 [17] = { LDTLBU, },
183 [18] = { WRTMUC },
184 [19] = { THRSW, WRTMUC },
185 [20] = { LDVARY, WRTMUC },
186 [21] = { THRSW, LDVARY, WRTMUC },
187 [22] = { UCB, },
188 [23] = { ROT, },
189 /* 24-30 reserved */
190 [31] = { SMIMM_B, LDTMU, },
191 };
192
193 static const struct v3d_qpu_sig v41_sig_map[] = {
194 /* MISC phys R5 */
195 [0] = { },
196 [1] = { THRSW, },
197 [2] = { LDUNIF },
198 [3] = { THRSW, LDUNIF },
199 [4] = { LDTMU, },
200 [5] = { THRSW, LDTMU, },
201 [6] = { LDTMU, LDUNIF },
202 [7] = { THRSW, LDTMU, LDUNIF },
203 [8] = { LDVARY, },
204 [9] = { THRSW, LDVARY, },
205 [10] = { LDVARY, LDUNIF },
206 [11] = { THRSW, LDVARY, LDUNIF },
207 [12] = { LDUNIFRF },
208 [13] = { THRSW, LDUNIFRF },
209 [14] = { SMIMM_B, LDVARY },
210 [15] = { SMIMM_B, },
211 [16] = { LDTLB, },
212 [17] = { LDTLBU, },
213 [18] = { WRTMUC },
214 [19] = { THRSW, WRTMUC },
215 [20] = { LDVARY, WRTMUC },
216 [21] = { THRSW, LDVARY, WRTMUC },
217 [22] = { UCB, },
218 [23] = { ROT, },
219 [24] = { LDUNIFA},
220 [25] = { LDUNIFARF },
221 /* 26-30 reserved */
222 [31] = { SMIMM_B, LDTMU, },
223 };
224
225
226 static const struct v3d_qpu_sig v71_sig_map[] = {
227 /* MISC phys RF0 */
228 [0] = { },
229 [1] = { THRSW, },
230 [2] = { LDUNIF },
231 [3] = { THRSW, LDUNIF },
232 [4] = { LDTMU, },
233 [5] = { THRSW, LDTMU, },
234 [6] = { LDTMU, LDUNIF },
235 [7] = { THRSW, LDTMU, LDUNIF },
236 [8] = { LDVARY, },
237 [9] = { THRSW, LDVARY, },
238 [10] = { LDVARY, LDUNIF },
239 [11] = { THRSW, LDVARY, LDUNIF },
240 [12] = { LDUNIFRF },
241 [13] = { THRSW, LDUNIFRF },
242 [14] = { SMIMM_A, },
243 [15] = { SMIMM_B, },
244 [16] = { LDTLB, },
245 [17] = { LDTLBU, },
246 [18] = { WRTMUC },
247 [19] = { THRSW, WRTMUC },
248 [20] = { LDVARY, WRTMUC },
249 [21] = { THRSW, LDVARY, WRTMUC },
250 [22] = { UCB, },
251 /* 23 reserved */
252 [24] = { LDUNIFA},
253 [25] = { LDUNIFARF },
254 /* 26-29 reserved */
255 [30] = { SMIMM_C, },
256 [31] = { SMIMM_D, },
257 };
258
259 bool
260 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
261 uint32_t packed_sig,
262 struct v3d_qpu_sig *sig)
263 {
264 if (packed_sig >= ARRAY_SIZE(v33_sig_map))
265 return false;
266
267 if (devinfo->ver >= 71)
268 *sig = v71_sig_map[packed_sig];
269 else if (devinfo->ver >= 41)
270 *sig = v41_sig_map[packed_sig];
271 else if (devinfo->ver == 40)
272 *sig = v40_sig_map[packed_sig];
273 else
274 *sig = v33_sig_map[packed_sig];
275
276 /* Packed values other than 0 that unpack to an all-zero struct are reserved. */
277 return (packed_sig == 0 ||
278 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
279 }
280
281 bool
282 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
283 const struct v3d_qpu_sig *sig,
284 uint32_t *packed_sig)
285 {
286 static const struct v3d_qpu_sig *map;
287
288 if (devinfo->ver >= 71)
289 map = v71_sig_map;
290 else if (devinfo->ver >= 41)
291 map = v41_sig_map;
292 else if (devinfo->ver == 40)
293 map = v40_sig_map;
294 else
295 map = v33_sig_map;
296
297 for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
298 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
299 *packed_sig = i;
300 return true;
301 }
302 }
303
304 return false;
305 }
306
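/* Look-up table for the "small immediate" encoding: packed values 0-15 give
 * the integers 0..15, 16-31 give -16..-1, and 32-47 give the float powers of
 * two from 2.0^-8 to 2.0^7 (stored as their IEEE 754 bit patterns).
 * v3d_qpu_small_imm_pack() below just does a reverse linear search over this
 * table.
 */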
307 static const uint32_t small_immediates[] = {
308 0, 1, 2, 3,
309 4, 5, 6, 7,
310 8, 9, 10, 11,
311 12, 13, 14, 15,
312 -16, -15, -14, -13,
313 -12, -11, -10, -9,
314 -8, -7, -6, -5,
315 -4, -3, -2, -1,
316 0x3b800000, /* 2.0^-8 */
317 0x3c000000, /* 2.0^-7 */
318 0x3c800000, /* 2.0^-6 */
319 0x3d000000, /* 2.0^-5 */
320 0x3d800000, /* 2.0^-4 */
321 0x3e000000, /* 2.0^-3 */
322 0x3e800000, /* 2.0^-2 */
323 0x3f000000, /* 2.0^-1 */
324 0x3f800000, /* 2.0^0 */
325 0x40000000, /* 2.0^1 */
326 0x40800000, /* 2.0^2 */
327 0x41000000, /* 2.0^3 */
328 0x41800000, /* 2.0^4 */
329 0x42000000, /* 2.0^5 */
330 0x42800000, /* 2.0^6 */
331 0x43000000, /* 2.0^7 */
332 };
333
334 bool
335 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
336 uint32_t packed_small_immediate,
337 uint32_t *small_immediate)
338 {
339 if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
340 return false;
341
342 *small_immediate = small_immediates[packed_small_immediate];
343 return true;
344 }
345
346 bool
347 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
348 uint32_t value,
349 uint32_t *packed_small_immediate)
350 {
351 STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
352
353 for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
354 if (small_immediates[i] == value) {
355 *packed_small_immediate = i;
356 return true;
357 }
358 }
359
360 return false;
361 }
362
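/* A rough summary of the packed 7-bit condition/flags field, as decoded by
 * the chain below:
 *   0x00        nothing
 *   0x01-0x03   add ALU pushes flags (apf)
 *   0x04-0x0f   add ALU updates flags (auf)
 *   0x10        reserved
 *   0x11-0x13   mul ALU pushes flags (mpf)
 *   0x14-0x1f   mul ALU updates flags (muf)
 *   0x20-0x2f   add condition (ac) in bits 3:2, mpf in bits 1:0
 *   0x30-0x3f   mul condition (mc) in bits 3:2, apf in bits 1:0
 *   0x40-0x7f   mc in bits 5:4, plus either ac (bits 1:0, when bits 3:2 are
 *               zero) or auf (bits 3:0)
 */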
363 bool
364 v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
365 uint32_t packed_cond,
366 struct v3d_qpu_flags *cond)
367 {
368 static const enum v3d_qpu_cond cond_map[4] = {
369 [0] = V3D_QPU_COND_IFA,
370 [1] = V3D_QPU_COND_IFB,
371 [2] = V3D_QPU_COND_IFNA,
372 [3] = V3D_QPU_COND_IFNB,
373 };
374
375 cond->ac = V3D_QPU_COND_NONE;
376 cond->mc = V3D_QPU_COND_NONE;
377 cond->apf = V3D_QPU_PF_NONE;
378 cond->mpf = V3D_QPU_PF_NONE;
379 cond->auf = V3D_QPU_UF_NONE;
380 cond->muf = V3D_QPU_UF_NONE;
381
382 if (packed_cond == 0) {
383 return true;
384 } else if (packed_cond >> 2 == 0) {
385 cond->apf = packed_cond & 0x3;
386 } else if (packed_cond >> 4 == 0) {
387 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
388 } else if (packed_cond == 0x10) {
389 return false;
390 } else if (packed_cond >> 2 == 0x4) {
391 cond->mpf = packed_cond & 0x3;
392 } else if (packed_cond >> 4 == 0x1) {
393 cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
394 } else if (packed_cond >> 4 == 0x2) {
395 cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
396 cond->mpf = packed_cond & 0x3;
397 } else if (packed_cond >> 4 == 0x3) {
398 cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
399 cond->apf = packed_cond & 0x3;
400 } else if (packed_cond >> 6) {
401 cond->mc = cond_map[(packed_cond >> 4) & 0x3];
402 if (((packed_cond >> 2) & 0x3) == 0) {
403 cond->ac = cond_map[packed_cond & 0x3];
404 } else {
405 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
406 }
407 }
408
409 return true;
410 }
411
412 bool
413 v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
414 const struct v3d_qpu_flags *cond,
415 uint32_t *packed_cond)
416 {
417 #define AC (1 << 0)
418 #define MC (1 << 1)
419 #define APF (1 << 2)
420 #define MPF (1 << 3)
421 #define AUF (1 << 4)
422 #define MUF (1 << 5)
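/* Each entry below pairs a combination of flag fields present in the
 * instruction with the fixed high bits they select in the packed condition
 * field; the low bits are then filled in from the individual cond/pf/uf
 * values further down.
 */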
423 static const struct {
424 uint8_t flags_present;
425 uint8_t bits;
426 } flags_table[] = {
427 { 0, 0 },
428 { APF, 0 },
429 { AUF, 0 },
430 { MPF, (1 << 4) },
431 { MUF, (1 << 4) },
432 { AC, (1 << 5) },
433 { AC | MPF, (1 << 5) },
434 { MC, (1 << 5) | (1 << 4) },
435 { MC | APF, (1 << 5) | (1 << 4) },
436 { MC | AC, (1 << 6) },
437 { MC | AUF, (1 << 6) },
438 };
439
440 uint8_t flags_present = 0;
441 if (cond->ac != V3D_QPU_COND_NONE)
442 flags_present |= AC;
443 if (cond->mc != V3D_QPU_COND_NONE)
444 flags_present |= MC;
445 if (cond->apf != V3D_QPU_PF_NONE)
446 flags_present |= APF;
447 if (cond->mpf != V3D_QPU_PF_NONE)
448 flags_present |= MPF;
449 if (cond->auf != V3D_QPU_UF_NONE)
450 flags_present |= AUF;
451 if (cond->muf != V3D_QPU_UF_NONE)
452 flags_present |= MUF;
453
454 for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
455 if (flags_table[i].flags_present != flags_present)
456 continue;
457
458 *packed_cond = flags_table[i].bits;
459
460 *packed_cond |= cond->apf;
461 *packed_cond |= cond->mpf;
462
463 if (flags_present & AUF)
464 *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
465 if (flags_present & MUF)
466 *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;
467
468 if (flags_present & AC) {
469 if (*packed_cond & (1 << 6))
470 *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
471 else
472 *packed_cond |= (cond->ac -
473 V3D_QPU_COND_IFA) << 2;
474 }
475
476 if (flags_present & MC) {
477 if (*packed_cond & (1 << 6))
478 *packed_cond |= (cond->mc -
479 V3D_QPU_COND_IFA) << 4;
480 else
481 *packed_cond |= (cond->mc -
482 V3D_QPU_COND_IFA) << 2;
483 }
484
485 return true;
486 }
487
488 return false;
489 }
490
491 /* Make a mapping of the table of opcodes in the spec. The opcode is
492 * determined by a combination of the opcode field, and in the case of 0 or
493 * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
494 * well.
495 */
496 #define OP_MASK(val) BITFIELD64_BIT(val)
497 #define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, top - bot + 1)
498 #define ANYMUX OP_RANGE(0, 7)
499 #define ANYOPMASK OP_RANGE(0, 63)
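/* e.g. OP_MASK(3) has only bit 3 set, OP_RANGE(4, 6) has bits 4..6 set, and
 * ANYMUX accepts any of the 8 possible mux values.
 */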
500
501 struct opcode_desc {
502 uint8_t opcode_first;
503 uint8_t opcode_last;
504
505 union {
506 struct {
507 uint8_t b_mask;
508 uint8_t a_mask;
509 } mux;
510 uint64_t raddr_mask;
511 };
512
513 uint8_t op;
514
515 /* first_ver == 0 if it's the same across all V3D versions.
516  * first_ver == X, last_ver == 0 if it's the same for all V3D versions
517  * starting from X.
518  * first_ver == X, last_ver == Y if it's the same for all V3D versions
519  * in the range X through Y.
520  */
521 uint8_t first_ver;
522 uint8_t last_ver;
523 };
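/* For example, the add_ops_v33 entry
 *   { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG }
 * says that opcode 186 with mux_b == 1 (and any mux_a) decodes as NEG on the
 * pre-7.1 mux-based encoding.
 */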
524
525 static const struct opcode_desc add_ops_v33[] = {
526 /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
527 { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
528 { 0, 47, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
529 { 53, 55, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
530 { 56, 56, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
531 { 57, 59, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
532 { 60, 60, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
533 { 61, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
534 { 64, 111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
535 { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
536 { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
537 { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
538 { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
539 { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
540 { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
541 { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
542 { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
543 /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
544 { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
545 { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
546 { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },
547
548 { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
549 { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
550 { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },
551
552 { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
553 { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
554 { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
555 { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
556 { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
557 { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
558 { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
559 { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
560 { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
561 { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
562 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
563 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
564 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
565 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
566 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
567 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
568 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
569 { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
570
571 { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
572 { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
573 { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
574 { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },
575
576 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
577 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
578 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
579 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
580 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
581 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
582 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
583 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
584 { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
585 { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
586 { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
587
588 { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
589 { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
590 { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
591 { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
592 { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
593 { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
594 { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
595 { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
596 { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
597 { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
598 { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
599 { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },
600
601 /* FIXME: MORE COMPLICATED */
602 /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
603
604 { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
605 { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },
606
607 { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
608 { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
609 { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
610 { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
611 { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
612 { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
613 { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
614 { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },
615
616 { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
617 { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },
618
619 /* The stvpms are distinguished by the waddr field. */
620 { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
621 { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
622 { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },
623
624 { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
625 { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
626 { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
627 };
628
629 static const struct opcode_desc mul_ops_v33[] = {
630 { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
631 { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
632 { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
633 { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
634 { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
635 { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
636 { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
637 { 15, 15, .mux.b_mask = OP_RANGE(0, 3), .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
638 { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
639 { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },
640
641 { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
642 };
643
644 /* Note that it would have been possible to define all the add/mul opcodes in
645  * a single table, using first_ver/last_ver. But since so many opcodes
646  * changed on v71, keeping separate tables is tidier. It also keeps the
647  * tables smaller, which matters because we currently do a linear search
648  * on them.
649  *
650  * In case the tables are ever merged, first_ver is set to 71 for the
651  * opcodes that changed on v71.
652  */
653 static const struct opcode_desc add_ops_v71[] = {
654 /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
655 { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
656 { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
657 { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
658 { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
659 { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
660 { 60, 60, .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
661 { 61, 63, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
662 { 64, 111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
663 { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
664 { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
665 { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
666 { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
667 { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
668 { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
669 { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
670 { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
671 /* FMIN is instead FMAX depending on the raddr_a/b order. */
672 { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
673 { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
674 { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
675
676 { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
677 { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
678 { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
679 { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
680 { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },
681
682 { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
683 { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
684 { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
685 { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
686 { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
687 { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
688 { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
689 { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },
690
691 { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
692 { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
693 { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
694 { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
695 { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
696 { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
697 { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
698 { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
699 { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
700 { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
701 { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
702 { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
703 { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
704 { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
705 { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
706 { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
707 { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
708 { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
709 { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },
710
711 { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
712 { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },
713
714 { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
715 { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
716 { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },
717
718 { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
719 { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
720 { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
721 { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
722 { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
723 { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
724 { 188, 188, .raddr_mask = OP_MASK(38), V3D_QPU_A_BALLOT, 71 },
725 { 188, 188, .raddr_mask = OP_MASK(39), V3D_QPU_A_BCASTF, 71 },
726 { 188, 188, .raddr_mask = OP_MASK(40), V3D_QPU_A_ALLEQ, 71 },
727 { 188, 188, .raddr_mask = OP_MASK(41), V3D_QPU_A_ALLFEQ, 71 },
728
729 { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },
730
731 /* The stvpms are distinguished by the waddr field. */
732 { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71},
733 { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71},
734 { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71},
735
736 { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },
737
738 { 245, 245, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FROUND, 71 },
739 { 245, 245, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FROUND, 71 },
740 { 245, 245, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FROUND, 71 },
741 { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },
742
743 { 245, 245, .raddr_mask = OP_MASK(3), V3D_QPU_A_FTOIN, 71 },
744 { 245, 245, .raddr_mask = OP_MASK(7), V3D_QPU_A_FTOIN, 71 },
745 { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
746 { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },
747
748 { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
749 { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
750 { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
751 { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },
752
753 { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
754 { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
755 { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
756 { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },
757
758 { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
759 { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
760 { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
761 { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },
762
763 { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
764 { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
765 { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
766 { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },
767
768 { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
769 { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
770 { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
771 { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },
772
773 { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
774 { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
775 { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
776 { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },
777
778 { 246, 246, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FDX, 71 },
779 { 246, 246, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FDX, 71 },
780 { 246, 246, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FDX, 71 },
781 { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
782 { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
783 { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
784 { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
785 { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },
786
787 { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
788 { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
789
790 { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
791 { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },
792
793 { 249, 249, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_A_FMOV, 71 },
794 { 249, 249, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_A_FMOV, 71 },
795 { 249, 249, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_A_FMOV, 71 },
796 { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
797 { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
798 { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
799 { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },
800
801 { 249, 249, .raddr_mask = OP_MASK(3), V3D_QPU_A_MOV, 71 },
802 { 249, 249, .raddr_mask = OP_MASK(7), V3D_QPU_A_MOV, 71 },
803 { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
804 { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
805 { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
806
807 { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
808 { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },
809
810 { 252, 252, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROTQ, 71 },
811 { 253, 253, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROT, 71 },
812 { 254, 254, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHUFFLE, 71 },
813 };
814
815 static const struct opcode_desc mul_ops_v71[] = {
816 /* For V3D 7.1, the second mask field is ignored. */
817 { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
818 { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
819 { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
820 { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
821 { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
822 { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
823 { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },
824
825 { 14, 14, .raddr_mask = OP_RANGE(0, 2), V3D_QPU_M_FMOV, 71 },
826 { 14, 14, .raddr_mask = OP_RANGE(4, 6), V3D_QPU_M_FMOV, 71 },
827 { 14, 14, .raddr_mask = OP_RANGE(8, 10), V3D_QPU_M_FMOV, 71 },
828 { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
829 { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
830 { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },
831
832 { 14, 14, .raddr_mask = OP_MASK(3), V3D_QPU_M_MOV, 71 },
833 { 14, 14, .raddr_mask = OP_MASK(7), V3D_QPU_M_MOV, 71 },
834 { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
835 { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
836 { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },
837
838 { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
839 { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
840 { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
841 { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
842 { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
843 { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },
844
845 { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },
846
847 { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
848 };
849
850 /* Returns true if op_desc should be filtered out for devinfo->ver based on
851  * op_desc->first_ver and op_desc->last_ver. See the notes about
852  * first_ver/last_ver in the struct opcode_desc comments.
853  */
854 static bool
855 opcode_invalid_in_version(const struct v3d_device_info *devinfo,
856 const uint8_t first_ver,
857 const uint8_t last_ver)
858 {
859 return (first_ver != 0 && devinfo->ver < first_ver) ||
860 (last_ver != 0 && devinfo->ver > last_ver);
861 }
862
863 /* Note that we pass mux_a, mux_b and raddr as parameters even though,
864  * depending on devinfo->ver, some of them are ignored. We do it this way to
865  * avoid having two nearly identical lookup_opcode methods.
866  */
867 static const struct opcode_desc *
868 lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
869 const struct opcode_desc *opcodes,
870 size_t num_opcodes, uint32_t opcode,
871 uint32_t mux_a, uint32_t mux_b,
872 uint32_t raddr)
873 {
874 for (int i = 0; i < num_opcodes; i++) {
875 const struct opcode_desc *op_desc = &opcodes[i];
876
877 if (opcode < op_desc->opcode_first ||
878 opcode > op_desc->opcode_last)
879 continue;
880
881 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
882 continue;
883
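/* Pre-7.1 encodings distinguish 0/1-argument opcodes via the mux fields,
 * while 7.1+ uses the raddr field instead (see the opcode tables above).
 */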
884 if (devinfo->ver < 71) {
885 if (!(op_desc->mux.b_mask & (1 << mux_b)))
886 continue;
887
888 if (!(op_desc->mux.a_mask & (1 << mux_a)))
889 continue;
890 } else {
891 if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr)))
892 continue;
893 }
894
895 return op_desc;
896 }
897
898 return NULL;
899 }
900
901 static bool
902 v3d_qpu_float32_unpack_unpack(uint32_t packed,
903 enum v3d_qpu_input_unpack *unpacked)
904 {
905 switch (packed) {
906 case 0:
907 *unpacked = V3D_QPU_UNPACK_ABS;
908 return true;
909 case 1:
910 *unpacked = V3D_QPU_UNPACK_NONE;
911 return true;
912 case 2:
913 *unpacked = V3D_QPU_UNPACK_L;
914 return true;
915 case 3:
916 *unpacked = V3D_QPU_UNPACK_H;
917 return true;
918 default:
919 return false;
920 }
921 }
922
923 static bool
924 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
925 uint32_t *packed)
926 {
927 switch (unpacked) {
928 case V3D_QPU_UNPACK_ABS:
929 *packed = 0;
930 return true;
931 case V3D_QPU_UNPACK_NONE:
932 *packed = 1;
933 return true;
934 case V3D_QPU_UNPACK_L:
935 *packed = 2;
936 return true;
937 case V3D_QPU_UNPACK_H:
938 *packed = 3;
939 return true;
940 default:
941 return false;
942 }
943 }
944
945 static bool
946 v3d_qpu_int32_unpack_unpack(uint32_t packed,
947 enum v3d_qpu_input_unpack *unpacked)
948 {
949 switch (packed) {
950 case 0:
951 *unpacked = V3D_QPU_UNPACK_NONE;
952 return true;
953 case 1:
954 *unpacked = V3D_QPU_UNPACK_UL;
955 return true;
956 case 2:
957 *unpacked = V3D_QPU_UNPACK_UH;
958 return true;
959 case 3:
960 *unpacked = V3D_QPU_UNPACK_IL;
961 return true;
962 case 4:
963 *unpacked = V3D_QPU_UNPACK_IH;
964 return true;
965 default:
966 return false;
967 }
968 }
969
970 static bool
971 v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
972 uint32_t *packed)
973 {
974 switch (unpacked) {
975 case V3D_QPU_UNPACK_NONE:
976 *packed = 0;
977 return true;
978 case V3D_QPU_UNPACK_UL:
979 *packed = 1;
980 return true;
981 case V3D_QPU_UNPACK_UH:
982 *packed = 2;
983 return true;
984 case V3D_QPU_UNPACK_IL:
985 *packed = 3;
986 return true;
987 case V3D_QPU_UNPACK_IH:
988 *packed = 4;
989 return true;
990 default:
991 return false;
992 }
993 }
994
995 static bool
996 v3d_qpu_float16_unpack_unpack(uint32_t packed,
997 enum v3d_qpu_input_unpack *unpacked)
998 {
999 switch (packed) {
1000 case 0:
1001 *unpacked = V3D_QPU_UNPACK_NONE;
1002 return true;
1003 case 1:
1004 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
1005 return true;
1006 case 2:
1007 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
1008 return true;
1009 case 3:
1010 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
1011 return true;
1012 case 4:
1013 *unpacked = V3D_QPU_UNPACK_SWAP_16;
1014 return true;
1015 default:
1016 return false;
1017 }
1018 }
1019
1020 static bool
1021 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
1022 uint32_t *packed)
1023 {
1024 switch (unpacked) {
1025 case V3D_QPU_UNPACK_NONE:
1026 *packed = 0;
1027 return true;
1028 case V3D_QPU_UNPACK_REPLICATE_32F_16:
1029 *packed = 1;
1030 return true;
1031 case V3D_QPU_UNPACK_REPLICATE_L_16:
1032 *packed = 2;
1033 return true;
1034 case V3D_QPU_UNPACK_REPLICATE_H_16:
1035 *packed = 3;
1036 return true;
1037 case V3D_QPU_UNPACK_SWAP_16:
1038 *packed = 4;
1039 return true;
1040 default:
1041 return false;
1042 }
1043 }
1044
1045 static bool
1046 v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
1047 uint32_t *packed)
1048 {
1049 switch (pack) {
1050 case V3D_QPU_PACK_NONE:
1051 *packed = 0;
1052 return true;
1053 case V3D_QPU_PACK_L:
1054 *packed = 1;
1055 return true;
1056 case V3D_QPU_PACK_H:
1057 *packed = 2;
1058 return true;
1059 default:
1060 return false;
1061 }
1062 }
1063
1064 static bool
1065 v3d33_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1066 struct v3d_qpu_instr *instr)
1067 {
1068 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
1069 uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
1070 uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
1071 uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1072
1073 uint32_t map_op = op;
1074 /* Some big clusters of opcodes are replicated with unpack
1075 * flags
1076 */
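/* (Opcodes 249-251 and 253-255 appear to be copies of the 245-247 families
 * with different input-unpack bits, so we fold them back onto 245-247 before
 * the table lookup.)
 */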
1077 if (map_op >= 249 && map_op <= 251)
1078 map_op = (map_op - 249 + 245);
1079 if (map_op >= 253 && map_op <= 255)
1080 map_op = (map_op - 253 + 245);
1081
1082 const struct opcode_desc *desc =
1083 lookup_opcode_from_packed(devinfo, add_ops_v33,
1084 ARRAY_SIZE(add_ops_v33),
1085 map_op, mux_a, mux_b, 0);
1086
1087 if (!desc)
1088 return false;
1089
1090 instr->alu.add.op = desc->op;
1091
1092 /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
1093  * operands.
1094  */
1095 if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
1096 if (instr->alu.add.op == V3D_QPU_A_FMIN)
1097 instr->alu.add.op = V3D_QPU_A_FMAX;
1098 if (instr->alu.add.op == V3D_QPU_A_FADD)
1099 instr->alu.add.op = V3D_QPU_A_FADDNF;
1100 }
1101
1102 /* Some QPU ops require a bit more than just basic opcode and mux a/b
1103 * comparisons to distinguish them.
1104 */
1105 switch (instr->alu.add.op) {
1106 case V3D_QPU_A_STVPMV:
1107 case V3D_QPU_A_STVPMD:
1108 case V3D_QPU_A_STVPMP:
1109 switch (waddr) {
1110 case 0:
1111 instr->alu.add.op = V3D_QPU_A_STVPMV;
1112 break;
1113 case 1:
1114 instr->alu.add.op = V3D_QPU_A_STVPMD;
1115 break;
1116 case 2:
1117 instr->alu.add.op = V3D_QPU_A_STVPMP;
1118 break;
1119 default:
1120 return false;
1121 }
1122 break;
1123 default:
1124 break;
1125 }
1126
1127 switch (instr->alu.add.op) {
1128 case V3D_QPU_A_FADD:
1129 case V3D_QPU_A_FADDNF:
1130 case V3D_QPU_A_FSUB:
1131 case V3D_QPU_A_FMIN:
1132 case V3D_QPU_A_FMAX:
1133 case V3D_QPU_A_FCMP:
1134 case V3D_QPU_A_VFPACK:
1135 if (instr->alu.add.op != V3D_QPU_A_VFPACK)
1136 instr->alu.add.output_pack = (op >> 4) & 0x3;
1137 else
1138 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1139
1140 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1141 &instr->alu.add.a.unpack)) {
1142 return false;
1143 }
1144
1145 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1146 &instr->alu.add.b.unpack)) {
1147 return false;
1148 }
1149 break;
1150
1151 case V3D_QPU_A_FFLOOR:
1152 case V3D_QPU_A_FROUND:
1153 case V3D_QPU_A_FTRUNC:
1154 case V3D_QPU_A_FCEIL:
1155 case V3D_QPU_A_FDX:
1156 case V3D_QPU_A_FDY:
1157 instr->alu.add.output_pack = mux_b & 0x3;
1158
1159 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1160 &instr->alu.add.a.unpack)) {
1161 return false;
1162 }
1163 break;
1164
1165 case V3D_QPU_A_FTOIN:
1166 case V3D_QPU_A_FTOIZ:
1167 case V3D_QPU_A_FTOUZ:
1168 case V3D_QPU_A_FTOC:
1169 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1170
1171 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1172 &instr->alu.add.a.unpack)) {
1173 return false;
1174 }
1175 break;
1176
1177 case V3D_QPU_A_VFMIN:
1178 case V3D_QPU_A_VFMAX:
1179 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
1180 &instr->alu.add.a.unpack)) {
1181 return false;
1182 }
1183
1184 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1185 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1186 break;
1187
1188 default:
1189 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1190 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1191 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1192 break;
1193 }
1194
1195 instr->alu.add.a.mux = mux_a;
1196 instr->alu.add.b.mux = mux_b;
1197 instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1198
1199 instr->alu.add.magic_write = false;
1200 if (packed_inst & V3D_QPU_MA) {
1201 switch (instr->alu.add.op) {
1202 case V3D_QPU_A_LDVPMV_IN:
1203 instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
1204 break;
1205 case V3D_QPU_A_LDVPMD_IN:
1206 instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
1207 break;
1208 case V3D_QPU_A_LDVPMG_IN:
1209 instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
1210 break;
1211 default:
1212 instr->alu.add.magic_write = true;
1213 break;
1214 }
1215 }
1216
1217 return true;
1218 }
1219
1220 static bool
1221 v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1222 struct v3d_qpu_instr *instr)
1223 {
1224 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
1225 uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
1226 uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
1227 uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1228 uint32_t map_op = op;
1229
1230 const struct opcode_desc *desc =
1231 lookup_opcode_from_packed(devinfo,
1232 add_ops_v71,
1233 ARRAY_SIZE(add_ops_v71),
1234 map_op, 0, 0,
1235 raddr_b);
1236 if (!desc)
1237 return false;
1238
1239 instr->alu.add.op = desc->op;
1240
1241 /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
1242 * operands.
1243 */
1244 if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
1245 instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
1246 if (instr->alu.add.op == V3D_QPU_A_FMIN)
1247 instr->alu.add.op = V3D_QPU_A_FMAX;
1248 if (instr->alu.add.op == V3D_QPU_A_FADD)
1249 instr->alu.add.op = V3D_QPU_A_FADDNF;
1250 }
1251
1252 /* Some QPU ops require a bit more than just basic opcode and mux a/b
1253 * comparisons to distinguish them.
1254 */
1255 switch (instr->alu.add.op) {
1256 case V3D_QPU_A_STVPMV:
1257 case V3D_QPU_A_STVPMD:
1258 case V3D_QPU_A_STVPMP:
1259 switch (waddr) {
1260 case 0:
1261 instr->alu.add.op = V3D_QPU_A_STVPMV;
1262 break;
1263 case 1:
1264 instr->alu.add.op = V3D_QPU_A_STVPMD;
1265 break;
1266 case 2:
1267 instr->alu.add.op = V3D_QPU_A_STVPMP;
1268 break;
1269 default:
1270 return false;
1271 }
1272 break;
1273 default:
1274 break;
1275 }
1276
1277 switch (instr->alu.add.op) {
1278 case V3D_QPU_A_FADD:
1279 case V3D_QPU_A_FADDNF:
1280 case V3D_QPU_A_FSUB:
1281 case V3D_QPU_A_FMIN:
1282 case V3D_QPU_A_FMAX:
1283 case V3D_QPU_A_FCMP:
1284 case V3D_QPU_A_VFPACK:
1285 if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
1286 instr->alu.add.op != V3D_QPU_A_FCMP) {
1287 instr->alu.add.output_pack = (op >> 4) & 0x3;
1288 } else {
1289 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1290 }
1291
1292 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1293 &instr->alu.add.a.unpack)) {
1294 return false;
1295 }
1296
1297 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1298 &instr->alu.add.b.unpack)) {
1299 return false;
1300 }
1301 break;
1302
1303 case V3D_QPU_A_FFLOOR:
1304 case V3D_QPU_A_FROUND:
1305 case V3D_QPU_A_FTRUNC:
1306 case V3D_QPU_A_FCEIL:
1307 case V3D_QPU_A_FDX:
1308 case V3D_QPU_A_FDY:
1309 instr->alu.add.output_pack = raddr_b & 0x3;
1310
1311 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1312 &instr->alu.add.a.unpack)) {
1313 return false;
1314 }
1315 break;
1316
1317 case V3D_QPU_A_FTOIN:
1318 case V3D_QPU_A_FTOIZ:
1319 case V3D_QPU_A_FTOUZ:
1320 case V3D_QPU_A_FTOC:
1321 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1322
1323 if (!v3d_qpu_float32_unpack_unpack((raddr_b >> 2) & 0x3,
1324 &instr->alu.add.a.unpack)) {
1325 return false;
1326 }
1327 break;
1328
1329 case V3D_QPU_A_VFMIN:
1330 case V3D_QPU_A_VFMAX:
1331 unreachable("pending v71 update");
1332 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
1333 &instr->alu.add.a.unpack)) {
1334 return false;
1335 }
1336
1337 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1338 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1339 break;
1340
1341 case V3D_QPU_A_MOV:
1342 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1343
1344 if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
1345 &instr->alu.add.a.unpack)) {
1346 return false;
1347 }
1348 break;
1349
1350 case V3D_QPU_A_FMOV:
1351 instr->alu.add.output_pack = raddr_b & 0x3;
1352
1353 /* Mul alu FMOV has one additional variant */
1354 int32_t unpack = (raddr_b >> 2) & 0x7;
1355 if (unpack == 7)
1356 return false;
1357
1358 if (!v3d_qpu_float32_unpack_unpack(unpack,
1359 &instr->alu.add.a.unpack)) {
1360 return false;
1361 }
1362 break;
1363
1364 default:
1365 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1366 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1367 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1368 break;
1369 }
1370
1371 instr->alu.add.a.raddr = raddr_a;
1372 instr->alu.add.b.raddr = raddr_b;
1373 instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1374
1375 instr->alu.add.magic_write = false;
1376 if (packed_inst & V3D_QPU_MA) {
1377 switch (instr->alu.add.op) {
1378 case V3D_QPU_A_LDVPMV_IN:
1379 instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
1380 break;
1381 case V3D_QPU_A_LDVPMD_IN:
1382 instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
1383 break;
1384 case V3D_QPU_A_LDVPMG_IN:
1385 instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
1386 break;
1387 default:
1388 instr->alu.add.magic_write = true;
1389 break;
1390 }
1391 }
1392
1393 return true;
1394 }
1395
1396 static bool
1397 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1398 struct v3d_qpu_instr *instr)
1399 {
1400 if (devinfo->ver < 71)
1401 return v3d33_qpu_add_unpack(devinfo, packed_inst, instr);
1402 else
1403 return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
1404 }
1405
1406 static bool
1407 v3d33_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1408 struct v3d_qpu_instr *instr)
1409 {
1410 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1411 uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
1412 uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
1413
1414 {
1415 const struct opcode_desc *desc =
1416 lookup_opcode_from_packed(devinfo,
1417 mul_ops_v33,
1418 ARRAY_SIZE(mul_ops_v33),
1419 op, mux_a, mux_b, 0);
1420 if (!desc)
1421 return false;
1422
1423 instr->alu.mul.op = desc->op;
1424 }
1425
1426 switch (instr->alu.mul.op) {
1427 case V3D_QPU_M_FMUL:
1428 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1429
1430 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1431 &instr->alu.mul.a.unpack)) {
1432 return false;
1433 }
1434
1435 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1436 &instr->alu.mul.b.unpack)) {
1437 return false;
1438 }
1439
1440 break;
1441
1442 case V3D_QPU_M_FMOV:
1443 instr->alu.mul.output_pack = (((op & 1) << 1) +
1444 ((mux_b >> 2) & 1));
1445
1446 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
1447 &instr->alu.mul.a.unpack)) {
1448 return false;
1449 }
1450
1451 break;
1452
1453 case V3D_QPU_M_VFMUL:
1454 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1455
1456 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1457 &instr->alu.mul.a.unpack)) {
1458 return false;
1459 }
1460
1461 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1462
1463 break;
1464
1465 default:
1466 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1467 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1468 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1469 break;
1470 }
1471
1472 instr->alu.mul.a.mux = mux_a;
1473 instr->alu.mul.b.mux = mux_b;
1474 instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1475 instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1476
1477 return true;
1478 }
1479
1480 static bool
1481 v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1482 struct v3d_qpu_instr *instr)
1483 {
1484 uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1485 uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
1486 uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);
1487
1488 {
1489 const struct opcode_desc *desc =
1490 lookup_opcode_from_packed(devinfo,
1491 mul_ops_v71,
1492 ARRAY_SIZE(mul_ops_v71),
1493 op, 0, 0,
1494 raddr_d);
1495 if (!desc)
1496 return false;
1497
1498 instr->alu.mul.op = desc->op;
1499 }
1500
1501 switch (instr->alu.mul.op) {
1502 case V3D_QPU_M_FMUL:
1503 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1504
1505 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1506 &instr->alu.mul.a.unpack)) {
1507 return false;
1508 }
1509
1510 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1511 &instr->alu.mul.b.unpack)) {
1512 return false;
1513 }
1514
1515 break;
1516
1517 case V3D_QPU_M_FMOV:
1518 instr->alu.mul.output_pack = raddr_d & 0x3;
1519
1520 if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7,
1521 &instr->alu.mul.a.unpack)) {
1522 return false;
1523 }
1524
1525 break;
1526
1527 case V3D_QPU_M_VFMUL:
1528 unreachable("pending v71 update");
1529 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1530
1531 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1532 &instr->alu.mul.a.unpack)) {
1533 return false;
1534 }
1535
1536 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1537
1538 break;
1539
1540 case V3D_QPU_M_MOV:
1541 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1542
1543 if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
1544 &instr->alu.mul.a.unpack)) {
1545 return false;
1546 }
1547 break;
1548
1549 default:
1550 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1551 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1552 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1553 break;
1554 }
1555
1556 instr->alu.mul.a.raddr = raddr_c;
1557 instr->alu.mul.b.raddr = raddr_d;
1558 instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1559 instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1560
1561 return true;
1562 }
1563
1564 static bool
1565 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1566 struct v3d_qpu_instr *instr)
1567 {
1568 if (devinfo->ver < 71)
1569 return v3d33_qpu_mul_unpack(devinfo, packed_inst, instr);
1570 else
1571 return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
1572 }
1573
1574 static const struct opcode_desc *
1575 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
1576 const struct opcode_desc *opcodes, size_t num_opcodes,
1577 uint8_t op)
1578 {
1579 for (int i = 0; i < num_opcodes; i++) {
1580 const struct opcode_desc *op_desc = &opcodes[i];
1581
1582 if (op_desc->op != op)
1583 continue;
1584
1585 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
1586 continue;
1587
1588 return op_desc;
1589 }
1590
1591 return NULL;
1592 }
1593
1594 static bool
1595 v3d33_qpu_add_pack(const struct v3d_device_info *devinfo,
1596 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1597 {
1598 uint32_t waddr = instr->alu.add.waddr;
1599 uint32_t mux_a = instr->alu.add.a.mux;
1600 uint32_t mux_b = instr->alu.add.b.mux;
1601 int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1602 const struct opcode_desc *desc =
1603 lookup_opcode_from_instr(devinfo, add_ops_v33,
1604 ARRAY_SIZE(add_ops_v33),
1605 instr->alu.add.op);
1606
1607 if (!desc)
1608 return false;
1609
1610 uint32_t opcode = desc->opcode_first;
1611
1612 /* If an operation doesn't use an arg, its mux values may be used to
1613 * identify the operation type.
1614 */
1615 if (nsrc < 2)
1616 mux_b = ffs(desc->mux.b_mask) - 1;
1617
1618 if (nsrc < 1)
1619 mux_a = ffs(desc->mux.a_mask) - 1;
1620
1621 bool no_magic_write = false;
1622
1623 switch (instr->alu.add.op) {
1624 case V3D_QPU_A_STVPMV:
1625 waddr = 0;
1626 no_magic_write = true;
1627 break;
1628 case V3D_QPU_A_STVPMD:
1629 waddr = 1;
1630 no_magic_write = true;
1631 break;
1632 case V3D_QPU_A_STVPMP:
1633 waddr = 2;
1634 no_magic_write = true;
1635 break;
1636
1637 case V3D_QPU_A_LDVPMV_IN:
1638 case V3D_QPU_A_LDVPMD_IN:
1639 case V3D_QPU_A_LDVPMP:
1640 case V3D_QPU_A_LDVPMG_IN:
1641 assert(!instr->alu.add.magic_write);
1642 break;
1643
1644 case V3D_QPU_A_LDVPMV_OUT:
1645 case V3D_QPU_A_LDVPMD_OUT:
1646 case V3D_QPU_A_LDVPMG_OUT:
1647 assert(!instr->alu.add.magic_write);
1648 *packed_instr |= V3D_QPU_MA;
1649 break;
1650
1651 default:
1652 break;
1653 }
1654
1655 switch (instr->alu.add.op) {
1656 case V3D_QPU_A_FADD:
1657 case V3D_QPU_A_FADDNF:
1658 case V3D_QPU_A_FSUB:
1659 case V3D_QPU_A_FMIN:
1660 case V3D_QPU_A_FMAX:
1661 case V3D_QPU_A_FCMP: {
1662 uint32_t output_pack;
1663 uint32_t a_unpack;
1664 uint32_t b_unpack;
1665
1666 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1667 &output_pack)) {
1668 return false;
1669 }
1670 opcode |= output_pack << 4;
1671
1672 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1673 &a_unpack)) {
1674 return false;
1675 }
1676
1677 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1678 &b_unpack)) {
1679 return false;
1680 }
1681
1682 /* These operations with commutative operands are
1683 * distinguished by the order in which their operands come in.
1684 */
1685 bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
1686 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1687 instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1688 ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1689 instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1690 uint32_t temp;
1691
1692 temp = a_unpack;
1693 a_unpack = b_unpack;
1694 b_unpack = temp;
1695
1696 temp = mux_a;
1697 mux_a = mux_b;
1698 mux_b = temp;
1699 }
1700
1701 opcode |= a_unpack << 2;
1702 opcode |= b_unpack << 0;
1703
1704 break;
1705 }
1706
1707 case V3D_QPU_A_VFPACK: {
1708 uint32_t a_unpack;
1709 uint32_t b_unpack;
1710
1711 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1712 instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1713 return false;
1714 }
1715
1716 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1717 &a_unpack)) {
1718 return false;
1719 }
1720
1721 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1722 &b_unpack)) {
1723 return false;
1724 }
1725
1726 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1727 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1728
1729 break;
1730 }
1731
1732 case V3D_QPU_A_FFLOOR:
1733 case V3D_QPU_A_FROUND:
1734 case V3D_QPU_A_FTRUNC:
1735 case V3D_QPU_A_FCEIL:
1736 case V3D_QPU_A_FDX:
1737 case V3D_QPU_A_FDY: {
1738 uint32_t packed;
1739
1740 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1741 &packed)) {
1742 return false;
1743 }
1744 mux_b |= packed;
1745
1746 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1747 &packed)) {
1748 return false;
1749 }
1750 if (packed == 0)
1751 return false;
1752 opcode = (opcode & ~(0x3 << 2)) | packed << 2;
1753 break;
1754 }
1755
1756 case V3D_QPU_A_FTOIN:
1757 case V3D_QPU_A_FTOIZ:
1758 case V3D_QPU_A_FTOUZ:
1759 case V3D_QPU_A_FTOC:
1760 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1761 return false;
1762
1763 uint32_t packed;
1764 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1765 &packed)) {
1766 return false;
1767 }
1768 if (packed == 0)
1769 return false;
1770 opcode |= packed << 2;
1771
1772 break;
1773
1774 case V3D_QPU_A_VFMIN:
1775 case V3D_QPU_A_VFMAX:
1776 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1777 instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
1778 return false;
1779 }
1780
1781 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
1782 &packed)) {
1783 return false;
1784 }
1785 opcode |= packed;
1786 break;
1787
1788 default:
1789 if (instr->alu.add.op != V3D_QPU_A_NOP &&
1790 (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1791 instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
1792 instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
1793 return false;
1794 }
1795 break;
1796 }
1797
1798 *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
1799 *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
1800 *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
1801 *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
1802 if (instr->alu.add.magic_write && !no_magic_write)
1803 *packed_instr |= V3D_QPU_MA;
1804
1805 return true;
1806 }
1807
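/* Packs the add-ALU half of an instruction for v71, where each source is
 * addressed with a raddr (or marked as a small immediate) rather than a
 * mux selector.
 */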
1808 static bool
1809 v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
1810 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1811 {
1812 uint32_t waddr = instr->alu.add.waddr;
1813 uint32_t raddr_a = instr->alu.add.a.raddr;
1814 uint32_t raddr_b = instr->alu.add.b.raddr;
1815
1816 int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1817 const struct opcode_desc *desc =
1818 lookup_opcode_from_instr(devinfo, add_ops_v71,
1819 ARRAY_SIZE(add_ops_v71),
1820 instr->alu.add.op);
1821 if (!desc)
1822 return false;
1823
1824 uint32_t opcode = desc->opcode_first;
1825
1826 /* If an operation doesn't use an arg, its raddr values may be used to
1827 * identify the operation type.
1828 */
1829 if (nsrc < 2)
1830 raddr_b = ffsll(desc->raddr_mask) - 1;
1831
1832 bool no_magic_write = false;
1833
1834 switch (instr->alu.add.op) {
1835 case V3D_QPU_A_STVPMV:
1836 waddr = 0;
1837 no_magic_write = true;
1838 break;
1839 case V3D_QPU_A_STVPMD:
1840 waddr = 1;
1841 no_magic_write = true;
1842 break;
1843 case V3D_QPU_A_STVPMP:
1844 waddr = 2;
1845 no_magic_write = true;
1846 break;
1847
1848 case V3D_QPU_A_LDVPMV_IN:
1849 case V3D_QPU_A_LDVPMD_IN:
1850 case V3D_QPU_A_LDVPMP:
1851 case V3D_QPU_A_LDVPMG_IN:
1852 assert(!instr->alu.add.magic_write);
1853 break;
1854
1855 case V3D_QPU_A_LDVPMV_OUT:
1856 case V3D_QPU_A_LDVPMD_OUT:
1857 case V3D_QPU_A_LDVPMG_OUT:
1858 assert(!instr->alu.add.magic_write);
1859 *packed_instr |= V3D_QPU_MA;
1860 break;
1861
1862 default:
1863 break;
1864 }
1865
1866 switch (instr->alu.add.op) {
1867 case V3D_QPU_A_FADD:
1868 case V3D_QPU_A_FADDNF:
1869 case V3D_QPU_A_FSUB:
1870 case V3D_QPU_A_FMIN:
1871 case V3D_QPU_A_FMAX:
1872 case V3D_QPU_A_FCMP: {
1873 uint32_t output_pack;
1874 uint32_t a_unpack;
1875 uint32_t b_unpack;
1876
1877 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
1878 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1879 &output_pack)) {
1880 return false;
1881 }
1882 opcode |= output_pack << 4;
1883 }
1884
1885 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1886 &a_unpack)) {
1887 return false;
1888 }
1889
1890 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1891 &b_unpack)) {
1892 return false;
1893 }
1894
1895 /* These operations with commutative operands are
1896 * distinguished by the order in which their operands come in.
1897 */
1898 bool ordering =
1899 instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
1900 instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
1901 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1902 instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1903 ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1904 instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1905 uint32_t temp;
1906
1907 temp = a_unpack;
1908 a_unpack = b_unpack;
1909 b_unpack = temp;
1910
1911 temp = raddr_a;
1912 raddr_a = raddr_b;
1913 raddr_b = temp;
1914
1915 /* If we are swapping raddr_a/b we also need to swap
1916 * small_imm_a/b.
1917 */
1918 if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
1919 assert(instr->sig.small_imm_a !=
1920 instr->sig.small_imm_b);
1921 struct v3d_qpu_sig new_sig = instr->sig;
1922 new_sig.small_imm_a = !instr->sig.small_imm_a;
1923 new_sig.small_imm_b = !instr->sig.small_imm_b;
1924 uint32_t sig;
1925 if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
1926 return false;
1927 *packed_instr &= ~V3D_QPU_SIG_MASK;
1928 *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
1929 }
1930 }
1931
1932 opcode |= a_unpack << 2;
1933 opcode |= b_unpack << 0;
1934
1935 break;
1936 }
1937
1938 case V3D_QPU_A_VFPACK: {
1939 uint32_t a_unpack;
1940 uint32_t b_unpack;
1941
1942 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1943 instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1944 return false;
1945 }
1946
1947 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1948 &a_unpack)) {
1949 return false;
1950 }
1951
1952 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1953 &b_unpack)) {
1954 return false;
1955 }
1956
1957 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1958 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1959
1960 break;
1961 }
1962
1963 case V3D_QPU_A_FFLOOR:
1964 case V3D_QPU_A_FROUND:
1965 case V3D_QPU_A_FTRUNC:
1966 case V3D_QPU_A_FCEIL:
1967 case V3D_QPU_A_FDX:
1968 case V3D_QPU_A_FDY: {
1969 uint32_t packed;
1970
1971 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1972 &packed)) {
1973 return false;
1974 }
1975 raddr_b |= packed;
1976
1977 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1978 &packed)) {
1979 return false;
1980 }
1981 if (packed == 0)
1982 return false;
1983 raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
1984 break;
1985 }
1986
1987 case V3D_QPU_A_FTOIN:
1988 case V3D_QPU_A_FTOIZ:
1989 case V3D_QPU_A_FTOUZ:
1990 case V3D_QPU_A_FTOC:
1991 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1992 return false;
1993
1994 uint32_t packed;
1995 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1996 &packed)) {
1997 return false;
1998 }
1999 if (packed == 0)
2000 return false;
2001
2002 raddr_b |= (raddr_b & ~(0x3 << 2)) | packed << 2;
2003
2004 break;
2005
2006 case V3D_QPU_A_VFMIN:
2007 case V3D_QPU_A_VFMAX:
2008 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
2009 instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
2010 return false;
2011 }
2012
2013 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
2014 &packed)) {
2015 return false;
2016 }
2017 opcode |= packed;
2018 break;
2019
2020 case V3D_QPU_A_MOV: {
2021 uint32_t packed;
2022
2023 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
2024 return false;
2025
2026 if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
2027 &packed)) {
2028 return false;
2029 }
2030
2031 raddr_b |= packed << 2;
2032 break;
2033 }
2034
2035 case V3D_QPU_A_FMOV: {
2036 uint32_t packed;
2037
2038 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
2039 &packed)) {
2040 return false;
2041 }
2042 raddr_b = packed;
2043
2044 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
2045 &packed)) {
2046 return false;
2047 }
2048 raddr_b |= packed << 2;
2049 break;
2050 }
2051
2052 default:
2053 if (instr->alu.add.op != V3D_QPU_A_NOP &&
2054 (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
2055 instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
2056 instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
2057 return false;
2058 }
2059 break;
2060 }
2061
2062 *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
2063 *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
2064 *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
2065 *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
2066 if (instr->alu.add.magic_write && !no_magic_write)
2067 *packed_instr |= V3D_QPU_MA;
2068
2069 return true;
2070 }
2071
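/* Packs the mul-ALU half of an instruction for pre-v71 hardware (mux-based
 * sources).
 */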
2072 static bool
2073 v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo,
2074 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2075 {
2076 uint32_t mux_a = instr->alu.mul.a.mux;
2077 uint32_t mux_b = instr->alu.mul.b.mux;
2078 int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2079
2080 const struct opcode_desc *desc =
2081 lookup_opcode_from_instr(devinfo, mul_ops_v33,
2082 ARRAY_SIZE(mul_ops_v33),
2083 instr->alu.mul.op);
2084
2085 if (!desc)
2086 return false;
2087
2088 uint32_t opcode = desc->opcode_first;
2089
2090 /* Some opcodes have a single valid value for their mux a/b, so set
2091 * that here. If mux a/b determine packing, it will be set below.
2092 */
2093 if (nsrc < 2)
2094 mux_b = ffs(desc->mux.b_mask) - 1;
2095
2096 if (nsrc < 1)
2097 mux_a = ffs(desc->mux.a_mask) - 1;
2098
2099 switch (instr->alu.mul.op) {
2100 case V3D_QPU_M_FMUL: {
2101 uint32_t packed;
2102
2103 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2104 &packed)) {
2105 return false;
2106 }
2107 /* No need for a +1 because desc->opcode_first has a 1 in this
2108 * field.
2109 */
2110 opcode += packed << 4;
2111
2112 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2113 &packed)) {
2114 return false;
2115 }
2116 opcode |= packed << 2;
2117
2118 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
2119 &packed)) {
2120 return false;
2121 }
2122 opcode |= packed << 0;
2123 break;
2124 }
2125
2126 case V3D_QPU_M_FMOV: {
2127 uint32_t packed;
2128
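/* The packed output-pack value is split across the encoding: its high
 * bit goes into the low opcode bit, its low bit into mux_b bit 2, and
 * the source unpack fills mux_b's low bits.
 */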
2129 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2130 &packed)) {
2131 return false;
2132 }
2133 opcode |= (packed >> 1) & 1;
2134 mux_b = (packed & 1) << 2;
2135
2136 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2137 &packed)) {
2138 return false;
2139 }
2140 mux_b |= packed;
2141 break;
2142 }
2143
2144 case V3D_QPU_M_VFMUL: {
2145 uint32_t packed;
2146
2147 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2148 return false;
2149
2150 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2151 &packed)) {
2152 return false;
2153 }
2154 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2155 opcode = 8;
2156 else
2157 opcode |= (packed + 4) & 7;
2158
2159 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2160 return false;
2161
2162 break;
2163 }
2164
2165 default:
2166 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2167 (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2168 instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2169 instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2170 return false;
2171 }
2172 break;
2173 }
2174
2175 *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
2176 *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);
2177
2178 *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2179 *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2180 if (instr->alu.mul.magic_write)
2181 *packed_instr |= V3D_QPU_MM;
2182
2183 return true;
2184 }
2185
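/* Packs the mul-ALU half of an instruction for v71 (raddr-based sources).
 */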
2186 static bool
2187 v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
2188 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2189 {
2190 uint32_t raddr_c = instr->alu.mul.a.raddr;
2191 uint32_t raddr_d = instr->alu.mul.b.raddr;
2192 int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2193
2194 const struct opcode_desc *desc =
2195 lookup_opcode_from_instr(devinfo, mul_ops_v71,
2196 ARRAY_SIZE(mul_ops_v71),
2197 instr->alu.mul.op);
2198 if (!desc)
2199 return false;
2200
2201 uint32_t opcode = desc->opcode_first;
2202
2203 /* Some opcodes have a single valid value for their raddr_d, so set
2204 * that here. If raddr_d determines packing, it will be set below.
2205 */
2206 if (nsrc < 2)
2207 raddr_d = ffsll(desc->raddr_mask) - 1;
2208
2209 switch (instr->alu.mul.op) {
2210 case V3D_QPU_M_FMUL: {
2211 uint32_t packed;
2212
2213 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2214 &packed)) {
2215 return false;
2216 }
2217 /* No need for a +1 because desc->opcode_first has a 1 in this
2218 * field.
2219 */
2220 opcode += packed << 4;
2221
2222 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2223 &packed)) {
2224 return false;
2225 }
2226 opcode |= packed << 2;
2227
2228 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
2229 &packed)) {
2230 return false;
2231 }
2232 opcode |= packed << 0;
2233 break;
2234 }
2235
2236 case V3D_QPU_M_FMOV: {
2237 uint32_t packed;
2238
2239 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2240 &packed)) {
2241 return false;
2242 }
2243 raddr_d |= packed;
2244
2245 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2246 &packed)) {
2247 return false;
2248 }
2249 raddr_d |= packed << 2;
2250 break;
2251 }
2252
2253 case V3D_QPU_M_VFMUL: {
2254 unreachable("pending v71 update");
2255 uint32_t packed;
2256
2257 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2258 return false;
2259
2260 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2261 &packed)) {
2262 return false;
2263 }
2264 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2265 opcode = 8;
2266 else
2267 opcode |= (packed + 4) & 7;
2268
2269 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2270 return false;
2271
2272 break;
2273 }
2274
2275 case V3D_QPU_M_MOV: {
2276 uint32_t packed;
2277
2278 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2279 return false;
2280
2281 if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
2282 &packed)) {
2283 return false;
2284 }
2285
2286 raddr_d |= packed << 2;
2287 break;
2288 }
2289
2290 default:
2291 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2292 (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2293 instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2294 instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2295 return false;
2296 }
2297 break;
2298 }
2299
2300 *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
2301 *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
2302 *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2303 *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2304 if (instr->alu.mul.magic_write)
2305 *packed_instr |= V3D_QPU_MM;
2306
2307 return true;
2308 }
2309
2310 static bool
2311 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
2312 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2313 {
2314 if (devinfo->ver < 71)
2315 return v3d33_qpu_add_pack(devinfo, instr, packed_instr);
2316 else
2317 return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
2318 }
2319
2320 static bool
2321 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
2322 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2323 {
2324 if (devinfo->ver < 71)
2325 return v3d33_qpu_mul_pack(devinfo, instr, packed_instr);
2326 else
2327 return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
2328 }
2329
2330 static bool
2331 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
2332 uint64_t packed_instr,
2333 struct v3d_qpu_instr *instr)
2334 {
2335 instr->type = V3D_QPU_INSTR_TYPE_ALU;
2336
2337 if (!v3d_qpu_sig_unpack(devinfo,
2338 QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
2339 &instr->sig))
2340 return false;
2341
2342 uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
2343 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
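/* When the signal writes to an address, the cond field carries the
 * signal's write address and magic bit instead of ALU flags.
 */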
2344 instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
2345 instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
2346
2347 instr->flags.ac = V3D_QPU_COND_NONE;
2348 instr->flags.mc = V3D_QPU_COND_NONE;
2349 instr->flags.apf = V3D_QPU_PF_NONE;
2350 instr->flags.mpf = V3D_QPU_PF_NONE;
2351 instr->flags.auf = V3D_QPU_UF_NONE;
2352 instr->flags.muf = V3D_QPU_UF_NONE;
2353 } else {
2354 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
2355 return false;
2356 }
2357
2358 if (devinfo->ver <= 71) {
2359 /*
2360 * For v71 these are set on add/mul unpack instead, as the raddrs
2361 * are now part of v3d_qpu_input.
2362 */
2363 instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
2364 instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
2365 }
2366
2367 if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
2368 return false;
2369
2370 if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
2371 return false;
2372
2373 return true;
2374 }
2375
2376 static bool
2377 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
2378 uint64_t packed_instr,
2379 struct v3d_qpu_instr *instr)
2380 {
2381 instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
2382
2383 uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
2384 if (cond == 0)
2385 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
2386 else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
2387 V3D_QPU_BRANCH_COND_ALLNA)
2388 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
2389 else
2390 return false;
2391
2392 uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
2393 if (msfign == 3)
2394 return false;
2395 instr->branch.msfign = msfign;
2396
2397 instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
2398
2399 instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
2400 if (instr->branch.ub) {
2401 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
2402 V3D_QPU_BRANCH_BDU);
2403 }
2404
2405 instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
2406 V3D_QPU_RADDR_A);
2407
2408 instr->branch.offset = 0;
2409
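/* The branch target is split across two fields: ADDR_LOW carries bits
 * [23:3] of the offset and ADDR_HIGH carries bits [31:24].
 */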
2410 instr->branch.offset +=
2411 QPU_GET_FIELD(packed_instr,
2412 V3D_QPU_BRANCH_ADDR_LOW) << 3;
2413
2414 instr->branch.offset +=
2415 QPU_GET_FIELD(packed_instr,
2416 V3D_QPU_BRANCH_ADDR_HIGH) << 24;
2417
2418 return true;
2419 }
2420
2421 bool
2422 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
2423 uint64_t packed_instr,
2424 struct v3d_qpu_instr *instr)
2425 {
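/* A non-zero mul opcode field marks an ALU instruction; otherwise the
 * signature field decides whether this is a branch, and any other
 * encoding is rejected.
 */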
2426 if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
2427 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
2428 } else {
2429 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
2430
2431 if ((sig & 24) == 16) {
2432 return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
2433 instr);
2434 } else {
2435 return false;
2436 }
2437 }
2438 }
2439
2440 static bool
2441 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
2442 const struct v3d_qpu_instr *instr,
2443 uint64_t *packed_instr)
2444 {
2445 uint32_t sig;
2446 if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
2447 return false;
2448 *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
2449
2450 if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
2451 if (devinfo->ver < 71) {
2452 /*
2453 * For v71 these are set on add/mul pack instead, as the raddrs
2454 * are now part of v3d_qpu_input.
2455 */
2456 *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
2457 *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
2458 }
2459
2460 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
2461 return false;
2462 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
2463 return false;
2464
2465 uint32_t flags;
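/* As on unpack: when the signal writes to an address, the cond field
 * is reused for the signal's write address, so no ALU flags may be set.
 */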
2466 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2467 if (instr->flags.ac != V3D_QPU_COND_NONE ||
2468 instr->flags.mc != V3D_QPU_COND_NONE ||
2469 instr->flags.apf != V3D_QPU_PF_NONE ||
2470 instr->flags.mpf != V3D_QPU_PF_NONE ||
2471 instr->flags.auf != V3D_QPU_UF_NONE ||
2472 instr->flags.muf != V3D_QPU_UF_NONE) {
2473 return false;
2474 }
2475
2476 flags = instr->sig_addr;
2477 if (instr->sig_magic)
2478 flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
2479 } else {
2480 if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
2481 return false;
2482 }
2483
2484 *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
2485 } else {
2486 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
2487 return false;
2488 }
2489
2490 return true;
2491 }
2492
2493 static bool
2494 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
2495 const struct v3d_qpu_instr *instr,
2496 uint64_t *packed_instr)
2497 {
2498 *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
2499
2500 if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2501 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
2502 V3D_QPU_BRANCH_COND_A0),
2503 V3D_QPU_BRANCH_COND);
2504 }
2505
2506 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2507 V3D_QPU_BRANCH_MSFIGN);
2508
2509 *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
2510 V3D_QPU_BRANCH_BDI);
2511
2512 if (instr->branch.ub) {
2513 *packed_instr |= V3D_QPU_BRANCH_UB;
2514 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
2515 V3D_QPU_BRANCH_BDU);
2516 }
2517
2518 switch (instr->branch.bdi) {
2519 case V3D_QPU_BRANCH_DEST_ABS:
2520 case V3D_QPU_BRANCH_DEST_REL:
2521 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2522 V3D_QPU_BRANCH_MSFIGN);
2523
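/* Split the offset into the low (bits [23:3]) and high (bits [31:24])
 * address fields, mirroring the unpack path above.
 */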
2524 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
2525 ~0xff000000) >> 3,
2526 V3D_QPU_BRANCH_ADDR_LOW);
2527
2528 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
2529 V3D_QPU_BRANCH_ADDR_HIGH);
2530 break;
2531 default:
2532 break;
2533 }
2534
2535 if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
2536 instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
2537 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
2538 V3D_QPU_RADDR_A);
2539 }
2540
2541 return true;
2542 }
2543
2544 bool
2545 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
2546 const struct v3d_qpu_instr *instr,
2547 uint64_t *packed_instr)
2548 {
2549 *packed_instr = 0;
2550
2551 switch (instr->type) {
2552 case V3D_QPU_INSTR_TYPE_ALU:
2553 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
2554 case V3D_QPU_INSTR_TYPE_BRANCH:
2555 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
2556 default:
2557 return false;
2558 }
2559 }
2560