/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <string.h>
#include "util/macros.h"
#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

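/* Disassembly name for a magic write address.  A few entries depend on the
 * hardware generation: the pre-4.x TMU address shares its encoding with the
 * V3D 4.x UNIFA, and the V3D 7.x QUAD/REP writes alias R5/R5REP, so those
 * are resolved before the lookup table.
 */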
const char *
v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
                         enum v3d_qpu_waddr waddr)
{
        /* On V3D 3.x, TMU occupies the address that the table below lists
         * as the V3D 4.x UNIFA.
         */
        if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
                return "tmu";

        /* On V3D 7.x, QUAD and REP alias R5 and R5REP in the table below. */
        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
                return "quad";

        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
                return "rep";

        static const char *waddr_magic[] = {
                [V3D_QPU_WADDR_R0] = "r0",
                [V3D_QPU_WADDR_R1] = "r1",
                [V3D_QPU_WADDR_R2] = "r2",
                [V3D_QPU_WADDR_R3] = "r3",
                [V3D_QPU_WADDR_R4] = "r4",
                [V3D_QPU_WADDR_R5] = "r5",
                [V3D_QPU_WADDR_NOP] = "-",
                [V3D_QPU_WADDR_TLB] = "tlb",
                [V3D_QPU_WADDR_TLBU] = "tlbu",
                [V3D_QPU_WADDR_UNIFA] = "unifa",
                [V3D_QPU_WADDR_TMUL] = "tmul",
                [V3D_QPU_WADDR_TMUD] = "tmud",
                [V3D_QPU_WADDR_TMUA] = "tmua",
                [V3D_QPU_WADDR_TMUAU] = "tmuau",
                [V3D_QPU_WADDR_VPM] = "vpm",
                [V3D_QPU_WADDR_VPMU] = "vpmu",
                [V3D_QPU_WADDR_SYNC] = "sync",
                [V3D_QPU_WADDR_SYNCU] = "syncu",
                [V3D_QPU_WADDR_SYNCB] = "syncb",
                [V3D_QPU_WADDR_RECIP] = "recip",
                [V3D_QPU_WADDR_RSQRT] = "rsqrt",
                [V3D_QPU_WADDR_EXP] = "exp",
                [V3D_QPU_WADDR_LOG] = "log",
                [V3D_QPU_WADDR_SIN] = "sin",
                [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
                [V3D_QPU_WADDR_TMUC] = "tmuc",
                [V3D_QPU_WADDR_TMUS] = "tmus",
                [V3D_QPU_WADDR_TMUT] = "tmut",
                [V3D_QPU_WADDR_TMUR] = "tmur",
                [V3D_QPU_WADDR_TMUI] = "tmui",
                [V3D_QPU_WADDR_TMUB] = "tmub",
                [V3D_QPU_WADDR_TMUDREF] = "tmudref",
                [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
                [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
                [V3D_QPU_WADDR_TMUSF] = "tmusf",
                [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
                [V3D_QPU_WADDR_TMUHS] = "tmuhs",
                [V3D_QPU_WADDR_TMUHSCM] = "tmuhscm",
                [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
                [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
                [V3D_QPU_WADDR_R5REP] = "r5rep",
        };

        return waddr_magic[waddr];
}

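/* Disassembly names for the add-ALU opcodes.  Out-of-range values return
 * NULL so callers can detect unknown opcodes; v3d_qpu_mul_op_name() below
 * follows the same convention for the mul ALU.
 */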
const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
{
        static const char *op_names[] = {
                [V3D_QPU_A_FADD] = "fadd",
                [V3D_QPU_A_FADDNF] = "faddnf",
                [V3D_QPU_A_VFPACK] = "vfpack",
                [V3D_QPU_A_ADD] = "add",
                [V3D_QPU_A_SUB] = "sub",
                [V3D_QPU_A_FSUB] = "fsub",
                [V3D_QPU_A_MIN] = "min",
                [V3D_QPU_A_MAX] = "max",
                [V3D_QPU_A_UMIN] = "umin",
                [V3D_QPU_A_UMAX] = "umax",
                [V3D_QPU_A_SHL] = "shl",
                [V3D_QPU_A_SHR] = "shr",
                [V3D_QPU_A_ASR] = "asr",
                [V3D_QPU_A_ROR] = "ror",
                [V3D_QPU_A_FMIN] = "fmin",
                [V3D_QPU_A_FMAX] = "fmax",
                [V3D_QPU_A_VFMIN] = "vfmin",
                [V3D_QPU_A_AND] = "and",
                [V3D_QPU_A_OR] = "or",
                [V3D_QPU_A_XOR] = "xor",
                [V3D_QPU_A_VADD] = "vadd",
                [V3D_QPU_A_VSUB] = "vsub",
                [V3D_QPU_A_NOT] = "not",
                [V3D_QPU_A_NEG] = "neg",
                [V3D_QPU_A_FLAPUSH] = "flapush",
                [V3D_QPU_A_FLBPUSH] = "flbpush",
                [V3D_QPU_A_FLPOP] = "flpop",
                [V3D_QPU_A_RECIP] = "recip",
                [V3D_QPU_A_SETMSF] = "setmsf",
                [V3D_QPU_A_SETREVF] = "setrevf",
                [V3D_QPU_A_NOP] = "nop",
                [V3D_QPU_A_TIDX] = "tidx",
                [V3D_QPU_A_EIDX] = "eidx",
                [V3D_QPU_A_LR] = "lr",
                [V3D_QPU_A_VFLA] = "vfla",
                [V3D_QPU_A_VFLNA] = "vflna",
                [V3D_QPU_A_VFLB] = "vflb",
                [V3D_QPU_A_VFLNB] = "vflnb",
                [V3D_QPU_A_FXCD] = "fxcd",
                [V3D_QPU_A_XCD] = "xcd",
                [V3D_QPU_A_FYCD] = "fycd",
                [V3D_QPU_A_YCD] = "ycd",
                [V3D_QPU_A_MSF] = "msf",
                [V3D_QPU_A_REVF] = "revf",
                [V3D_QPU_A_VDWWT] = "vdwwt",
                [V3D_QPU_A_IID] = "iid",
                [V3D_QPU_A_SAMPID] = "sampid",
                [V3D_QPU_A_BARRIERID] = "barrierid",
                [V3D_QPU_A_TMUWT] = "tmuwt",
                [V3D_QPU_A_VPMSETUP] = "vpmsetup",
                [V3D_QPU_A_VPMWT] = "vpmwt",
                [V3D_QPU_A_FLAFIRST] = "flafirst",
                [V3D_QPU_A_FLNAFIRST] = "flnafirst",
                [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
                [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
                [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
                [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
                [V3D_QPU_A_LDVPMP] = "ldvpmp",
                [V3D_QPU_A_RSQRT] = "rsqrt",
                [V3D_QPU_A_EXP] = "exp",
                [V3D_QPU_A_LOG] = "log",
                [V3D_QPU_A_SIN] = "sin",
                [V3D_QPU_A_RSQRT2] = "rsqrt2",
                [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
                [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
                [V3D_QPU_A_FCMP] = "fcmp",
                [V3D_QPU_A_VFMAX] = "vfmax",
                [V3D_QPU_A_FROUND] = "fround",
                [V3D_QPU_A_FTOIN] = "ftoin",
                [V3D_QPU_A_FTRUNC] = "ftrunc",
                [V3D_QPU_A_FTOIZ] = "ftoiz",
                [V3D_QPU_A_FFLOOR] = "ffloor",
                [V3D_QPU_A_FTOUZ] = "ftouz",
                [V3D_QPU_A_FCEIL] = "fceil",
                [V3D_QPU_A_FTOC] = "ftoc",
                [V3D_QPU_A_FDX] = "fdx",
                [V3D_QPU_A_FDY] = "fdy",
                [V3D_QPU_A_STVPMV] = "stvpmv",
                [V3D_QPU_A_STVPMD] = "stvpmd",
                [V3D_QPU_A_STVPMP] = "stvpmp",
                [V3D_QPU_A_ITOF] = "itof",
                [V3D_QPU_A_CLZ] = "clz",
                [V3D_QPU_A_UTOF] = "utof",
                [V3D_QPU_A_MOV] = "mov",
                [V3D_QPU_A_FMOV] = "fmov",
                [V3D_QPU_A_VPACK] = "vpack",
                [V3D_QPU_A_V8PACK] = "v8pack",
                [V3D_QPU_A_V10PACK] = "v10pack",
                [V3D_QPU_A_V11FPACK] = "v11fpack",
                [V3D_QPU_A_BALLOT] = "ballot",
                [V3D_QPU_A_BCASTF] = "bcastf",
                [V3D_QPU_A_ALLEQ] = "alleq",
                [V3D_QPU_A_ALLFEQ] = "allfeq",
                [V3D_QPU_A_ROTQ] = "rotq",
                [V3D_QPU_A_ROT] = "rot",
                [V3D_QPU_A_SHUFFLE] = "shuffle",
        };

        if (op >= ARRAY_SIZE(op_names))
                return NULL;

        return op_names[op];
}

const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
{
        static const char *op_names[] = {
                [V3D_QPU_M_ADD] = "add",
                [V3D_QPU_M_SUB] = "sub",
                [V3D_QPU_M_UMUL24] = "umul24",
                [V3D_QPU_M_VFMUL] = "vfmul",
                [V3D_QPU_M_SMUL24] = "smul24",
                [V3D_QPU_M_MULTOP] = "multop",
                [V3D_QPU_M_FMOV] = "fmov",
                [V3D_QPU_M_MOV] = "mov",
                [V3D_QPU_M_NOP] = "nop",
                [V3D_QPU_M_FMUL] = "fmul",
                [V3D_QPU_M_FTOUNORM16] = "ftounorm16",
                [V3D_QPU_M_FTOSNORM16] = "ftosnorm16",
                [V3D_QPU_M_VFTOUNORM8] = "vftounorm8",
                [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8",
                [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo",
                [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi",
        };

        if (op >= ARRAY_SIZE(op_names))
                return NULL;

        return op_names[op];
}

const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)
{
        switch (cond) {
        case V3D_QPU_COND_NONE:
                return "";
        case V3D_QPU_COND_IFA:
                return ".ifa";
        case V3D_QPU_COND_IFB:
                return ".ifb";
        case V3D_QPU_COND_IFNA:
                return ".ifna";
        case V3D_QPU_COND_IFNB:
                return ".ifnb";
        default:
                unreachable("bad cond value");
        }
}

const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
{
        switch (cond) {
        case V3D_QPU_BRANCH_COND_ALWAYS:
                return "";
        case V3D_QPU_BRANCH_COND_A0:
                return ".a0";
        case V3D_QPU_BRANCH_COND_NA0:
                return ".na0";
        case V3D_QPU_BRANCH_COND_ALLA:
                return ".alla";
        case V3D_QPU_BRANCH_COND_ANYNA:
                return ".anyna";
        case V3D_QPU_BRANCH_COND_ANYA:
                return ".anya";
        case V3D_QPU_BRANCH_COND_ALLNA:
                return ".allna";
        default:
                unreachable("bad branch cond value");
        }
}

const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
{
        switch (msfign) {
        case V3D_QPU_MSFIGN_NONE:
                return "";
        case V3D_QPU_MSFIGN_P:
                return "p";
        case V3D_QPU_MSFIGN_Q:
                return "q";
        default:
                unreachable("bad msfign value");
        }
}

const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)
{
        switch (pf) {
        case V3D_QPU_PF_NONE:
                return "";
        case V3D_QPU_PF_PUSHZ:
                return ".pushz";
        case V3D_QPU_PF_PUSHN:
                return ".pushn";
        case V3D_QPU_PF_PUSHC:
                return ".pushc";
        default:
                unreachable("bad pf value");
        }
}

const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)
{
        switch (uf) {
        case V3D_QPU_UF_NONE:
                return "";
        case V3D_QPU_UF_ANDZ:
                return ".andz";
        case V3D_QPU_UF_ANDNZ:
                return ".andnz";
        case V3D_QPU_UF_NORZ:
                return ".norz";
        case V3D_QPU_UF_NORNZ:
                return ".nornz";
        case V3D_QPU_UF_ANDN:
                return ".andn";
        case V3D_QPU_UF_ANDNN:
                return ".andnn";
        case V3D_QPU_UF_NORN:
                return ".norn";
        case V3D_QPU_UF_NORNN:
                return ".nornn";
        case V3D_QPU_UF_ANDC:
                return ".andc";
        case V3D_QPU_UF_ANDNC:
                return ".andnc";
        case V3D_QPU_UF_NORC:
                return ".norc";
        case V3D_QPU_UF_NORNC:
                return ".nornc";
        default:
                unreachable("bad uf value");
        }
}

const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                return "";
        case V3D_QPU_PACK_L:
                return ".l";
        case V3D_QPU_PACK_H:
                return ".h";
        default:
                unreachable("bad pack value");
        }
}

const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
{
        switch (unpack) {
        case V3D_QPU_UNPACK_NONE:
                return "";
        case V3D_QPU_UNPACK_L:
                return ".l";
        case V3D_QPU_UNPACK_H:
                return ".h";
        case V3D_QPU_UNPACK_ABS:
                return ".abs";
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                return ".ff";
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                return ".ll";
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                return ".hh";
        case V3D_QPU_UNPACK_SWAP_16:
                return ".swp";
        default:
                unreachable("bad unpack value");
        }
}

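/* Per-opcode argument masks: D marks ops that write a destination, A and B
 * mark the first and second source operand.
 */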
#define D 1
#define A 2
#define B 4
static const uint8_t add_op_args[] = {
        [V3D_QPU_A_FADD] = D | A | B,
        [V3D_QPU_A_FADDNF] = D | A | B,
        [V3D_QPU_A_VFPACK] = D | A | B,
        [V3D_QPU_A_ADD] = D | A | B,
        [V3D_QPU_A_SUB] = D | A | B,
        [V3D_QPU_A_FSUB] = D | A | B,
        [V3D_QPU_A_MIN] = D | A | B,
        [V3D_QPU_A_MAX] = D | A | B,
        [V3D_QPU_A_UMIN] = D | A | B,
        [V3D_QPU_A_UMAX] = D | A | B,
        [V3D_QPU_A_SHL] = D | A | B,
        [V3D_QPU_A_SHR] = D | A | B,
        [V3D_QPU_A_ASR] = D | A | B,
        [V3D_QPU_A_ROR] = D | A | B,
        [V3D_QPU_A_FMIN] = D | A | B,
        [V3D_QPU_A_FMAX] = D | A | B,
        [V3D_QPU_A_VFMIN] = D | A | B,

        [V3D_QPU_A_AND] = D | A | B,
        [V3D_QPU_A_OR] = D | A | B,
        [V3D_QPU_A_XOR] = D | A | B,

        [V3D_QPU_A_VADD] = D | A | B,
        [V3D_QPU_A_VSUB] = D | A | B,
        [V3D_QPU_A_NOT] = D | A,
        [V3D_QPU_A_NEG] = D | A,
        [V3D_QPU_A_FLAPUSH] = D | A,
        [V3D_QPU_A_FLBPUSH] = D | A,
        [V3D_QPU_A_FLPOP] = D | A,
        [V3D_QPU_A_RECIP] = D | A,
        [V3D_QPU_A_SETMSF] = D | A,
        [V3D_QPU_A_SETREVF] = D | A,
        [V3D_QPU_A_NOP] = 0,
        [V3D_QPU_A_TIDX] = D,
        [V3D_QPU_A_EIDX] = D,
        [V3D_QPU_A_LR] = D,
        [V3D_QPU_A_VFLA] = D,
        [V3D_QPU_A_VFLNA] = D,
        [V3D_QPU_A_VFLB] = D,
        [V3D_QPU_A_VFLNB] = D,

        [V3D_QPU_A_FXCD] = D,
        [V3D_QPU_A_XCD] = D,
        [V3D_QPU_A_FYCD] = D,
        [V3D_QPU_A_YCD] = D,

        [V3D_QPU_A_MSF] = D,
        [V3D_QPU_A_REVF] = D,
        [V3D_QPU_A_VDWWT] = D,
        [V3D_QPU_A_IID] = D,
        [V3D_QPU_A_SAMPID] = D,
        [V3D_QPU_A_BARRIERID] = D,
        [V3D_QPU_A_TMUWT] = D,
        [V3D_QPU_A_VPMWT] = D,
        [V3D_QPU_A_FLAFIRST] = D,
        [V3D_QPU_A_FLNAFIRST] = D,

        [V3D_QPU_A_VPMSETUP] = D | A,

        [V3D_QPU_A_LDVPMV_IN] = D | A,
        [V3D_QPU_A_LDVPMV_OUT] = D | A,
        [V3D_QPU_A_LDVPMD_IN] = D | A,
        [V3D_QPU_A_LDVPMD_OUT] = D | A,
        [V3D_QPU_A_LDVPMP] = D | A,
        [V3D_QPU_A_RSQRT] = D | A,
        [V3D_QPU_A_EXP] = D | A,
        [V3D_QPU_A_LOG] = D | A,
        [V3D_QPU_A_SIN] = D | A,
        [V3D_QPU_A_RSQRT2] = D | A,
        [V3D_QPU_A_LDVPMG_IN] = D | A | B,
        [V3D_QPU_A_LDVPMG_OUT] = D | A | B,

        /* FIXME: MOVABSNEG */

        [V3D_QPU_A_FCMP] = D | A | B,
        [V3D_QPU_A_VFMAX] = D | A | B,

        [V3D_QPU_A_FROUND] = D | A,
        [V3D_QPU_A_FTOIN] = D | A,
        [V3D_QPU_A_FTRUNC] = D | A,
        [V3D_QPU_A_FTOIZ] = D | A,
        [V3D_QPU_A_FFLOOR] = D | A,
        [V3D_QPU_A_FTOUZ] = D | A,
        [V3D_QPU_A_FCEIL] = D | A,
        [V3D_QPU_A_FTOC] = D | A,

        [V3D_QPU_A_FDX] = D | A,
        [V3D_QPU_A_FDY] = D | A,

        [V3D_QPU_A_STVPMV] = A | B,
        [V3D_QPU_A_STVPMD] = A | B,
        [V3D_QPU_A_STVPMP] = A | B,

        [V3D_QPU_A_ITOF] = D | A,
        [V3D_QPU_A_CLZ] = D | A,
        [V3D_QPU_A_UTOF] = D | A,

        [V3D_QPU_A_MOV] = D | A,
        [V3D_QPU_A_FMOV] = D | A,
        [V3D_QPU_A_VPACK] = D | A | B,
        [V3D_QPU_A_V8PACK] = D | A | B,
        [V3D_QPU_A_V10PACK] = D | A | B,
        [V3D_QPU_A_V11FPACK] = D | A | B,

        [V3D_QPU_A_BALLOT] = D | A,
        [V3D_QPU_A_BCASTF] = D | A,
        [V3D_QPU_A_ALLEQ] = D | A,
        [V3D_QPU_A_ALLFEQ] = D | A,
        [V3D_QPU_A_ROTQ] = D | A | B,
        [V3D_QPU_A_ROT] = D | A | B,
        [V3D_QPU_A_SHUFFLE] = D | A | B,
};

static const uint8_t mul_op_args[] = {
        [V3D_QPU_M_ADD] = D | A | B,
        [V3D_QPU_M_SUB] = D | A | B,
        [V3D_QPU_M_UMUL24] = D | A | B,
        [V3D_QPU_M_VFMUL] = D | A | B,
        [V3D_QPU_M_SMUL24] = D | A | B,
        [V3D_QPU_M_MULTOP] = D | A | B,
        [V3D_QPU_M_FMOV] = D | A,
        [V3D_QPU_M_NOP] = 0,
        [V3D_QPU_M_MOV] = D | A,
        [V3D_QPU_M_FMUL] = D | A | B,
        [V3D_QPU_M_FTOUNORM16] = D | A,
        [V3D_QPU_M_FTOSNORM16] = D | A,
        [V3D_QPU_M_VFTOUNORM8] = D | A,
        [V3D_QPU_M_VFTOSNORM8] = D | A,
        [V3D_QPU_M_VFTOUNORM10LO] = D | A,
        [V3D_QPU_M_VFTOUNORM10HI] = D | A,
};

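/* Convenience queries over the tables above, e.g. v3d_qpu_add_op_num_src()
 * reports 2 for V3D_QPU_A_FADD (D | A | B) and 0 for V3D_QPU_A_TIDX (D).
 */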
bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
{
        assert(op < ARRAY_SIZE(add_op_args));

        return add_op_args[op] & D;
}

bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
{
        assert(op < ARRAY_SIZE(mul_op_args));

        return mul_op_args[op] & D;
}

int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
{
        assert(op < ARRAY_SIZE(add_op_args));

        uint8_t args = add_op_args[op];
        if (args & B)
                return 2;
        else if (args & A)
                return 1;
        else
                return 0;
}

int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
{
        assert(op < ARRAY_SIZE(mul_op_args));

        uint8_t args = mul_op_args[op];
        if (args & B)
                return 2;
        else if (args & A)
                return 1;
        else
                return 0;
}

enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
{
        switch (cond) {
        case V3D_QPU_COND_IFA:
                return V3D_QPU_COND_IFNA;
        case V3D_QPU_COND_IFNA:
                return V3D_QPU_COND_IFA;
        case V3D_QPU_COND_IFB:
                return V3D_QPU_COND_IFNB;
        case V3D_QPU_COND_IFNB:
                return V3D_QPU_COND_IFB;
        default:
                unreachable("Non-invertible cond");
        }
}

bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
{
        switch (waddr) {
        case V3D_QPU_WADDR_RECIP:
        case V3D_QPU_WADDR_RSQRT:
        case V3D_QPU_WADDR_EXP:
        case V3D_QPU_WADDR_LOG:
        case V3D_QPU_WADDR_SIN:
        case V3D_QPU_WADDR_RSQRT2:
                return true;
        default:
                return false;
        }
}

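/* True if the magic write address targets the TMU.  The ranges checked
 * differ by version: on V3D 4.x and later the range starts at TMUD, while
 * on 3.x it starts at the generic TMU write address.
 */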
bool
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
                           enum v3d_qpu_waddr waddr)
{
        if (devinfo->ver >= 40) {
                return ((waddr >= V3D_QPU_WADDR_TMUD &&
                         waddr <= V3D_QPU_WADDR_TMUAU) ||
                        (waddr >= V3D_QPU_WADDR_TMUC &&
                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
        } else {
                return ((waddr >= V3D_QPU_WADDR_TMU &&
                         waddr <= V3D_QPU_WADDR_TMUAU) ||
                        (waddr >= V3D_QPU_WADDR_TMUC &&
                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
        }
}

bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
{
        return (inst->sig.ldtmu ||
                (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
                 inst->alu.add.op == V3D_QPU_A_TMUWT));
}

bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_TLB ||
                waddr == V3D_QPU_WADDR_TLBU);
}

bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_VPM ||
                waddr == V3D_QPU_WADDR_VPMU);
}

bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_SYNC ||
                waddr == V3D_QPU_WADDR_SYNCB ||
                waddr == V3D_QPU_WADDR_SYNCU);
}

bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
{
        switch (waddr) {
        case V3D_QPU_WADDR_VPMU:
        case V3D_QPU_WADDR_TLBU:
        case V3D_QPU_WADDR_TMUAU:
        case V3D_QPU_WADDR_SYNCU:
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)
{
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
        case V3D_QPU_A_LDVPMG_OUT:
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
{
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                return true;
        default:
                return false;
        }
}

bool
v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
{
        return inst->sig.ldtlb || inst->sig.ldtlbu;
}

bool
v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst);
}

bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_instr_is_sfu(inst) || v3d_qpu_instr_is_legacy_sfu(inst);
}

/* Checks whether the instruction implements an SFU operation by writing to
 * specific magic register addresses instead of using SFU ALU opcodes.
 */
bool
v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

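/* Checks whether the instruction implements an SFU operation through an ALU
 * opcode, as opposed to the legacy magic-waddr form handled above.
 */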
bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                switch (inst->alu.add.op) {
                case V3D_QPU_A_RECIP:
                case V3D_QPU_A_RSQRT:
                case V3D_QPU_A_EXP:
                case V3D_QPU_A_LOG:
                case V3D_QPU_A_SIN:
                case V3D_QPU_A_RSQRT2:
                case V3D_QPU_A_BALLOT:
                case V3D_QPU_A_BCASTF:
                case V3D_QPU_A_ALLEQ:
                case V3D_QPU_A_ALLFEQ:
                case V3D_QPU_A_ROTQ:
                case V3D_QPU_A_ROT:
                case V3D_QPU_A_SHUFFLE:
                        return true;
                default:
                        return false;
                }
        }
        return false;
}

bool
v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *inst)
{
        return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
                ((inst->alu.add.op != V3D_QPU_A_NOP &&
                  inst->alu.add.magic_write &&
                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
                 (inst->alu.mul.op != V3D_QPU_M_NOP &&
                  inst->alu.mul.magic_write &&
                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
}

bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
                            const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_writes_tmu(devinfo, inst) &&
               (!inst->alu.add.magic_write ||
                inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
               (!inst->alu.mul.magic_write ||
                inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
}

bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
{
        if (inst->sig.ldvpm)
                return true;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
                        return true;
        }

        return false;
}

bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
                        return true;

                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
                     const struct v3d_qpu_instr *inst)
{
        if (devinfo->ver < 40)
                return false;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }

                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
                    inst->sig_magic &&
                    inst->sig_addr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
{
        return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
               inst->alu.add.op == V3D_QPU_A_VPMWT;
}

bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst);
}

bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_reads_vpm(inst) ||
               v3d_qpu_writes_vpm(inst) ||
               v3d_qpu_waits_vpm(inst);
}

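/* Helper: true if either ALU op, or a signal that writes an address, targets
 * the given magic write address explicitly.
 */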
static bool
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *inst,
                                  uint32_t waddr)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
                        return true;

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
                        return true;
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
            inst->sig_magic && inst->sig_addr == waddr) {
                return true;
        }

        return false;
}

bool
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
                return true;

        return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
}

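/* r4 is also written implicitly: by SFU writes, and by ldtmu when the
 * signal does not carry its own destination address.
 */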
bool
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
                        return true;
                }
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
                if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
                        return true;
        } else if (inst->sig.ldtmu) {
                return true;
        }

        return false;
}

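/* r5 is written implicitly by ldvary, ldunif and ldunifa, in addition to
 * explicit magic writes.
 */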
bool
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
                return true;

        return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
}

bool
v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
                     const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (v3d_qpu_writes_r5(devinfo, inst))
                return true;
        if (v3d_qpu_writes_r4(devinfo, inst))
                return true;
        if (v3d_qpu_writes_r3(devinfo, inst))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
                return true;

        return false;
}

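/* On V3D 7.x, which has no accumulators, the ldvary/ldunif/ldunifa signals
 * write rf0 implicitly instead of r5.
 */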
bool
v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
                              const struct v3d_qpu_instr *inst)
{
        if (devinfo->ver >= 71 &&
            (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
                return true;
        }

        return false;
}

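/* Source-operand queries: v3d_qpu_uses_mux() checks the mux-based source
 * encoding used up to V3D 4.x, while v3d71_qpu_reads_raddr() below handles
 * the V3D 7.x register-file based encoding.
 */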
bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
{
        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);

        return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) ||
                (add_nsrc > 1 && inst->alu.add.b.mux == mux) ||
                (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) ||
                (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
}

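/* A source only counts as a raddr read when the matching small-immediate
 * signal is not set, since the raddr field then encodes the immediate
 * instead of a register.
 */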
bool
v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
{
        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);

        return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) ||
               (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) ||
               (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) ||
               (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr);
}

bool
v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *inst,
                                  uint8_t waddr)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        if (v3d_qpu_add_op_has_dst(inst->alu.add.op) &&
            !inst->alu.add.magic_write &&
            inst->alu.add.waddr == waddr) {
                return true;
        }

        if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) &&
            !inst->alu.mul.magic_write &&
            inst->alu.mul.waddr == waddr) {
                return true;
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
            !inst->sig_magic && inst->sig_addr == waddr) {
                return true;
        }

        return false;
}

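/* Whether the instruction's load signals write to an explicit destination
 * (sig_addr/sig_magic).  Only V3D 4.1+ supports this, and only for the
 * signals listed here, which are the ones that return data.
 */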
bool
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                           const struct v3d_qpu_sig *sig)
{
        if (devinfo->ver < 41)
                return false;

        return (sig->ldunifrf ||
                sig->ldunifarf ||
                sig->ldvary ||
                sig->ldtmu ||
                sig->ldtlb ||
                sig->ldtlbu);
}

bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
                return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
        } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->flags.ac != V3D_QPU_COND_NONE ||
                    inst->flags.mc != V3D_QPU_COND_NONE ||
                    inst->flags.auf != V3D_QPU_UF_NONE ||
                    inst->flags.muf != V3D_QPU_UF_NONE)
                        return true;

                switch (inst->alu.add.op) {
                case V3D_QPU_A_VFLA:
                case V3D_QPU_A_VFLNA:
                case V3D_QPU_A_VFLB:
                case V3D_QPU_A_VFLNB:
                case V3D_QPU_A_FLAPUSH:
                case V3D_QPU_A_FLBPUSH:
                case V3D_QPU_A_FLAFIRST:
                case V3D_QPU_A_FLNAFIRST:
                        return true;
                default:
                        break;
                }
        }

        return false;
}

bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
{
        if (inst->flags.apf != V3D_QPU_PF_NONE ||
            inst->flags.mpf != V3D_QPU_PF_NONE ||
            inst->flags.auf != V3D_QPU_UF_NONE ||
            inst->flags.muf != V3D_QPU_UF_NONE) {
                return true;
        }

        return false;
}

bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        switch (inst->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
        case V3D_QPU_A_VFPACK:
                return true;
        default:
                break;
        }

        switch (inst->alu.mul.op) {
        case V3D_QPU_M_FMOV:
        case V3D_QPU_M_FMUL:
                return true;
        default:
                break;
        }

        return false;
}

bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        switch (inst->alu.add.op) {
        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                return true;
        default:
                break;
        }

        switch (inst->alu.mul.op) {
        case V3D_QPU_M_VFMUL:
                return true;
        default:
                break;
        }

        return false;
}

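/* An instruction is a NOP only if both ALU ops are NOP and no signal bits
 * are set.
 */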
bool
v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
{
        static const struct v3d_qpu_sig nosig = { 0 };

        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;
        if (inst->alu.add.op != V3D_QPU_A_NOP)
                return false;
        if (inst->alu.mul.op != V3D_QPU_M_NOP)
                return false;
        if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
                return false;
        return true;
}