1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include "util/macros.h"
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
29
30 const char *
v3d_qpu_magic_waddr_name(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)31 v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
32 enum v3d_qpu_waddr waddr)
33 {
34 /* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */
35 if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
36 return "tmu";
37
38 static const char *waddr_magic[] = {
39 [V3D_QPU_WADDR_R0] = "r0",
40 [V3D_QPU_WADDR_R1] = "r1",
41 [V3D_QPU_WADDR_R2] = "r2",
42 [V3D_QPU_WADDR_R3] = "r3",
43 [V3D_QPU_WADDR_R4] = "r4",
44 [V3D_QPU_WADDR_R5] = "r5",
45 [V3D_QPU_WADDR_NOP] = "-",
46 [V3D_QPU_WADDR_TLB] = "tlb",
47 [V3D_QPU_WADDR_TLBU] = "tlbu",
48 [V3D_QPU_WADDR_UNIFA] = "unifa",
49 [V3D_QPU_WADDR_TMUL] = "tmul",
50 [V3D_QPU_WADDR_TMUD] = "tmud",
51 [V3D_QPU_WADDR_TMUA] = "tmua",
52 [V3D_QPU_WADDR_TMUAU] = "tmuau",
53 [V3D_QPU_WADDR_VPM] = "vpm",
54 [V3D_QPU_WADDR_VPMU] = "vpmu",
55 [V3D_QPU_WADDR_SYNC] = "sync",
56 [V3D_QPU_WADDR_SYNCU] = "syncu",
57 [V3D_QPU_WADDR_SYNCB] = "syncb",
58 [V3D_QPU_WADDR_RECIP] = "recip",
59 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
60 [V3D_QPU_WADDR_EXP] = "exp",
61 [V3D_QPU_WADDR_LOG] = "log",
62 [V3D_QPU_WADDR_SIN] = "sin",
63 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
64 [V3D_QPU_WADDR_TMUC] = "tmuc",
65 [V3D_QPU_WADDR_TMUS] = "tmus",
66 [V3D_QPU_WADDR_TMUT] = "tmut",
67 [V3D_QPU_WADDR_TMUR] = "tmur",
68 [V3D_QPU_WADDR_TMUI] = "tmui",
69 [V3D_QPU_WADDR_TMUB] = "tmub",
70 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
71 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
72 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
73 [V3D_QPU_WADDR_TMUSF] = "tmusf",
74 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
75 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
76 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
77 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
78 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
79 [V3D_QPU_WADDR_R5REP] = "r5rep",
80 };
81
82 return waddr_magic[waddr];
83 }
84
85 const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)86 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
87 {
88 static const char *op_names[] = {
89 [V3D_QPU_A_FADD] = "fadd",
90 [V3D_QPU_A_FADDNF] = "faddnf",
91 [V3D_QPU_A_VFPACK] = "vfpack",
92 [V3D_QPU_A_ADD] = "add",
93 [V3D_QPU_A_SUB] = "sub",
94 [V3D_QPU_A_FSUB] = "fsub",
95 [V3D_QPU_A_MIN] = "min",
96 [V3D_QPU_A_MAX] = "max",
97 [V3D_QPU_A_UMIN] = "umin",
98 [V3D_QPU_A_UMAX] = "umax",
99 [V3D_QPU_A_SHL] = "shl",
100 [V3D_QPU_A_SHR] = "shr",
101 [V3D_QPU_A_ASR] = "asr",
102 [V3D_QPU_A_ROR] = "ror",
103 [V3D_QPU_A_FMIN] = "fmin",
104 [V3D_QPU_A_FMAX] = "fmax",
105 [V3D_QPU_A_VFMIN] = "vfmin",
106 [V3D_QPU_A_AND] = "and",
107 [V3D_QPU_A_OR] = "or",
108 [V3D_QPU_A_XOR] = "xor",
109 [V3D_QPU_A_VADD] = "vadd",
110 [V3D_QPU_A_VSUB] = "vsub",
111 [V3D_QPU_A_NOT] = "not",
112 [V3D_QPU_A_NEG] = "neg",
113 [V3D_QPU_A_FLAPUSH] = "flapush",
114 [V3D_QPU_A_FLBPUSH] = "flbpush",
115 [V3D_QPU_A_FLPOP] = "flpop",
116 [V3D_QPU_A_RECIP] = "recip",
117 [V3D_QPU_A_SETMSF] = "setmsf",
118 [V3D_QPU_A_SETREVF] = "setrevf",
119 [V3D_QPU_A_NOP] = "nop",
120 [V3D_QPU_A_TIDX] = "tidx",
121 [V3D_QPU_A_EIDX] = "eidx",
122 [V3D_QPU_A_LR] = "lr",
123 [V3D_QPU_A_VFLA] = "vfla",
124 [V3D_QPU_A_VFLNA] = "vflna",
125 [V3D_QPU_A_VFLB] = "vflb",
126 [V3D_QPU_A_VFLNB] = "vflnb",
127 [V3D_QPU_A_FXCD] = "fxcd",
128 [V3D_QPU_A_XCD] = "xcd",
129 [V3D_QPU_A_FYCD] = "fycd",
130 [V3D_QPU_A_YCD] = "ycd",
131 [V3D_QPU_A_MSF] = "msf",
132 [V3D_QPU_A_REVF] = "revf",
133 [V3D_QPU_A_VDWWT] = "vdwwt",
134 [V3D_QPU_A_IID] = "iid",
135 [V3D_QPU_A_SAMPID] = "sampid",
136 [V3D_QPU_A_BARRIERID] = "barrierid",
137 [V3D_QPU_A_TMUWT] = "tmuwt",
138 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
139 [V3D_QPU_A_VPMWT] = "vpmwt",
140 [V3D_QPU_A_FLAFIRST] = "flafirst",
141 [V3D_QPU_A_FLNAFIRST] = "flnafirst",
142 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
143 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
144 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
145 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
146 [V3D_QPU_A_LDVPMP] = "ldvpmp",
147 [V3D_QPU_A_RSQRT] = "rsqrt",
148 [V3D_QPU_A_EXP] = "exp",
149 [V3D_QPU_A_LOG] = "log",
150 [V3D_QPU_A_SIN] = "sin",
151 [V3D_QPU_A_RSQRT2] = "rsqrt2",
152 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
153 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
154 [V3D_QPU_A_FCMP] = "fcmp",
155 [V3D_QPU_A_VFMAX] = "vfmax",
156 [V3D_QPU_A_FROUND] = "fround",
157 [V3D_QPU_A_FTOIN] = "ftoin",
158 [V3D_QPU_A_FTRUNC] = "ftrunc",
159 [V3D_QPU_A_FTOIZ] = "ftoiz",
160 [V3D_QPU_A_FFLOOR] = "ffloor",
161 [V3D_QPU_A_FTOUZ] = "ftouz",
162 [V3D_QPU_A_FCEIL] = "fceil",
163 [V3D_QPU_A_FTOC] = "ftoc",
164 [V3D_QPU_A_FDX] = "fdx",
165 [V3D_QPU_A_FDY] = "fdy",
166 [V3D_QPU_A_STVPMV] = "stvpmv",
167 [V3D_QPU_A_STVPMD] = "stvpmd",
168 [V3D_QPU_A_STVPMP] = "stvpmp",
169 [V3D_QPU_A_ITOF] = "itof",
170 [V3D_QPU_A_CLZ] = "clz",
171 [V3D_QPU_A_UTOF] = "utof",
172 };
173
174 if (op >= ARRAY_SIZE(op_names))
175 return NULL;
176
177 return op_names[op];
178 }
179
180 const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)181 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
182 {
183 static const char *op_names[] = {
184 [V3D_QPU_M_ADD] = "add",
185 [V3D_QPU_M_SUB] = "sub",
186 [V3D_QPU_M_UMUL24] = "umul24",
187 [V3D_QPU_M_VFMUL] = "vfmul",
188 [V3D_QPU_M_SMUL24] = "smul24",
189 [V3D_QPU_M_MULTOP] = "multop",
190 [V3D_QPU_M_FMOV] = "fmov",
191 [V3D_QPU_M_MOV] = "mov",
192 [V3D_QPU_M_NOP] = "nop",
193 [V3D_QPU_M_FMUL] = "fmul",
194 };
195
196 if (op >= ARRAY_SIZE(op_names))
197 return NULL;
198
199 return op_names[op];
200 }
201
202 const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)203 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
204 {
205 switch (cond) {
206 case V3D_QPU_COND_NONE:
207 return "";
208 case V3D_QPU_COND_IFA:
209 return ".ifa";
210 case V3D_QPU_COND_IFB:
211 return ".ifb";
212 case V3D_QPU_COND_IFNA:
213 return ".ifna";
214 case V3D_QPU_COND_IFNB:
215 return ".ifnb";
216 default:
217 unreachable("bad cond value");
218 }
219 }
220
221 const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)222 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
223 {
224 switch (cond) {
225 case V3D_QPU_BRANCH_COND_ALWAYS:
226 return "";
227 case V3D_QPU_BRANCH_COND_A0:
228 return ".a0";
229 case V3D_QPU_BRANCH_COND_NA0:
230 return ".na0";
231 case V3D_QPU_BRANCH_COND_ALLA:
232 return ".alla";
233 case V3D_QPU_BRANCH_COND_ANYNA:
234 return ".anyna";
235 case V3D_QPU_BRANCH_COND_ANYA:
236 return ".anya";
237 case V3D_QPU_BRANCH_COND_ALLNA:
238 return ".allna";
239 default:
240 unreachable("bad branch cond value");
241 }
242 }
243
244 const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)245 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
246 {
247 switch (msfign) {
248 case V3D_QPU_MSFIGN_NONE:
249 return "";
250 case V3D_QPU_MSFIGN_P:
251 return "p";
252 case V3D_QPU_MSFIGN_Q:
253 return "q";
254 default:
255 unreachable("bad branch cond value");
256 }
257 }
258
259 const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)260 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
261 {
262 switch (pf) {
263 case V3D_QPU_PF_NONE:
264 return "";
265 case V3D_QPU_PF_PUSHZ:
266 return ".pushz";
267 case V3D_QPU_PF_PUSHN:
268 return ".pushn";
269 case V3D_QPU_PF_PUSHC:
270 return ".pushc";
271 default:
272 unreachable("bad pf value");
273 }
274 }
275
276 const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)277 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
278 {
279 switch (uf) {
280 case V3D_QPU_UF_NONE:
281 return "";
282 case V3D_QPU_UF_ANDZ:
283 return ".andz";
284 case V3D_QPU_UF_ANDNZ:
285 return ".andnz";
286 case V3D_QPU_UF_NORZ:
287 return ".norz";
288 case V3D_QPU_UF_NORNZ:
289 return ".nornz";
290 case V3D_QPU_UF_ANDN:
291 return ".andn";
292 case V3D_QPU_UF_ANDNN:
293 return ".andnn";
294 case V3D_QPU_UF_NORN:
295 return ".norn";
296 case V3D_QPU_UF_NORNN:
297 return ".nornn";
298 case V3D_QPU_UF_ANDC:
299 return ".andc";
300 case V3D_QPU_UF_ANDNC:
301 return ".andnc";
302 case V3D_QPU_UF_NORC:
303 return ".norc";
304 case V3D_QPU_UF_NORNC:
305 return ".nornc";
306 default:
307 unreachable("bad pf value");
308 }
309 }
310
311 const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)312 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
313 {
314 switch (pack) {
315 case V3D_QPU_PACK_NONE:
316 return "";
317 case V3D_QPU_PACK_L:
318 return ".l";
319 case V3D_QPU_PACK_H:
320 return ".h";
321 default:
322 unreachable("bad pack value");
323 }
324 }
325
326 const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)327 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
328 {
329 switch (unpack) {
330 case V3D_QPU_UNPACK_NONE:
331 return "";
332 case V3D_QPU_UNPACK_L:
333 return ".l";
334 case V3D_QPU_UNPACK_H:
335 return ".h";
336 case V3D_QPU_UNPACK_ABS:
337 return ".abs";
338 case V3D_QPU_UNPACK_REPLICATE_32F_16:
339 return ".ff";
340 case V3D_QPU_UNPACK_REPLICATE_L_16:
341 return ".ll";
342 case V3D_QPU_UNPACK_REPLICATE_H_16:
343 return ".hh";
344 case V3D_QPU_UNPACK_SWAP_16:
345 return ".swp";
346 default:
347 unreachable("bad unpack value");
348 }
349 }
350
351 #define D 1
352 #define A 2
353 #define B 4
354 static const uint8_t add_op_args[] = {
355 [V3D_QPU_A_FADD] = D | A | B,
356 [V3D_QPU_A_FADDNF] = D | A | B,
357 [V3D_QPU_A_VFPACK] = D | A | B,
358 [V3D_QPU_A_ADD] = D | A | B,
359 [V3D_QPU_A_VFPACK] = D | A | B,
360 [V3D_QPU_A_SUB] = D | A | B,
361 [V3D_QPU_A_VFPACK] = D | A | B,
362 [V3D_QPU_A_FSUB] = D | A | B,
363 [V3D_QPU_A_MIN] = D | A | B,
364 [V3D_QPU_A_MAX] = D | A | B,
365 [V3D_QPU_A_UMIN] = D | A | B,
366 [V3D_QPU_A_UMAX] = D | A | B,
367 [V3D_QPU_A_SHL] = D | A | B,
368 [V3D_QPU_A_SHR] = D | A | B,
369 [V3D_QPU_A_ASR] = D | A | B,
370 [V3D_QPU_A_ROR] = D | A | B,
371 [V3D_QPU_A_FMIN] = D | A | B,
372 [V3D_QPU_A_FMAX] = D | A | B,
373 [V3D_QPU_A_VFMIN] = D | A | B,
374
375 [V3D_QPU_A_AND] = D | A | B,
376 [V3D_QPU_A_OR] = D | A | B,
377 [V3D_QPU_A_XOR] = D | A | B,
378
379 [V3D_QPU_A_VADD] = D | A | B,
380 [V3D_QPU_A_VSUB] = D | A | B,
381 [V3D_QPU_A_NOT] = D | A,
382 [V3D_QPU_A_NEG] = D | A,
383 [V3D_QPU_A_FLAPUSH] = D | A,
384 [V3D_QPU_A_FLBPUSH] = D | A,
385 [V3D_QPU_A_FLPOP] = D | A,
386 [V3D_QPU_A_RECIP] = D | A,
387 [V3D_QPU_A_SETMSF] = D | A,
388 [V3D_QPU_A_SETREVF] = D | A,
389 [V3D_QPU_A_NOP] = 0,
390 [V3D_QPU_A_TIDX] = D,
391 [V3D_QPU_A_EIDX] = D,
392 [V3D_QPU_A_LR] = D,
393 [V3D_QPU_A_VFLA] = D,
394 [V3D_QPU_A_VFLNA] = D,
395 [V3D_QPU_A_VFLB] = D,
396 [V3D_QPU_A_VFLNB] = D,
397
398 [V3D_QPU_A_FXCD] = D,
399 [V3D_QPU_A_XCD] = D,
400 [V3D_QPU_A_FYCD] = D,
401 [V3D_QPU_A_YCD] = D,
402
403 [V3D_QPU_A_MSF] = D,
404 [V3D_QPU_A_REVF] = D,
405 [V3D_QPU_A_VDWWT] = D,
406 [V3D_QPU_A_IID] = D,
407 [V3D_QPU_A_SAMPID] = D,
408 [V3D_QPU_A_BARRIERID] = D,
409 [V3D_QPU_A_TMUWT] = D,
410 [V3D_QPU_A_VPMWT] = D,
411 [V3D_QPU_A_FLAFIRST] = D,
412 [V3D_QPU_A_FLNAFIRST] = D,
413
414 [V3D_QPU_A_VPMSETUP] = D | A,
415
416 [V3D_QPU_A_LDVPMV_IN] = D | A,
417 [V3D_QPU_A_LDVPMV_OUT] = D | A,
418 [V3D_QPU_A_LDVPMD_IN] = D | A,
419 [V3D_QPU_A_LDVPMD_OUT] = D | A,
420 [V3D_QPU_A_LDVPMP] = D | A,
421 [V3D_QPU_A_RSQRT] = D | A,
422 [V3D_QPU_A_EXP] = D | A,
423 [V3D_QPU_A_LOG] = D | A,
424 [V3D_QPU_A_SIN] = D | A,
425 [V3D_QPU_A_RSQRT2] = D | A,
426 [V3D_QPU_A_LDVPMG_IN] = D | A | B,
427 [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
428
429 /* FIXME: MOVABSNEG */
430
431 [V3D_QPU_A_FCMP] = D | A | B,
432 [V3D_QPU_A_VFMAX] = D | A | B,
433
434 [V3D_QPU_A_FROUND] = D | A,
435 [V3D_QPU_A_FTOIN] = D | A,
436 [V3D_QPU_A_FTRUNC] = D | A,
437 [V3D_QPU_A_FTOIZ] = D | A,
438 [V3D_QPU_A_FFLOOR] = D | A,
439 [V3D_QPU_A_FTOUZ] = D | A,
440 [V3D_QPU_A_FCEIL] = D | A,
441 [V3D_QPU_A_FTOC] = D | A,
442
443 [V3D_QPU_A_FDX] = D | A,
444 [V3D_QPU_A_FDY] = D | A,
445
446 [V3D_QPU_A_STVPMV] = A | B,
447 [V3D_QPU_A_STVPMD] = A | B,
448 [V3D_QPU_A_STVPMP] = A | B,
449
450 [V3D_QPU_A_ITOF] = D | A,
451 [V3D_QPU_A_CLZ] = D | A,
452 [V3D_QPU_A_UTOF] = D | A,
453 };
454
455 static const uint8_t mul_op_args[] = {
456 [V3D_QPU_M_ADD] = D | A | B,
457 [V3D_QPU_M_SUB] = D | A | B,
458 [V3D_QPU_M_UMUL24] = D | A | B,
459 [V3D_QPU_M_VFMUL] = D | A | B,
460 [V3D_QPU_M_SMUL24] = D | A | B,
461 [V3D_QPU_M_MULTOP] = D | A | B,
462 [V3D_QPU_M_FMOV] = D | A,
463 [V3D_QPU_M_NOP] = 0,
464 [V3D_QPU_M_MOV] = D | A,
465 [V3D_QPU_M_FMUL] = D | A | B,
466 };
467
468 bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)469 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
470 {
471 assert(op < ARRAY_SIZE(add_op_args));
472
473 return add_op_args[op] & D;
474 }
475
476 bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)477 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
478 {
479 assert(op < ARRAY_SIZE(mul_op_args));
480
481 return mul_op_args[op] & D;
482 }
483
484 int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)485 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
486 {
487 assert(op < ARRAY_SIZE(add_op_args));
488
489 uint8_t args = add_op_args[op];
490 if (args & B)
491 return 2;
492 else if (args & A)
493 return 1;
494 else
495 return 0;
496 }
497
498 int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)499 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
500 {
501 assert(op < ARRAY_SIZE(mul_op_args));
502
503 uint8_t args = mul_op_args[op];
504 if (args & B)
505 return 2;
506 else if (args & A)
507 return 1;
508 else
509 return 0;
510 }
511
512 enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)513 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
514 {
515 switch (cond) {
516 case V3D_QPU_COND_IFA:
517 return V3D_QPU_COND_IFNA;
518 case V3D_QPU_COND_IFNA:
519 return V3D_QPU_COND_IFA;
520 case V3D_QPU_COND_IFB:
521 return V3D_QPU_COND_IFNB;
522 case V3D_QPU_COND_IFNB:
523 return V3D_QPU_COND_IFB;
524 default:
525 unreachable("Non-invertible cond");
526 }
527 }
528
529 bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)530 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
531 {
532 switch (waddr) {
533 case V3D_QPU_WADDR_RECIP:
534 case V3D_QPU_WADDR_RSQRT:
535 case V3D_QPU_WADDR_EXP:
536 case V3D_QPU_WADDR_LOG:
537 case V3D_QPU_WADDR_SIN:
538 case V3D_QPU_WADDR_RSQRT2:
539 return true;
540 default:
541 return false;
542 }
543 }
544
545 bool
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)546 v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
547 enum v3d_qpu_waddr waddr)
548 {
549 if (devinfo->ver >= 40) {
550 return ((waddr >= V3D_QPU_WADDR_TMUD &&
551 waddr <= V3D_QPU_WADDR_TMUAU) ||
552 (waddr >= V3D_QPU_WADDR_TMUC &&
553 waddr <= V3D_QPU_WADDR_TMUHSLOD));
554 } else {
555 return ((waddr >= V3D_QPU_WADDR_TMU &&
556 waddr <= V3D_QPU_WADDR_TMUAU) ||
557 (waddr >= V3D_QPU_WADDR_TMUC &&
558 waddr <= V3D_QPU_WADDR_TMUHSLOD));
559 }
560 }
561
562 bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr * inst)563 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
564 {
565 return (inst->sig.ldtmu ||
566 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
567 inst->alu.add.op == V3D_QPU_A_TMUWT));
568 }
569
570 bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)571 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
572 {
573 return (waddr == V3D_QPU_WADDR_TLB ||
574 waddr == V3D_QPU_WADDR_TLBU);
575 }
576
577 bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)578 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
579 {
580 return (waddr == V3D_QPU_WADDR_VPM ||
581 waddr == V3D_QPU_WADDR_VPMU);
582 }
583
584 bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)585 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
586 {
587 return (waddr == V3D_QPU_WADDR_SYNC ||
588 waddr == V3D_QPU_WADDR_SYNCB ||
589 waddr == V3D_QPU_WADDR_SYNCU);
590 }
591
592 bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)593 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
594 {
595 switch (waddr) {
596 case V3D_QPU_WADDR_VPMU:
597 case V3D_QPU_WADDR_TLBU:
598 case V3D_QPU_WADDR_TMUAU:
599 case V3D_QPU_WADDR_SYNCU:
600 return true;
601 default:
602 return false;
603 }
604 }
605
606 static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)607 v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)
608 {
609 switch (op) {
610 case V3D_QPU_A_VPMSETUP:
611 case V3D_QPU_A_LDVPMV_IN:
612 case V3D_QPU_A_LDVPMV_OUT:
613 case V3D_QPU_A_LDVPMD_IN:
614 case V3D_QPU_A_LDVPMD_OUT:
615 case V3D_QPU_A_LDVPMP:
616 case V3D_QPU_A_LDVPMG_IN:
617 case V3D_QPU_A_LDVPMG_OUT:
618 return true;
619 default:
620 return false;
621 }
622 }
623
624 static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)625 v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
626 {
627 switch (op) {
628 case V3D_QPU_A_VPMSETUP:
629 case V3D_QPU_A_STVPMV:
630 case V3D_QPU_A_STVPMD:
631 case V3D_QPU_A_STVPMP:
632 return true;
633 default:
634 return false;
635 }
636 }
637
638 bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr * inst)639 v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
640 {
641 if (inst->sig.ldtlb ||
642 inst->sig.ldtlbu)
643 return true;
644
645 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
646 if (inst->alu.add.magic_write &&
647 v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
648 return true;
649 }
650
651 if (inst->alu.mul.magic_write &&
652 v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
653 return true;
654 }
655 }
656
657 return false;
658 }
659
660 bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr * inst)661 v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
662 {
663 if (v3d_qpu_instr_is_sfu(inst))
664 return true;
665
666 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
667 if (inst->alu.add.magic_write &&
668 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
669 return true;
670 }
671
672 if (inst->alu.mul.magic_write &&
673 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
674 return true;
675 }
676 }
677
678 return false;
679 }
680
681 bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr * inst)682 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
683 {
684 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
685 switch (inst->alu.add.op) {
686 case V3D_QPU_A_RECIP:
687 case V3D_QPU_A_RSQRT:
688 case V3D_QPU_A_EXP:
689 case V3D_QPU_A_LOG:
690 case V3D_QPU_A_SIN:
691 case V3D_QPU_A_RSQRT2:
692 return true;
693 default:
694 return false;
695 }
696 }
697 return false;
698 }
699
700 bool
v3d_qpu_writes_tmu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)701 v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
702 const struct v3d_qpu_instr *inst)
703 {
704 return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
705 ((inst->alu.add.magic_write &&
706 v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
707 (inst->alu.mul.magic_write &&
708 v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
709 }
710
711 bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)712 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
713 const struct v3d_qpu_instr *inst)
714 {
715 return v3d_qpu_writes_tmu(devinfo, inst) &&
716 (!inst->alu.add.magic_write ||
717 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
718 (!inst->alu.mul.magic_write ||
719 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
720 }
721
722 bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr * inst)723 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
724 {
725 if (inst->sig.ldvpm)
726 return true;
727
728 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
729 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
730 return true;
731 }
732
733 return false;
734 }
735
736 bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr * inst)737 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
738 {
739 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
740 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
741 return true;
742
743 if (inst->alu.add.magic_write &&
744 v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
745 return true;
746 }
747
748 if (inst->alu.mul.magic_write &&
749 v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
750 return true;
751 }
752 }
753
754 return false;
755 }
756
757 bool
v3d_qpu_writes_unifa(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)758 v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
759 const struct v3d_qpu_instr *inst)
760 {
761 if (devinfo->ver < 40)
762 return false;
763
764 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
765 if (inst->alu.add.op != V3D_QPU_A_NOP &&
766 inst->alu.add.magic_write &&
767 inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
768 return true;
769 }
770
771 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
772 inst->alu.mul.magic_write &&
773 inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
774 return true;
775 }
776 }
777
778 return false;
779 }
780
781 bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr * inst)782 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
783 {
784 return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
785 inst->alu.add.op == V3D_QPU_A_VPMWT;
786 }
787
/**
 * True when either v3d_qpu_reads_vpm() or v3d_qpu_writes_vpm() holds for
 * the instruction.
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
793
/**
 * True when the instruction reads the VPM, writes the VPM, or waits on it
 * (v3d_qpu_waits_vpm()).
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        /* Read/write checks come first, in the same order as before. */
        return v3d_qpu_reads_or_writes_vpm(inst) ||
               v3d_qpu_waits_vpm(inst);
}
801
802 static bool
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint32_t waddr)803 qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
804 const struct v3d_qpu_instr *inst,
805 uint32_t waddr)
806 {
807 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
808 if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
809 return true;
810
811 if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
812 return true;
813 }
814
815 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
816 inst->sig_magic && inst->sig_addr == waddr) {
817 return true;
818 }
819
820 return false;
821 }
822
823 bool
v3d_qpu_writes_r3(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)824 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
825 const struct v3d_qpu_instr *inst)
826 {
827 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
828 return true;
829
830 return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
831 }
832
833 bool
v3d_qpu_writes_r4(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)834 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
835 const struct v3d_qpu_instr *inst)
836 {
837 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
838 if (inst->alu.add.magic_write &&
839 (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
840 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
841 return true;
842 }
843
844 if (inst->alu.mul.magic_write &&
845 (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
846 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
847 return true;
848 }
849 }
850
851 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
852 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
853 return true;
854 } else if (inst->sig.ldtmu) {
855 return true;
856 }
857
858 return false;
859 }
860
861 bool
v3d_qpu_writes_r5(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)862 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
863 const struct v3d_qpu_instr *inst)
864 {
865 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
866 return true;
867
868 return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
869 }
870
871 bool
v3d_qpu_writes_accum(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)872 v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
873 const struct v3d_qpu_instr *inst)
874 {
875 if (v3d_qpu_writes_r5(devinfo, inst))
876 return true;
877 if (v3d_qpu_writes_r4(devinfo, inst))
878 return true;
879 if (v3d_qpu_writes_r3(devinfo, inst))
880 return true;
881 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
882 return true;
883 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
884 return true;
885 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
886 return true;
887
888 return false;
889 }
890
891 bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)892 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
893 {
894 int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
895 int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
896
897 return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
898 (add_nsrc > 1 && inst->alu.add.b == mux) ||
899 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
900 (mul_nsrc > 1 && inst->alu.mul.b == mux));
901 }
902
903 bool
v3d_qpu_sig_writes_address(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig)904 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
905 const struct v3d_qpu_sig *sig)
906 {
907 if (devinfo->ver < 41)
908 return false;
909
910 return (sig->ldunifrf ||
911 sig->ldunifarf ||
912 sig->ldvary ||
913 sig->ldtmu ||
914 sig->ldtlb ||
915 sig->ldtlbu);
916 }
917
918 bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr * inst)919 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
920 {
921 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
922 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
923 } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
924 if (inst->flags.ac != V3D_QPU_COND_NONE ||
925 inst->flags.mc != V3D_QPU_COND_NONE ||
926 inst->flags.auf != V3D_QPU_UF_NONE ||
927 inst->flags.muf != V3D_QPU_UF_NONE)
928 return true;
929
930 switch (inst->alu.add.op) {
931 case V3D_QPU_A_VFLA:
932 case V3D_QPU_A_VFLNA:
933 case V3D_QPU_A_VFLB:
934 case V3D_QPU_A_VFLNB:
935 case V3D_QPU_A_FLAPUSH:
936 case V3D_QPU_A_FLBPUSH:
937 case V3D_QPU_A_FLAFIRST:
938 case V3D_QPU_A_FLNAFIRST:
939 return true;
940 default:
941 break;
942 }
943 }
944
945 return false;
946 }
947
948 bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr * inst)949 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
950 {
951 if (inst->flags.apf != V3D_QPU_PF_NONE ||
952 inst->flags.mpf != V3D_QPU_PF_NONE ||
953 inst->flags.auf != V3D_QPU_UF_NONE ||
954 inst->flags.muf != V3D_QPU_UF_NONE) {
955 return true;
956 }
957
958 return false;
959 }
960
961 bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr * inst)962 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
963 {
964 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
965 return false;
966
967 switch (inst->alu.add.op) {
968 case V3D_QPU_A_FADD:
969 case V3D_QPU_A_FADDNF:
970 case V3D_QPU_A_FSUB:
971 case V3D_QPU_A_FMIN:
972 case V3D_QPU_A_FMAX:
973 case V3D_QPU_A_FCMP:
974 case V3D_QPU_A_FROUND:
975 case V3D_QPU_A_FTRUNC:
976 case V3D_QPU_A_FFLOOR:
977 case V3D_QPU_A_FCEIL:
978 case V3D_QPU_A_FDX:
979 case V3D_QPU_A_FDY:
980 case V3D_QPU_A_FTOIN:
981 case V3D_QPU_A_FTOIZ:
982 case V3D_QPU_A_FTOUZ:
983 case V3D_QPU_A_FTOC:
984 case V3D_QPU_A_VFPACK:
985 return true;
986 break;
987 default:
988 break;
989 }
990
991 switch (inst->alu.mul.op) {
992 case V3D_QPU_M_FMOV:
993 case V3D_QPU_M_FMUL:
994 return true;
995 break;
996 default:
997 break;
998 }
999
1000 return false;
1001 }
1002 bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr * inst)1003 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
1004 {
1005 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1006 return false;
1007
1008 switch (inst->alu.add.op) {
1009 case V3D_QPU_A_VFMIN:
1010 case V3D_QPU_A_VFMAX:
1011 return true;
1012 break;
1013 default:
1014 break;
1015 }
1016
1017 switch (inst->alu.mul.op) {
1018 case V3D_QPU_M_VFMUL:
1019 return true;
1020 break;
1021 default:
1022 break;
1023 }
1024
1025 return false;
1026 }
1027
1028 bool
v3d_qpu_is_nop(struct v3d_qpu_instr * inst)1029 v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
1030 {
1031 static const struct v3d_qpu_sig nosig = { 0 };
1032
1033 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1034 return false;
1035 if (inst->alu.add.op != V3D_QPU_A_NOP)
1036 return false;
1037 if (inst->alu.mul.op != V3D_QPU_M_NOP)
1038 return false;
1039 if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
1040 return false;
1041 return true;
1042 }
1043