1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdlib.h>
25 #include "util/macros.h"
26 #include "broadcom/common/v3d_device_info.h"
27 #include "qpu_instr.h"
28
29 const char *
v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)30 v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)
31 {
32 static const char *waddr_magic[] = {
33 [V3D_QPU_WADDR_R0] = "r0",
34 [V3D_QPU_WADDR_R1] = "r1",
35 [V3D_QPU_WADDR_R2] = "r2",
36 [V3D_QPU_WADDR_R3] = "r3",
37 [V3D_QPU_WADDR_R4] = "r4",
38 [V3D_QPU_WADDR_R5] = "r5",
39 [V3D_QPU_WADDR_NOP] = "-",
40 [V3D_QPU_WADDR_TLB] = "tlb",
41 [V3D_QPU_WADDR_TLBU] = "tlbu",
42 [V3D_QPU_WADDR_TMU] = "tmu",
43 [V3D_QPU_WADDR_TMUL] = "tmul",
44 [V3D_QPU_WADDR_TMUD] = "tmud",
45 [V3D_QPU_WADDR_TMUA] = "tmua",
46 [V3D_QPU_WADDR_TMUAU] = "tmuau",
47 [V3D_QPU_WADDR_VPM] = "vpm",
48 [V3D_QPU_WADDR_VPMU] = "vpmu",
49 [V3D_QPU_WADDR_SYNC] = "sync",
50 [V3D_QPU_WADDR_SYNCU] = "syncu",
51 [V3D_QPU_WADDR_SYNCB] = "syncb",
52 [V3D_QPU_WADDR_RECIP] = "recip",
53 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
54 [V3D_QPU_WADDR_EXP] = "exp",
55 [V3D_QPU_WADDR_LOG] = "log",
56 [V3D_QPU_WADDR_SIN] = "sin",
57 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
58 [V3D_QPU_WADDR_TMUC] = "tmuc",
59 [V3D_QPU_WADDR_TMUS] = "tmus",
60 [V3D_QPU_WADDR_TMUT] = "tmut",
61 [V3D_QPU_WADDR_TMUR] = "tmur",
62 [V3D_QPU_WADDR_TMUI] = "tmui",
63 [V3D_QPU_WADDR_TMUB] = "tmub",
64 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
65 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
66 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
67 [V3D_QPU_WADDR_TMUSF] = "tmusf",
68 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
69 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
70 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
71 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
72 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
73 [V3D_QPU_WADDR_R5REP] = "r5rep",
74 };
75
76 return waddr_magic[waddr];
77 }
78
79 const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)80 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
81 {
82 static const char *op_names[] = {
83 [V3D_QPU_A_FADD] = "fadd",
84 [V3D_QPU_A_FADDNF] = "faddnf",
85 [V3D_QPU_A_VFPACK] = "vfpack",
86 [V3D_QPU_A_ADD] = "add",
87 [V3D_QPU_A_SUB] = "sub",
88 [V3D_QPU_A_FSUB] = "fsub",
89 [V3D_QPU_A_MIN] = "min",
90 [V3D_QPU_A_MAX] = "max",
91 [V3D_QPU_A_UMIN] = "umin",
92 [V3D_QPU_A_UMAX] = "umax",
93 [V3D_QPU_A_SHL] = "shl",
94 [V3D_QPU_A_SHR] = "shr",
95 [V3D_QPU_A_ASR] = "asr",
96 [V3D_QPU_A_ROR] = "ror",
97 [V3D_QPU_A_FMIN] = "fmin",
98 [V3D_QPU_A_FMAX] = "fmax",
99 [V3D_QPU_A_VFMIN] = "vfmin",
100 [V3D_QPU_A_AND] = "and",
101 [V3D_QPU_A_OR] = "or",
102 [V3D_QPU_A_XOR] = "xor",
103 [V3D_QPU_A_VADD] = "vadd",
104 [V3D_QPU_A_VSUB] = "vsub",
105 [V3D_QPU_A_NOT] = "not",
106 [V3D_QPU_A_NEG] = "neg",
107 [V3D_QPU_A_FLAPUSH] = "flapush",
108 [V3D_QPU_A_FLBPUSH] = "flbpush",
109 [V3D_QPU_A_FLPOP] = "flpop",
110 [V3D_QPU_A_RECIP] = "recip",
111 [V3D_QPU_A_SETMSF] = "setmsf",
112 [V3D_QPU_A_SETREVF] = "setrevf",
113 [V3D_QPU_A_NOP] = "nop",
114 [V3D_QPU_A_TIDX] = "tidx",
115 [V3D_QPU_A_EIDX] = "eidx",
116 [V3D_QPU_A_LR] = "lr",
117 [V3D_QPU_A_VFLA] = "vfla",
118 [V3D_QPU_A_VFLNA] = "vflna",
119 [V3D_QPU_A_VFLB] = "vflb",
120 [V3D_QPU_A_VFLNB] = "vflnb",
121 [V3D_QPU_A_FXCD] = "fxcd",
122 [V3D_QPU_A_XCD] = "xcd",
123 [V3D_QPU_A_FYCD] = "fycd",
124 [V3D_QPU_A_YCD] = "ycd",
125 [V3D_QPU_A_MSF] = "msf",
126 [V3D_QPU_A_REVF] = "revf",
127 [V3D_QPU_A_VDWWT] = "vdwwt",
128 [V3D_QPU_A_IID] = "iid",
129 [V3D_QPU_A_SAMPID] = "sampid",
130 [V3D_QPU_A_BARRIERID] = "barrierid",
131 [V3D_QPU_A_TMUWT] = "tmuwt",
132 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
133 [V3D_QPU_A_VPMWT] = "vpmwt",
134 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
135 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
136 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
137 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
138 [V3D_QPU_A_LDVPMP] = "ldvpmp",
139 [V3D_QPU_A_RSQRT] = "rsqrt",
140 [V3D_QPU_A_EXP] = "exp",
141 [V3D_QPU_A_LOG] = "log",
142 [V3D_QPU_A_SIN] = "sin",
143 [V3D_QPU_A_RSQRT2] = "rsqrt2",
144 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
145 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
146 [V3D_QPU_A_FCMP] = "fcmp",
147 [V3D_QPU_A_VFMAX] = "vfmax",
148 [V3D_QPU_A_FROUND] = "fround",
149 [V3D_QPU_A_FTOIN] = "ftoin",
150 [V3D_QPU_A_FTRUNC] = "ftrunc",
151 [V3D_QPU_A_FTOIZ] = "ftoiz",
152 [V3D_QPU_A_FFLOOR] = "ffloor",
153 [V3D_QPU_A_FTOUZ] = "ftouz",
154 [V3D_QPU_A_FCEIL] = "fceil",
155 [V3D_QPU_A_FTOC] = "ftoc",
156 [V3D_QPU_A_FDX] = "fdx",
157 [V3D_QPU_A_FDY] = "fdy",
158 [V3D_QPU_A_STVPMV] = "stvpmv",
159 [V3D_QPU_A_STVPMD] = "stvpmd",
160 [V3D_QPU_A_STVPMP] = "stvpmp",
161 [V3D_QPU_A_ITOF] = "itof",
162 [V3D_QPU_A_CLZ] = "clz",
163 [V3D_QPU_A_UTOF] = "utof",
164 };
165
166 if (op >= ARRAY_SIZE(op_names))
167 return NULL;
168
169 return op_names[op];
170 }
171
172 const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)173 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
174 {
175 static const char *op_names[] = {
176 [V3D_QPU_M_ADD] = "add",
177 [V3D_QPU_M_SUB] = "sub",
178 [V3D_QPU_M_UMUL24] = "umul24",
179 [V3D_QPU_M_VFMUL] = "vfmul",
180 [V3D_QPU_M_SMUL24] = "smul24",
181 [V3D_QPU_M_MULTOP] = "multop",
182 [V3D_QPU_M_FMOV] = "fmov",
183 [V3D_QPU_M_MOV] = "mov",
184 [V3D_QPU_M_NOP] = "nop",
185 [V3D_QPU_M_FMUL] = "fmul",
186 };
187
188 if (op >= ARRAY_SIZE(op_names))
189 return NULL;
190
191 return op_names[op];
192 }
193
194 const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)195 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
196 {
197 switch (cond) {
198 case V3D_QPU_COND_NONE:
199 return "";
200 case V3D_QPU_COND_IFA:
201 return ".ifa";
202 case V3D_QPU_COND_IFB:
203 return ".ifb";
204 case V3D_QPU_COND_IFNA:
205 return ".ifna";
206 case V3D_QPU_COND_IFNB:
207 return ".ifnb";
208 default:
209 unreachable("bad cond value");
210 }
211 }
212
213 const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)214 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
215 {
216 switch (cond) {
217 case V3D_QPU_BRANCH_COND_ALWAYS:
218 return "";
219 case V3D_QPU_BRANCH_COND_A0:
220 return ".a0";
221 case V3D_QPU_BRANCH_COND_NA0:
222 return ".na0";
223 case V3D_QPU_BRANCH_COND_ALLA:
224 return ".alla";
225 case V3D_QPU_BRANCH_COND_ANYNA:
226 return ".anyna";
227 case V3D_QPU_BRANCH_COND_ANYA:
228 return ".anya";
229 case V3D_QPU_BRANCH_COND_ALLNA:
230 return ".allna";
231 default:
232 unreachable("bad branch cond value");
233 }
234 }
235
236 const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)237 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
238 {
239 switch (msfign) {
240 case V3D_QPU_MSFIGN_NONE:
241 return "";
242 case V3D_QPU_MSFIGN_P:
243 return "p";
244 case V3D_QPU_MSFIGN_Q:
245 return "q";
246 default:
247 unreachable("bad branch cond value");
248 }
249 }
250
251 const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)252 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
253 {
254 switch (pf) {
255 case V3D_QPU_PF_NONE:
256 return "";
257 case V3D_QPU_PF_PUSHZ:
258 return ".pushz";
259 case V3D_QPU_PF_PUSHN:
260 return ".pushn";
261 case V3D_QPU_PF_PUSHC:
262 return ".pushc";
263 default:
264 unreachable("bad pf value");
265 }
266 }
267
268 const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)269 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
270 {
271 switch (uf) {
272 case V3D_QPU_UF_NONE:
273 return "";
274 case V3D_QPU_UF_ANDZ:
275 return ".andz";
276 case V3D_QPU_UF_ANDNZ:
277 return ".andnz";
278 case V3D_QPU_UF_NORZ:
279 return ".norz";
280 case V3D_QPU_UF_NORNZ:
281 return ".nornz";
282 case V3D_QPU_UF_ANDN:
283 return ".andn";
284 case V3D_QPU_UF_ANDNN:
285 return ".andnn";
286 case V3D_QPU_UF_NORN:
287 return ".norn";
288 case V3D_QPU_UF_NORNN:
289 return ".nornn";
290 case V3D_QPU_UF_ANDC:
291 return ".andc";
292 case V3D_QPU_UF_ANDNC:
293 return ".andnc";
294 case V3D_QPU_UF_NORC:
295 return ".norc";
296 case V3D_QPU_UF_NORNC:
297 return ".nornc";
298 default:
299 unreachable("bad pf value");
300 }
301 }
302
303 const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)304 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
305 {
306 switch (pack) {
307 case V3D_QPU_PACK_NONE:
308 return "";
309 case V3D_QPU_PACK_L:
310 return ".l";
311 case V3D_QPU_PACK_H:
312 return ".h";
313 default:
314 unreachable("bad pack value");
315 }
316 }
317
318 const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)319 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
320 {
321 switch (unpack) {
322 case V3D_QPU_UNPACK_NONE:
323 return "";
324 case V3D_QPU_UNPACK_L:
325 return ".l";
326 case V3D_QPU_UNPACK_H:
327 return ".h";
328 case V3D_QPU_UNPACK_ABS:
329 return ".abs";
330 case V3D_QPU_UNPACK_REPLICATE_32F_16:
331 return ".ff";
332 case V3D_QPU_UNPACK_REPLICATE_L_16:
333 return ".ll";
334 case V3D_QPU_UNPACK_REPLICATE_H_16:
335 return ".hh";
336 case V3D_QPU_UNPACK_SWAP_16:
337 return ".swp";
338 default:
339 unreachable("bad unpack value");
340 }
341 }
342
343 #define D 1
344 #define A 2
345 #define B 4
346 static const uint8_t add_op_args[] = {
347 [V3D_QPU_A_FADD] = D | A | B,
348 [V3D_QPU_A_FADDNF] = D | A | B,
349 [V3D_QPU_A_VFPACK] = D | A | B,
350 [V3D_QPU_A_ADD] = D | A | B,
351 [V3D_QPU_A_VFPACK] = D | A | B,
352 [V3D_QPU_A_SUB] = D | A | B,
353 [V3D_QPU_A_VFPACK] = D | A | B,
354 [V3D_QPU_A_FSUB] = D | A | B,
355 [V3D_QPU_A_MIN] = D | A | B,
356 [V3D_QPU_A_MAX] = D | A | B,
357 [V3D_QPU_A_UMIN] = D | A | B,
358 [V3D_QPU_A_UMAX] = D | A | B,
359 [V3D_QPU_A_SHL] = D | A | B,
360 [V3D_QPU_A_SHR] = D | A | B,
361 [V3D_QPU_A_ASR] = D | A | B,
362 [V3D_QPU_A_ROR] = D | A | B,
363 [V3D_QPU_A_FMIN] = D | A | B,
364 [V3D_QPU_A_FMAX] = D | A | B,
365 [V3D_QPU_A_VFMIN] = D | A | B,
366
367 [V3D_QPU_A_AND] = D | A | B,
368 [V3D_QPU_A_OR] = D | A | B,
369 [V3D_QPU_A_XOR] = D | A | B,
370
371 [V3D_QPU_A_VADD] = D | A | B,
372 [V3D_QPU_A_VSUB] = D | A | B,
373 [V3D_QPU_A_NOT] = D | A,
374 [V3D_QPU_A_NEG] = D | A,
375 [V3D_QPU_A_FLAPUSH] = D | A,
376 [V3D_QPU_A_FLBPUSH] = D | A,
377 [V3D_QPU_A_FLPOP] = D | A,
378 [V3D_QPU_A_RECIP] = D | A,
379 [V3D_QPU_A_SETMSF] = D | A,
380 [V3D_QPU_A_SETREVF] = D | A,
381 [V3D_QPU_A_NOP] = 0,
382 [V3D_QPU_A_TIDX] = D,
383 [V3D_QPU_A_EIDX] = D,
384 [V3D_QPU_A_LR] = D,
385 [V3D_QPU_A_VFLA] = D,
386 [V3D_QPU_A_VFLNA] = D,
387 [V3D_QPU_A_VFLB] = D,
388 [V3D_QPU_A_VFLNB] = D,
389
390 [V3D_QPU_A_FXCD] = D,
391 [V3D_QPU_A_XCD] = D,
392 [V3D_QPU_A_FYCD] = D,
393 [V3D_QPU_A_YCD] = D,
394
395 [V3D_QPU_A_MSF] = D,
396 [V3D_QPU_A_REVF] = D,
397 [V3D_QPU_A_VDWWT] = D,
398 [V3D_QPU_A_IID] = D,
399 [V3D_QPU_A_SAMPID] = D,
400 [V3D_QPU_A_BARRIERID] = D,
401 [V3D_QPU_A_TMUWT] = D,
402 [V3D_QPU_A_VPMWT] = D,
403
404 [V3D_QPU_A_VPMSETUP] = D | A,
405
406 [V3D_QPU_A_LDVPMV_IN] = D | A,
407 [V3D_QPU_A_LDVPMV_OUT] = D | A,
408 [V3D_QPU_A_LDVPMD_IN] = D | A,
409 [V3D_QPU_A_LDVPMD_OUT] = D | A,
410 [V3D_QPU_A_LDVPMP] = D | A,
411 [V3D_QPU_A_RSQRT] = D | A,
412 [V3D_QPU_A_EXP] = D | A,
413 [V3D_QPU_A_LOG] = D | A,
414 [V3D_QPU_A_SIN] = D | A,
415 [V3D_QPU_A_RSQRT2] = D | A,
416 [V3D_QPU_A_LDVPMG_IN] = D | A | B,
417 [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
418
419 /* FIXME: MOVABSNEG */
420
421 [V3D_QPU_A_FCMP] = D | A | B,
422 [V3D_QPU_A_VFMAX] = D | A | B,
423
424 [V3D_QPU_A_FROUND] = D | A,
425 [V3D_QPU_A_FTOIN] = D | A,
426 [V3D_QPU_A_FTRUNC] = D | A,
427 [V3D_QPU_A_FTOIZ] = D | A,
428 [V3D_QPU_A_FFLOOR] = D | A,
429 [V3D_QPU_A_FTOUZ] = D | A,
430 [V3D_QPU_A_FCEIL] = D | A,
431 [V3D_QPU_A_FTOC] = D | A,
432
433 [V3D_QPU_A_FDX] = D | A,
434 [V3D_QPU_A_FDY] = D | A,
435
436 [V3D_QPU_A_STVPMV] = A | B,
437 [V3D_QPU_A_STVPMD] = A | B,
438 [V3D_QPU_A_STVPMP] = A | B,
439
440 [V3D_QPU_A_ITOF] = D | A,
441 [V3D_QPU_A_CLZ] = D | A,
442 [V3D_QPU_A_UTOF] = D | A,
443 };
444
445 static const uint8_t mul_op_args[] = {
446 [V3D_QPU_M_ADD] = D | A | B,
447 [V3D_QPU_M_SUB] = D | A | B,
448 [V3D_QPU_M_UMUL24] = D | A | B,
449 [V3D_QPU_M_VFMUL] = D | A | B,
450 [V3D_QPU_M_SMUL24] = D | A | B,
451 [V3D_QPU_M_MULTOP] = D | A | B,
452 [V3D_QPU_M_FMOV] = D | A,
453 [V3D_QPU_M_NOP] = 0,
454 [V3D_QPU_M_MOV] = D | A,
455 [V3D_QPU_M_FMUL] = D | A | B,
456 };
457
458 bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)459 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
460 {
461 assert(op < ARRAY_SIZE(add_op_args));
462
463 return add_op_args[op] & D;
464 }
465
466 bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)467 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
468 {
469 assert(op < ARRAY_SIZE(mul_op_args));
470
471 return mul_op_args[op] & D;
472 }
473
474 int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)475 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
476 {
477 assert(op < ARRAY_SIZE(add_op_args));
478
479 uint8_t args = add_op_args[op];
480 if (args & B)
481 return 2;
482 else if (args & A)
483 return 1;
484 else
485 return 0;
486 }
487
488 int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)489 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
490 {
491 assert(op < ARRAY_SIZE(mul_op_args));
492
493 uint8_t args = mul_op_args[op];
494 if (args & B)
495 return 2;
496 else if (args & A)
497 return 1;
498 else
499 return 0;
500 }
501
502 enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)503 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
504 {
505 switch (cond) {
506 case V3D_QPU_COND_IFA:
507 return V3D_QPU_COND_IFNA;
508 case V3D_QPU_COND_IFNA:
509 return V3D_QPU_COND_IFA;
510 case V3D_QPU_COND_IFB:
511 return V3D_QPU_COND_IFNB;
512 case V3D_QPU_COND_IFNB:
513 return V3D_QPU_COND_IFB;
514 default:
515 unreachable("Non-invertible cond");
516 }
517 }
518
519 bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)520 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
521 {
522 switch (waddr) {
523 case V3D_QPU_WADDR_RECIP:
524 case V3D_QPU_WADDR_RSQRT:
525 case V3D_QPU_WADDR_EXP:
526 case V3D_QPU_WADDR_LOG:
527 case V3D_QPU_WADDR_SIN:
528 case V3D_QPU_WADDR_RSQRT2:
529 return true;
530 default:
531 return false;
532 }
533 }
534
535 bool
v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)536 v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
537 {
538 /* XXX: WADDR_TMU changed to UNIFA on 4.x */
539 return ((waddr >= V3D_QPU_WADDR_TMU &&
540 waddr <= V3D_QPU_WADDR_TMUAU) ||
541 (waddr >= V3D_QPU_WADDR_TMUC &&
542 waddr <= V3D_QPU_WADDR_TMUHSLOD));
543 }
544
545 bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr * inst)546 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
547 {
548 return (inst->sig.ldtmu ||
549 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
550 inst->alu.add.op == V3D_QPU_A_TMUWT));
551 }
552
553 bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)554 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
555 {
556 return (waddr == V3D_QPU_WADDR_TLB ||
557 waddr == V3D_QPU_WADDR_TLBU);
558 }
559
560 bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)561 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
562 {
563 return (waddr == V3D_QPU_WADDR_VPM ||
564 waddr == V3D_QPU_WADDR_VPMU);
565 }
566
567 bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)568 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
569 {
570 return (waddr == V3D_QPU_WADDR_SYNC ||
571 waddr == V3D_QPU_WADDR_SYNCB ||
572 waddr == V3D_QPU_WADDR_SYNCU);
573 }
574
575 bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)576 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
577 {
578 switch (waddr) {
579 case V3D_QPU_WADDR_VPMU:
580 case V3D_QPU_WADDR_TLBU:
581 case V3D_QPU_WADDR_TMUAU:
582 case V3D_QPU_WADDR_SYNCU:
583 return true;
584 default:
585 return false;
586 }
587 }
588
589 static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)590 v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)
591 {
592 switch (op) {
593 case V3D_QPU_A_VPMSETUP:
594 case V3D_QPU_A_LDVPMV_IN:
595 case V3D_QPU_A_LDVPMV_OUT:
596 case V3D_QPU_A_LDVPMD_IN:
597 case V3D_QPU_A_LDVPMD_OUT:
598 case V3D_QPU_A_LDVPMP:
599 case V3D_QPU_A_LDVPMG_IN:
600 case V3D_QPU_A_LDVPMG_OUT:
601 return true;
602 default:
603 return false;
604 }
605 }
606
607 static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)608 v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
609 {
610 switch (op) {
611 case V3D_QPU_A_VPMSETUP:
612 case V3D_QPU_A_STVPMV:
613 case V3D_QPU_A_STVPMD:
614 case V3D_QPU_A_STVPMP:
615 return true;
616 default:
617 return false;
618 }
619 }
620
621 bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr * inst)622 v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
623 {
624 if (inst->sig.ldtlb ||
625 inst->sig.ldtlbu)
626 return true;
627
628 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
629 if (inst->alu.add.magic_write &&
630 v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
631 return true;
632 }
633
634 if (inst->alu.mul.magic_write &&
635 v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
636 return true;
637 }
638 }
639
640 return false;
641 }
642
643 bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr * inst)644 v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
645 {
646 if (v3d_qpu_instr_is_sfu(inst))
647 return true;
648
649 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
650 if (inst->alu.add.magic_write &&
651 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
652 return true;
653 }
654
655 if (inst->alu.mul.magic_write &&
656 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
657 return true;
658 }
659 }
660
661 return false;
662 }
663
664 bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr * inst)665 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
666 {
667 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
668 switch (inst->alu.add.op) {
669 case V3D_QPU_A_RECIP:
670 case V3D_QPU_A_RSQRT:
671 case V3D_QPU_A_EXP:
672 case V3D_QPU_A_LOG:
673 case V3D_QPU_A_SIN:
674 case V3D_QPU_A_RSQRT2:
675 return true;
676 default:
677 return false;
678 }
679 }
680 return false;
681 }
682
683 bool
v3d_qpu_writes_tmu(const struct v3d_qpu_instr * inst)684 v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
685 {
686 return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
687 ((inst->alu.add.magic_write &&
688 v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) ||
689 (inst->alu.mul.magic_write &&
690 v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))));
691 }
692
693 bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr * inst)694 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst)
695 {
696 return v3d_qpu_writes_tmu(inst) &&
697 (!inst->alu.add.magic_write ||
698 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
699 (!inst->alu.mul.magic_write ||
700 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
701 }
702
703 bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr * inst)704 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
705 {
706 if (inst->sig.ldvpm)
707 return true;
708
709 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
710 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
711 return true;
712 }
713
714 return false;
715 }
716
717 bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr * inst)718 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
719 {
720 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
721 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
722 return true;
723
724 if (inst->alu.add.magic_write &&
725 v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
726 return true;
727 }
728
729 if (inst->alu.mul.magic_write &&
730 v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
731 return true;
732 }
733 }
734
735 return false;
736 }
737
738 static bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr * inst)739 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
740 {
741 return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
742 inst->alu.add.op == V3D_QPU_A_VPMWT;
743 }
744
/**
 * True when either v3d_qpu_reads_vpm() or v3d_qpu_writes_vpm() holds.
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
750
/**
 * True when the instruction reads, writes or waits on the VPM, as
 * decided by v3d_qpu_reads_vpm(), v3d_qpu_writes_vpm() and
 * v3d_qpu_waits_vpm() in that order.
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        if (v3d_qpu_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
758
759 bool
v3d_qpu_writes_r3(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)760 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
761 const struct v3d_qpu_instr *inst)
762 {
763 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
764 if (inst->alu.add.magic_write &&
765 inst->alu.add.waddr == V3D_QPU_WADDR_R3) {
766 return true;
767 }
768
769 if (inst->alu.mul.magic_write &&
770 inst->alu.mul.waddr == V3D_QPU_WADDR_R3) {
771 return true;
772 }
773 }
774
775 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
776 inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
777 return true;
778 }
779
780 return inst->sig.ldvary || inst->sig.ldvpm;
781 }
782
783 bool
v3d_qpu_writes_r4(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)784 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
785 const struct v3d_qpu_instr *inst)
786 {
787 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
788 if (inst->alu.add.magic_write &&
789 (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
790 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
791 return true;
792 }
793
794 if (inst->alu.mul.magic_write &&
795 (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
796 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
797 return true;
798 }
799 }
800
801 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
802 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
803 return true;
804 } else if (inst->sig.ldtmu) {
805 return true;
806 }
807
808 return false;
809 }
810
811 bool
v3d_qpu_writes_r5(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)812 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
813 const struct v3d_qpu_instr *inst)
814 {
815 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
816 if (inst->alu.add.magic_write &&
817 inst->alu.add.waddr == V3D_QPU_WADDR_R5) {
818 return true;
819 }
820
821 if (inst->alu.mul.magic_write &&
822 inst->alu.mul.waddr == V3D_QPU_WADDR_R5) {
823 return true;
824 }
825 }
826
827 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
828 inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
829 return true;
830 }
831
832 return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
833 }
834
835 bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)836 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
837 {
838 int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
839 int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
840
841 return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
842 (add_nsrc > 1 && inst->alu.add.b == mux) ||
843 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
844 (mul_nsrc > 1 && inst->alu.mul.b == mux));
845 }
846
847 bool
v3d_qpu_sig_writes_address(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig)848 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
849 const struct v3d_qpu_sig *sig)
850 {
851 if (devinfo->ver < 41)
852 return false;
853
854 return (sig->ldunifrf ||
855 sig->ldunifarf ||
856 sig->ldvary ||
857 sig->ldtmu ||
858 sig->ldtlb ||
859 sig->ldtlbu);
860 }
861
862 bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr * inst)863 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
864 {
865 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
866 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
867 } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
868 if (inst->flags.ac != V3D_QPU_COND_NONE ||
869 inst->flags.mc != V3D_QPU_COND_NONE ||
870 inst->flags.auf != V3D_QPU_UF_NONE ||
871 inst->flags.muf != V3D_QPU_UF_NONE)
872 return true;
873
874 switch (inst->alu.add.op) {
875 case V3D_QPU_A_VFLA:
876 case V3D_QPU_A_VFLNA:
877 case V3D_QPU_A_VFLB:
878 case V3D_QPU_A_VFLNB:
879 case V3D_QPU_A_FLAPUSH:
880 case V3D_QPU_A_FLBPUSH:
881 return true;
882 default:
883 break;
884 }
885 }
886
887 return false;
888 }
889
890 bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr * inst)891 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
892 {
893 if (inst->flags.apf != V3D_QPU_PF_NONE ||
894 inst->flags.mpf != V3D_QPU_PF_NONE ||
895 inst->flags.auf != V3D_QPU_UF_NONE ||
896 inst->flags.muf != V3D_QPU_UF_NONE) {
897 return true;
898 }
899
900 return false;
901 }
902
903 bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr * inst)904 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
905 {
906 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
907 return false;
908
909 switch (inst->alu.add.op) {
910 case V3D_QPU_A_FADD:
911 case V3D_QPU_A_FADDNF:
912 case V3D_QPU_A_FSUB:
913 case V3D_QPU_A_FMIN:
914 case V3D_QPU_A_FMAX:
915 case V3D_QPU_A_FCMP:
916 case V3D_QPU_A_FROUND:
917 case V3D_QPU_A_FTRUNC:
918 case V3D_QPU_A_FFLOOR:
919 case V3D_QPU_A_FCEIL:
920 case V3D_QPU_A_FDX:
921 case V3D_QPU_A_FDY:
922 case V3D_QPU_A_FTOIN:
923 case V3D_QPU_A_FTOIZ:
924 case V3D_QPU_A_FTOUZ:
925 case V3D_QPU_A_FTOC:
926 case V3D_QPU_A_VFPACK:
927 return true;
928 break;
929 default:
930 break;
931 }
932
933 switch (inst->alu.mul.op) {
934 case V3D_QPU_M_FMOV:
935 case V3D_QPU_M_FMUL:
936 return true;
937 break;
938 default:
939 break;
940 }
941
942 return false;
943 }
944 bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr * inst)945 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
946 {
947 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
948 return false;
949
950 switch (inst->alu.add.op) {
951 case V3D_QPU_A_VFMIN:
952 case V3D_QPU_A_VFMAX:
953 return true;
954 break;
955 default:
956 break;
957 }
958
959 switch (inst->alu.mul.op) {
960 case V3D_QPU_M_VFMUL:
961 return true;
962 break;
963 default:
964 break;
965 }
966
967 return false;
968 }
969