• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include <stdlib.h>
25 #include <string.h>
26 #include "util/macros.h"
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
29 
30 const char *
v3d_qpu_magic_waddr_name(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)31 v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
32                          enum v3d_qpu_waddr waddr)
33 {
34         /* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */
35         if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
36                 return "tmu";
37 
38         static const char *waddr_magic[] = {
39                 [V3D_QPU_WADDR_R0] = "r0",
40                 [V3D_QPU_WADDR_R1] = "r1",
41                 [V3D_QPU_WADDR_R2] = "r2",
42                 [V3D_QPU_WADDR_R3] = "r3",
43                 [V3D_QPU_WADDR_R4] = "r4",
44                 [V3D_QPU_WADDR_R5] = "r5",
45                 [V3D_QPU_WADDR_NOP] = "-",
46                 [V3D_QPU_WADDR_TLB] = "tlb",
47                 [V3D_QPU_WADDR_TLBU] = "tlbu",
48                 [V3D_QPU_WADDR_UNIFA] = "unifa",
49                 [V3D_QPU_WADDR_TMUL] = "tmul",
50                 [V3D_QPU_WADDR_TMUD] = "tmud",
51                 [V3D_QPU_WADDR_TMUA] = "tmua",
52                 [V3D_QPU_WADDR_TMUAU] = "tmuau",
53                 [V3D_QPU_WADDR_VPM] = "vpm",
54                 [V3D_QPU_WADDR_VPMU] = "vpmu",
55                 [V3D_QPU_WADDR_SYNC] = "sync",
56                 [V3D_QPU_WADDR_SYNCU] = "syncu",
57                 [V3D_QPU_WADDR_SYNCB] = "syncb",
58                 [V3D_QPU_WADDR_RECIP] = "recip",
59                 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
60                 [V3D_QPU_WADDR_EXP] = "exp",
61                 [V3D_QPU_WADDR_LOG] = "log",
62                 [V3D_QPU_WADDR_SIN] = "sin",
63                 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
64                 [V3D_QPU_WADDR_TMUC] = "tmuc",
65                 [V3D_QPU_WADDR_TMUS] = "tmus",
66                 [V3D_QPU_WADDR_TMUT] = "tmut",
67                 [V3D_QPU_WADDR_TMUR] = "tmur",
68                 [V3D_QPU_WADDR_TMUI] = "tmui",
69                 [V3D_QPU_WADDR_TMUB] = "tmub",
70                 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
71                 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
72                 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
73                 [V3D_QPU_WADDR_TMUSF] = "tmusf",
74                 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
75                 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
76                 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
77                 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
78                 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
79                 [V3D_QPU_WADDR_R5REP] = "r5rep",
80         };
81 
82         return waddr_magic[waddr];
83 }
84 
85 const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)86 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
87 {
88         static const char *op_names[] = {
89                 [V3D_QPU_A_FADD] = "fadd",
90                 [V3D_QPU_A_FADDNF] = "faddnf",
91                 [V3D_QPU_A_VFPACK] = "vfpack",
92                 [V3D_QPU_A_ADD] = "add",
93                 [V3D_QPU_A_SUB] = "sub",
94                 [V3D_QPU_A_FSUB] = "fsub",
95                 [V3D_QPU_A_MIN] = "min",
96                 [V3D_QPU_A_MAX] = "max",
97                 [V3D_QPU_A_UMIN] = "umin",
98                 [V3D_QPU_A_UMAX] = "umax",
99                 [V3D_QPU_A_SHL] = "shl",
100                 [V3D_QPU_A_SHR] = "shr",
101                 [V3D_QPU_A_ASR] = "asr",
102                 [V3D_QPU_A_ROR] = "ror",
103                 [V3D_QPU_A_FMIN] = "fmin",
104                 [V3D_QPU_A_FMAX] = "fmax",
105                 [V3D_QPU_A_VFMIN] = "vfmin",
106                 [V3D_QPU_A_AND] = "and",
107                 [V3D_QPU_A_OR] = "or",
108                 [V3D_QPU_A_XOR] = "xor",
109                 [V3D_QPU_A_VADD] = "vadd",
110                 [V3D_QPU_A_VSUB] = "vsub",
111                 [V3D_QPU_A_NOT] = "not",
112                 [V3D_QPU_A_NEG] = "neg",
113                 [V3D_QPU_A_FLAPUSH] = "flapush",
114                 [V3D_QPU_A_FLBPUSH] = "flbpush",
115                 [V3D_QPU_A_FLPOP] = "flpop",
116                 [V3D_QPU_A_RECIP] = "recip",
117                 [V3D_QPU_A_SETMSF] = "setmsf",
118                 [V3D_QPU_A_SETREVF] = "setrevf",
119                 [V3D_QPU_A_NOP] = "nop",
120                 [V3D_QPU_A_TIDX] = "tidx",
121                 [V3D_QPU_A_EIDX] = "eidx",
122                 [V3D_QPU_A_LR] = "lr",
123                 [V3D_QPU_A_VFLA] = "vfla",
124                 [V3D_QPU_A_VFLNA] = "vflna",
125                 [V3D_QPU_A_VFLB] = "vflb",
126                 [V3D_QPU_A_VFLNB] = "vflnb",
127                 [V3D_QPU_A_FXCD] = "fxcd",
128                 [V3D_QPU_A_XCD] = "xcd",
129                 [V3D_QPU_A_FYCD] = "fycd",
130                 [V3D_QPU_A_YCD] = "ycd",
131                 [V3D_QPU_A_MSF] = "msf",
132                 [V3D_QPU_A_REVF] = "revf",
133                 [V3D_QPU_A_VDWWT] = "vdwwt",
134                 [V3D_QPU_A_IID] = "iid",
135                 [V3D_QPU_A_SAMPID] = "sampid",
136                 [V3D_QPU_A_BARRIERID] = "barrierid",
137                 [V3D_QPU_A_TMUWT] = "tmuwt",
138                 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
139                 [V3D_QPU_A_VPMWT] = "vpmwt",
140                 [V3D_QPU_A_FLAFIRST] = "flafirst",
141                 [V3D_QPU_A_FLNAFIRST] = "flnafirst",
142                 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
143                 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
144                 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
145                 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
146                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
147                 [V3D_QPU_A_RSQRT] = "rsqrt",
148                 [V3D_QPU_A_EXP] = "exp",
149                 [V3D_QPU_A_LOG] = "log",
150                 [V3D_QPU_A_SIN] = "sin",
151                 [V3D_QPU_A_RSQRT2] = "rsqrt2",
152                 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
153                 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
154                 [V3D_QPU_A_FCMP] = "fcmp",
155                 [V3D_QPU_A_VFMAX] = "vfmax",
156                 [V3D_QPU_A_FROUND] = "fround",
157                 [V3D_QPU_A_FTOIN] = "ftoin",
158                 [V3D_QPU_A_FTRUNC] = "ftrunc",
159                 [V3D_QPU_A_FTOIZ] = "ftoiz",
160                 [V3D_QPU_A_FFLOOR] = "ffloor",
161                 [V3D_QPU_A_FTOUZ] = "ftouz",
162                 [V3D_QPU_A_FCEIL] = "fceil",
163                 [V3D_QPU_A_FTOC] = "ftoc",
164                 [V3D_QPU_A_FDX] = "fdx",
165                 [V3D_QPU_A_FDY] = "fdy",
166                 [V3D_QPU_A_STVPMV] = "stvpmv",
167                 [V3D_QPU_A_STVPMD] = "stvpmd",
168                 [V3D_QPU_A_STVPMP] = "stvpmp",
169                 [V3D_QPU_A_ITOF] = "itof",
170                 [V3D_QPU_A_CLZ] = "clz",
171                 [V3D_QPU_A_UTOF] = "utof",
172         };
173 
174         if (op >= ARRAY_SIZE(op_names))
175                 return NULL;
176 
177         return op_names[op];
178 }
179 
180 const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)181 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
182 {
183         static const char *op_names[] = {
184                 [V3D_QPU_M_ADD] = "add",
185                 [V3D_QPU_M_SUB] = "sub",
186                 [V3D_QPU_M_UMUL24] = "umul24",
187                 [V3D_QPU_M_VFMUL] = "vfmul",
188                 [V3D_QPU_M_SMUL24] = "smul24",
189                 [V3D_QPU_M_MULTOP] = "multop",
190                 [V3D_QPU_M_FMOV] = "fmov",
191                 [V3D_QPU_M_MOV] = "mov",
192                 [V3D_QPU_M_NOP] = "nop",
193                 [V3D_QPU_M_FMUL] = "fmul",
194         };
195 
196         if (op >= ARRAY_SIZE(op_names))
197                 return NULL;
198 
199         return op_names[op];
200 }
201 
202 const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)203 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
204 {
205         switch (cond) {
206         case V3D_QPU_COND_NONE:
207                 return "";
208         case V3D_QPU_COND_IFA:
209                 return ".ifa";
210         case V3D_QPU_COND_IFB:
211                 return ".ifb";
212         case V3D_QPU_COND_IFNA:
213                 return ".ifna";
214         case V3D_QPU_COND_IFNB:
215                 return ".ifnb";
216         default:
217                 unreachable("bad cond value");
218         }
219 }
220 
221 const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)222 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
223 {
224         switch (cond) {
225         case V3D_QPU_BRANCH_COND_ALWAYS:
226                 return "";
227         case V3D_QPU_BRANCH_COND_A0:
228                 return ".a0";
229         case V3D_QPU_BRANCH_COND_NA0:
230                 return ".na0";
231         case V3D_QPU_BRANCH_COND_ALLA:
232                 return ".alla";
233         case V3D_QPU_BRANCH_COND_ANYNA:
234                 return ".anyna";
235         case V3D_QPU_BRANCH_COND_ANYA:
236                 return ".anya";
237         case V3D_QPU_BRANCH_COND_ALLNA:
238                 return ".allna";
239         default:
240                 unreachable("bad branch cond value");
241         }
242 }
243 
244 const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)245 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
246 {
247         switch (msfign) {
248         case V3D_QPU_MSFIGN_NONE:
249                 return "";
250         case V3D_QPU_MSFIGN_P:
251                 return "p";
252         case V3D_QPU_MSFIGN_Q:
253                 return "q";
254         default:
255                 unreachable("bad branch cond value");
256         }
257 }
258 
259 const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)260 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
261 {
262         switch (pf) {
263         case V3D_QPU_PF_NONE:
264                 return "";
265         case V3D_QPU_PF_PUSHZ:
266                 return ".pushz";
267         case V3D_QPU_PF_PUSHN:
268                 return ".pushn";
269         case V3D_QPU_PF_PUSHC:
270                 return ".pushc";
271         default:
272                 unreachable("bad pf value");
273         }
274 }
275 
276 const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)277 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
278 {
279         switch (uf) {
280         case V3D_QPU_UF_NONE:
281                 return "";
282         case V3D_QPU_UF_ANDZ:
283                 return ".andz";
284         case V3D_QPU_UF_ANDNZ:
285                 return ".andnz";
286         case V3D_QPU_UF_NORZ:
287                 return ".norz";
288         case V3D_QPU_UF_NORNZ:
289                 return ".nornz";
290         case V3D_QPU_UF_ANDN:
291                 return ".andn";
292         case V3D_QPU_UF_ANDNN:
293                 return ".andnn";
294         case V3D_QPU_UF_NORN:
295                 return ".norn";
296         case V3D_QPU_UF_NORNN:
297                 return ".nornn";
298         case V3D_QPU_UF_ANDC:
299                 return ".andc";
300         case V3D_QPU_UF_ANDNC:
301                 return ".andnc";
302         case V3D_QPU_UF_NORC:
303                 return ".norc";
304         case V3D_QPU_UF_NORNC:
305                 return ".nornc";
306         default:
307                 unreachable("bad pf value");
308         }
309 }
310 
311 const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)312 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
313 {
314         switch (pack) {
315         case V3D_QPU_PACK_NONE:
316                 return "";
317         case V3D_QPU_PACK_L:
318                 return ".l";
319         case V3D_QPU_PACK_H:
320                 return ".h";
321         default:
322                 unreachable("bad pack value");
323         }
324 }
325 
326 const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)327 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
328 {
329         switch (unpack) {
330         case V3D_QPU_UNPACK_NONE:
331                 return "";
332         case V3D_QPU_UNPACK_L:
333                 return ".l";
334         case V3D_QPU_UNPACK_H:
335                 return ".h";
336         case V3D_QPU_UNPACK_ABS:
337                 return ".abs";
338         case V3D_QPU_UNPACK_REPLICATE_32F_16:
339                 return ".ff";
340         case V3D_QPU_UNPACK_REPLICATE_L_16:
341                 return ".ll";
342         case V3D_QPU_UNPACK_REPLICATE_H_16:
343                 return ".hh";
344         case V3D_QPU_UNPACK_SWAP_16:
345                 return ".swp";
346         default:
347                 unreachable("bad unpack value");
348         }
349 }
350 
351 #define D	1
352 #define A	2
353 #define B	4
354 static const uint8_t add_op_args[] = {
355         [V3D_QPU_A_FADD] = D | A | B,
356         [V3D_QPU_A_FADDNF] = D | A | B,
357         [V3D_QPU_A_VFPACK] = D | A | B,
358         [V3D_QPU_A_ADD] = D | A | B,
359         [V3D_QPU_A_VFPACK] = D | A | B,
360         [V3D_QPU_A_SUB] = D | A | B,
361         [V3D_QPU_A_VFPACK] = D | A | B,
362         [V3D_QPU_A_FSUB] = D | A | B,
363         [V3D_QPU_A_MIN] = D | A | B,
364         [V3D_QPU_A_MAX] = D | A | B,
365         [V3D_QPU_A_UMIN] = D | A | B,
366         [V3D_QPU_A_UMAX] = D | A | B,
367         [V3D_QPU_A_SHL] = D | A | B,
368         [V3D_QPU_A_SHR] = D | A | B,
369         [V3D_QPU_A_ASR] = D | A | B,
370         [V3D_QPU_A_ROR] = D | A | B,
371         [V3D_QPU_A_FMIN] = D | A | B,
372         [V3D_QPU_A_FMAX] = D | A | B,
373         [V3D_QPU_A_VFMIN] = D | A | B,
374 
375         [V3D_QPU_A_AND] = D | A | B,
376         [V3D_QPU_A_OR] = D | A | B,
377         [V3D_QPU_A_XOR] = D | A | B,
378 
379         [V3D_QPU_A_VADD] = D | A | B,
380         [V3D_QPU_A_VSUB] = D | A | B,
381         [V3D_QPU_A_NOT] = D | A,
382         [V3D_QPU_A_NEG] = D | A,
383         [V3D_QPU_A_FLAPUSH] = D | A,
384         [V3D_QPU_A_FLBPUSH] = D | A,
385         [V3D_QPU_A_FLPOP] = D | A,
386         [V3D_QPU_A_RECIP] = D | A,
387         [V3D_QPU_A_SETMSF] = D | A,
388         [V3D_QPU_A_SETREVF] = D | A,
389         [V3D_QPU_A_NOP] = 0,
390         [V3D_QPU_A_TIDX] = D,
391         [V3D_QPU_A_EIDX] = D,
392         [V3D_QPU_A_LR] = D,
393         [V3D_QPU_A_VFLA] = D,
394         [V3D_QPU_A_VFLNA] = D,
395         [V3D_QPU_A_VFLB] = D,
396         [V3D_QPU_A_VFLNB] = D,
397 
398         [V3D_QPU_A_FXCD] = D,
399         [V3D_QPU_A_XCD] = D,
400         [V3D_QPU_A_FYCD] = D,
401         [V3D_QPU_A_YCD] = D,
402 
403         [V3D_QPU_A_MSF] = D,
404         [V3D_QPU_A_REVF] = D,
405         [V3D_QPU_A_VDWWT] = D,
406         [V3D_QPU_A_IID] = D,
407         [V3D_QPU_A_SAMPID] = D,
408         [V3D_QPU_A_BARRIERID] = D,
409         [V3D_QPU_A_TMUWT] = D,
410         [V3D_QPU_A_VPMWT] = D,
411         [V3D_QPU_A_FLAFIRST] = D,
412         [V3D_QPU_A_FLNAFIRST] = D,
413 
414         [V3D_QPU_A_VPMSETUP] = D | A,
415 
416         [V3D_QPU_A_LDVPMV_IN] = D | A,
417         [V3D_QPU_A_LDVPMV_OUT] = D | A,
418         [V3D_QPU_A_LDVPMD_IN] = D | A,
419         [V3D_QPU_A_LDVPMD_OUT] = D | A,
420         [V3D_QPU_A_LDVPMP] = D | A,
421         [V3D_QPU_A_RSQRT] = D | A,
422         [V3D_QPU_A_EXP] = D | A,
423         [V3D_QPU_A_LOG] = D | A,
424         [V3D_QPU_A_SIN] = D | A,
425         [V3D_QPU_A_RSQRT2] = D | A,
426         [V3D_QPU_A_LDVPMG_IN] = D | A | B,
427         [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
428 
429         /* FIXME: MOVABSNEG */
430 
431         [V3D_QPU_A_FCMP] = D | A | B,
432         [V3D_QPU_A_VFMAX] = D | A | B,
433 
434         [V3D_QPU_A_FROUND] = D | A,
435         [V3D_QPU_A_FTOIN] = D | A,
436         [V3D_QPU_A_FTRUNC] = D | A,
437         [V3D_QPU_A_FTOIZ] = D | A,
438         [V3D_QPU_A_FFLOOR] = D | A,
439         [V3D_QPU_A_FTOUZ] = D | A,
440         [V3D_QPU_A_FCEIL] = D | A,
441         [V3D_QPU_A_FTOC] = D | A,
442 
443         [V3D_QPU_A_FDX] = D | A,
444         [V3D_QPU_A_FDY] = D | A,
445 
446         [V3D_QPU_A_STVPMV] = A | B,
447         [V3D_QPU_A_STVPMD] = A | B,
448         [V3D_QPU_A_STVPMP] = A | B,
449 
450         [V3D_QPU_A_ITOF] = D | A,
451         [V3D_QPU_A_CLZ] = D | A,
452         [V3D_QPU_A_UTOF] = D | A,
453 };
454 
455 static const uint8_t mul_op_args[] = {
456         [V3D_QPU_M_ADD] = D | A | B,
457         [V3D_QPU_M_SUB] = D | A | B,
458         [V3D_QPU_M_UMUL24] = D | A | B,
459         [V3D_QPU_M_VFMUL] = D | A | B,
460         [V3D_QPU_M_SMUL24] = D | A | B,
461         [V3D_QPU_M_MULTOP] = D | A | B,
462         [V3D_QPU_M_FMOV] = D | A,
463         [V3D_QPU_M_NOP] = 0,
464         [V3D_QPU_M_MOV] = D | A,
465         [V3D_QPU_M_FMUL] = D | A | B,
466 };
467 
468 bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)469 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
470 {
471         assert(op < ARRAY_SIZE(add_op_args));
472 
473         return add_op_args[op] & D;
474 }
475 
476 bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)477 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
478 {
479         assert(op < ARRAY_SIZE(mul_op_args));
480 
481         return mul_op_args[op] & D;
482 }
483 
484 int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)485 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
486 {
487         assert(op < ARRAY_SIZE(add_op_args));
488 
489         uint8_t args = add_op_args[op];
490         if (args & B)
491                 return 2;
492         else if (args & A)
493                 return 1;
494         else
495                 return 0;
496 }
497 
498 int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)499 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
500 {
501         assert(op < ARRAY_SIZE(mul_op_args));
502 
503         uint8_t args = mul_op_args[op];
504         if (args & B)
505                 return 2;
506         else if (args & A)
507                 return 1;
508         else
509                 return 0;
510 }
511 
512 enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)513 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
514 {
515         switch (cond) {
516         case V3D_QPU_COND_IFA:
517                 return V3D_QPU_COND_IFNA;
518         case V3D_QPU_COND_IFNA:
519                 return V3D_QPU_COND_IFA;
520         case V3D_QPU_COND_IFB:
521                 return V3D_QPU_COND_IFNB;
522         case V3D_QPU_COND_IFNB:
523                 return V3D_QPU_COND_IFB;
524         default:
525                 unreachable("Non-invertible cond");
526         }
527 }
528 
529 bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)530 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
531 {
532         switch (waddr) {
533         case V3D_QPU_WADDR_RECIP:
534         case V3D_QPU_WADDR_RSQRT:
535         case V3D_QPU_WADDR_EXP:
536         case V3D_QPU_WADDR_LOG:
537         case V3D_QPU_WADDR_SIN:
538         case V3D_QPU_WADDR_RSQRT2:
539                 return true;
540         default:
541                 return false;
542         }
543 }
544 
545 bool
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)546 v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
547                            enum v3d_qpu_waddr waddr)
548 {
549         if (devinfo->ver >= 40) {
550                 return ((waddr >= V3D_QPU_WADDR_TMUD &&
551                          waddr <= V3D_QPU_WADDR_TMUAU) ||
552                        (waddr >= V3D_QPU_WADDR_TMUC &&
553                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
554         } else {
555                 return ((waddr >= V3D_QPU_WADDR_TMU &&
556                          waddr <= V3D_QPU_WADDR_TMUAU) ||
557                        (waddr >= V3D_QPU_WADDR_TMUC &&
558                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
559         }
560 }
561 
562 bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr * inst)563 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
564 {
565         return (inst->sig.ldtmu ||
566                 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
567                  inst->alu.add.op == V3D_QPU_A_TMUWT));
568 }
569 
570 bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)571 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
572 {
573         return (waddr == V3D_QPU_WADDR_TLB ||
574                 waddr == V3D_QPU_WADDR_TLBU);
575 }
576 
577 bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)578 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
579 {
580         return (waddr == V3D_QPU_WADDR_VPM ||
581                 waddr == V3D_QPU_WADDR_VPMU);
582 }
583 
584 bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)585 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
586 {
587         return (waddr == V3D_QPU_WADDR_SYNC ||
588                 waddr == V3D_QPU_WADDR_SYNCB ||
589                 waddr == V3D_QPU_WADDR_SYNCU);
590 }
591 
592 bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)593 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
594 {
595         switch (waddr) {
596         case V3D_QPU_WADDR_VPMU:
597         case V3D_QPU_WADDR_TLBU:
598         case V3D_QPU_WADDR_TMUAU:
599         case V3D_QPU_WADDR_SYNCU:
600                 return true;
601         default:
602                 return false;
603         }
604 }
605 
606 static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)607 v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
608 {
609         switch (op) {
610         case V3D_QPU_A_VPMSETUP:
611         case V3D_QPU_A_LDVPMV_IN:
612         case V3D_QPU_A_LDVPMV_OUT:
613         case V3D_QPU_A_LDVPMD_IN:
614         case V3D_QPU_A_LDVPMD_OUT:
615         case V3D_QPU_A_LDVPMP:
616         case V3D_QPU_A_LDVPMG_IN:
617         case V3D_QPU_A_LDVPMG_OUT:
618                 return true;
619         default:
620                 return false;
621         }
622 }
623 
624 static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)625 v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
626 {
627         switch (op) {
628         case V3D_QPU_A_VPMSETUP:
629         case V3D_QPU_A_STVPMV:
630         case V3D_QPU_A_STVPMD:
631         case V3D_QPU_A_STVPMP:
632                 return true;
633         default:
634                 return false;
635         }
636 }
637 
638 bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr * inst)639 v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
640 {
641         if (inst->sig.ldtlb ||
642             inst->sig.ldtlbu)
643                 return true;
644 
645         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
646                 if (inst->alu.add.magic_write &&
647                     v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
648                         return true;
649                 }
650 
651                 if (inst->alu.mul.magic_write &&
652                     v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
653                         return true;
654                 }
655         }
656 
657         return false;
658 }
659 
660 bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr * inst)661 v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
662 {
663         if (v3d_qpu_instr_is_sfu(inst))
664                 return true;
665 
666         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
667                 if (inst->alu.add.magic_write &&
668                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
669                         return true;
670                 }
671 
672                 if (inst->alu.mul.magic_write &&
673                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
674                         return true;
675                 }
676         }
677 
678         return false;
679 }
680 
681 bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr * inst)682 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
683 {
684         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
685                 switch (inst->alu.add.op) {
686                 case V3D_QPU_A_RECIP:
687                 case V3D_QPU_A_RSQRT:
688                 case V3D_QPU_A_EXP:
689                 case V3D_QPU_A_LOG:
690                 case V3D_QPU_A_SIN:
691                 case V3D_QPU_A_RSQRT2:
692                         return true;
693                 default:
694                         return false;
695                 }
696         }
697         return false;
698 }
699 
700 bool
v3d_qpu_writes_tmu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)701 v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
702                    const struct v3d_qpu_instr *inst)
703 {
704         return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
705                 ((inst->alu.add.magic_write &&
706                   v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
707                  (inst->alu.mul.magic_write &&
708                   v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
709 }
710 
711 bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)712 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
713                             const struct v3d_qpu_instr *inst)
714 {
715         return v3d_qpu_writes_tmu(devinfo, inst) &&
716                (!inst->alu.add.magic_write ||
717                 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
718                (!inst->alu.mul.magic_write ||
719                 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
720 }
721 
722 bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr * inst)723 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
724 {
725         if (inst->sig.ldvpm)
726                 return true;
727 
728         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
729                 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
730                         return true;
731         }
732 
733         return false;
734 }
735 
736 bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr * inst)737 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
738 {
739         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
740                 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
741                         return true;
742 
743                 if (inst->alu.add.magic_write &&
744                     v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
745                         return true;
746                 }
747 
748                 if (inst->alu.mul.magic_write &&
749                     v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
750                         return true;
751                 }
752         }
753 
754         return false;
755 }
756 
757 bool
v3d_qpu_writes_unifa(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)758 v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
759                      const struct v3d_qpu_instr *inst)
760 {
761         if (devinfo->ver < 40)
762                 return false;
763 
764         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
765                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
766                     inst->alu.add.magic_write &&
767                     inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
768                         return true;
769                 }
770 
771                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
772                     inst->alu.mul.magic_write &&
773                     inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
774                         return true;
775                 }
776         }
777 
778         return false;
779 }
780 
781 static bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr * inst)782 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
783 {
784         return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
785                inst->alu.add.op == V3D_QPU_A_VPMWT;
786 }
787 
/**
 * Reports whether the instruction reads or writes the VPM, as decided by
 * v3d_qpu_reads_vpm() and v3d_qpu_writes_vpm().
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
793 
/**
 * Reports whether the instruction reads, writes or waits on the VPM.
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        if (v3d_qpu_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
801 
802 static bool
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint32_t waddr)803 qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
804                                   const struct v3d_qpu_instr *inst,
805                                   uint32_t waddr)
806 {
807         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
808                 if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
809                         return true;
810 
811                 if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
812                         return true;
813         }
814 
815         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
816             inst->sig_magic && inst->sig_addr == waddr) {
817                 return true;
818         }
819 
820         return false;
821 }
822 
823 bool
v3d_qpu_writes_r3(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)824 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
825                   const struct v3d_qpu_instr *inst)
826 {
827         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
828                 return true;
829 
830         return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
831 }
832 
833 bool
v3d_qpu_writes_r4(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)834 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
835                   const struct v3d_qpu_instr *inst)
836 {
837         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
838                 if (inst->alu.add.magic_write &&
839                     (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
840                      v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
841                         return true;
842                 }
843 
844                 if (inst->alu.mul.magic_write &&
845                     (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
846                      v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
847                         return true;
848                 }
849         }
850 
851         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
852                 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
853                         return true;
854         } else if (inst->sig.ldtmu) {
855                 return true;
856         }
857 
858         return false;
859 }
860 
861 bool
v3d_qpu_writes_r5(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)862 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
863                   const struct v3d_qpu_instr *inst)
864 {
865         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
866                 return true;
867 
868         return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
869 }
870 
871 bool
v3d_qpu_writes_accum(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)872 v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
873                      const struct v3d_qpu_instr *inst)
874 {
875         if (v3d_qpu_writes_r5(devinfo, inst))
876                 return true;
877         if (v3d_qpu_writes_r4(devinfo, inst))
878                 return true;
879         if (v3d_qpu_writes_r3(devinfo, inst))
880                 return true;
881         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
882                 return true;
883         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
884                 return true;
885         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
886                 return true;
887 
888         return false;
889 }
890 
891 bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)892 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
893 {
894         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
895         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
896 
897         return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
898                 (add_nsrc > 1 && inst->alu.add.b == mux) ||
899                 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
900                 (mul_nsrc > 1 && inst->alu.mul.b == mux));
901 }
902 
903 bool
v3d_qpu_sig_writes_address(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig)904 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
905                            const struct v3d_qpu_sig *sig)
906 {
907         if (devinfo->ver < 41)
908                 return false;
909 
910         return (sig->ldunifrf ||
911                 sig->ldunifarf ||
912                 sig->ldvary ||
913                 sig->ldtmu ||
914                 sig->ldtlb ||
915                 sig->ldtlbu);
916 }
917 
918 bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr * inst)919 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
920 {
921         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
922                 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
923         } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
924                 if (inst->flags.ac != V3D_QPU_COND_NONE ||
925                     inst->flags.mc != V3D_QPU_COND_NONE ||
926                     inst->flags.auf != V3D_QPU_UF_NONE ||
927                     inst->flags.muf != V3D_QPU_UF_NONE)
928                         return true;
929 
930                 switch (inst->alu.add.op) {
931                 case V3D_QPU_A_VFLA:
932                 case V3D_QPU_A_VFLNA:
933                 case V3D_QPU_A_VFLB:
934                 case V3D_QPU_A_VFLNB:
935                 case V3D_QPU_A_FLAPUSH:
936                 case V3D_QPU_A_FLBPUSH:
937                 case V3D_QPU_A_FLAFIRST:
938                 case V3D_QPU_A_FLNAFIRST:
939                         return true;
940                 default:
941                         break;
942                 }
943         }
944 
945         return false;
946 }
947 
948 bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr * inst)949 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
950 {
951         if (inst->flags.apf != V3D_QPU_PF_NONE ||
952             inst->flags.mpf != V3D_QPU_PF_NONE ||
953             inst->flags.auf != V3D_QPU_UF_NONE ||
954             inst->flags.muf != V3D_QPU_UF_NONE) {
955                 return true;
956         }
957 
958         return false;
959 }
960 
961 bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr * inst)962 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
963 {
964         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
965                 return false;
966 
967         switch (inst->alu.add.op) {
968         case V3D_QPU_A_FADD:
969         case V3D_QPU_A_FADDNF:
970         case V3D_QPU_A_FSUB:
971         case V3D_QPU_A_FMIN:
972         case V3D_QPU_A_FMAX:
973         case V3D_QPU_A_FCMP:
974         case V3D_QPU_A_FROUND:
975         case V3D_QPU_A_FTRUNC:
976         case V3D_QPU_A_FFLOOR:
977         case V3D_QPU_A_FCEIL:
978         case V3D_QPU_A_FDX:
979         case V3D_QPU_A_FDY:
980         case V3D_QPU_A_FTOIN:
981         case V3D_QPU_A_FTOIZ:
982         case V3D_QPU_A_FTOUZ:
983         case V3D_QPU_A_FTOC:
984         case V3D_QPU_A_VFPACK:
985                 return true;
986                 break;
987         default:
988                 break;
989         }
990 
991         switch (inst->alu.mul.op) {
992         case V3D_QPU_M_FMOV:
993         case V3D_QPU_M_FMUL:
994                 return true;
995                 break;
996         default:
997                 break;
998         }
999 
1000         return false;
1001 }
1002 bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr * inst)1003 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
1004 {
1005         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1006                 return false;
1007 
1008         switch (inst->alu.add.op) {
1009         case V3D_QPU_A_VFMIN:
1010         case V3D_QPU_A_VFMAX:
1011                 return true;
1012                 break;
1013         default:
1014                 break;
1015         }
1016 
1017         switch (inst->alu.mul.op) {
1018         case V3D_QPU_M_VFMUL:
1019                 return true;
1020                 break;
1021         default:
1022                 break;
1023         }
1024 
1025         return false;
1026 }
1027 
1028 bool
v3d_qpu_is_nop(struct v3d_qpu_instr * inst)1029 v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
1030 {
1031         static const struct v3d_qpu_sig nosig = { 0 };
1032 
1033         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1034                 return false;
1035         if (inst->alu.add.op != V3D_QPU_A_NOP)
1036                 return false;
1037         if (inst->alu.mul.op != V3D_QPU_M_NOP)
1038                 return false;
1039         if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
1040                 return false;
1041         return true;
1042 }
1043