• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file qpu_instr.h
26  *
27  * Definitions of the unpacked form of QPU instructions.  Assembly and
28  * disassembly will use this for talking about instructions, with qpu_encode.c
29  * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU
30  * instruction.
31  */
32 
33 #ifndef QPU_INSTR_H
34 #define QPU_INSTR_H
35 
36 #include <stdbool.h>
37 #include <stdint.h>
38 #include "util/macros.h"
39 
40 struct v3d_device_info;
41 
/**
 * Unpacked set of signal flags carried by an instruction (see the sig member
 * of struct v3d_qpu_instr).
 *
 * Every member is a one-bit boolean.  v3d_qpu_sig_pack() and
 * v3d_qpu_sig_unpack() are the declared entry points that take this struct
 * together with a 32-bit packed signal value.
 */
struct v3d_qpu_sig {
        bool thrsw:1;
        bool ldunif:1;
        bool ldunifa:1;
        bool ldunifrf:1;
        bool ldunifarf:1;
        bool ldtmu:1;
        bool ldvary:1;
        bool ldvpm:1;
        bool ldtlb:1;
        bool ldtlbu:1;
        bool small_imm:1;
        bool ucb:1;
        bool rotate:1;
        bool wrtmuc:1;
};
58 
/**
 * Condition selector held in the ac and mc members of struct v3d_qpu_flags.
 *
 * V3D_QPU_COND_NONE is the zero value, so a zero-initialized flags struct
 * holds it.  v3d_qpu_cond_name() and v3d_qpu_cond_invert() operate on these
 * values.
 */
enum v3d_qpu_cond {
        V3D_QPU_COND_NONE = 0,
        V3D_QPU_COND_IFA  = 1,
        V3D_QPU_COND_IFB  = 2,
        V3D_QPU_COND_IFNA = 3,
        V3D_QPU_COND_IFNB = 4,
};
66 
/**
 * Selector held in the apf and mpf members of struct v3d_qpu_flags.
 *
 * V3D_QPU_PF_NONE is the zero value.  v3d_qpu_pf_name() returns a string for
 * each value.
 */
enum v3d_qpu_pf {
        V3D_QPU_PF_NONE  = 0,
        V3D_QPU_PF_PUSHZ = 1,
        V3D_QPU_PF_PUSHN = 2,
        V3D_QPU_PF_PUSHC = 3,
};
73 
/**
 * Selector held in the auf and muf members of struct v3d_qpu_flags.
 *
 * V3D_QPU_UF_NONE is the zero value.  v3d_qpu_uf_name() returns a string for
 * each value.
 */
enum v3d_qpu_uf {
        V3D_QPU_UF_NONE  = 0,
        V3D_QPU_UF_ANDZ  = 1,
        V3D_QPU_UF_ANDNZ = 2,
        V3D_QPU_UF_NORNZ = 3,
        V3D_QPU_UF_NORZ  = 4,
        V3D_QPU_UF_ANDN  = 5,
        V3D_QPU_UF_ANDNN = 6,
        V3D_QPU_UF_NORNN = 7,
        V3D_QPU_UF_NORN  = 8,
        V3D_QPU_UF_ANDC  = 9,
        V3D_QPU_UF_ANDNC = 10,
        V3D_QPU_UF_NORNC = 11,
        V3D_QPU_UF_NORC  = 12,
};
89 
/**
 * Write-address values accepted by the v3d_qpu_magic_waddr_*() helpers
 * declared in this header.
 *
 * The numbering is explicit and has gaps: nothing is assigned to 25-31 or
 * 47-54 here.  Value 9 has two names, V3D_QPU_WADDR_TMU and
 * V3D_QPU_WADDR_UNIFA, annotated for V3D 3.x and V3D 4.x respectively, so a
 * bare value of 9 cannot be told apart without knowing the hardware version.
 */
enum v3d_qpu_waddr {
        V3D_QPU_WADDR_R0 = 0,
        V3D_QPU_WADDR_R1 = 1,
        V3D_QPU_WADDR_R2 = 2,
        V3D_QPU_WADDR_R3 = 3,
        V3D_QPU_WADDR_R4 = 4,
        V3D_QPU_WADDR_R5 = 5,
        V3D_QPU_WADDR_NOP = 6,
        V3D_QPU_WADDR_TLB = 7,
        V3D_QPU_WADDR_TLBU = 8,
        V3D_QPU_WADDR_TMU = 9,   /* V3D 3.x */
        V3D_QPU_WADDR_UNIFA = 9, /* V3D 4.x */
        V3D_QPU_WADDR_TMUL = 10,
        V3D_QPU_WADDR_TMUD = 11,
        V3D_QPU_WADDR_TMUA = 12,
        V3D_QPU_WADDR_TMUAU = 13,
        V3D_QPU_WADDR_VPM = 14,
        V3D_QPU_WADDR_VPMU = 15,
        V3D_QPU_WADDR_SYNC = 16,
        V3D_QPU_WADDR_SYNCU = 17,
        V3D_QPU_WADDR_SYNCB = 18,
        V3D_QPU_WADDR_RECIP = 19,
        V3D_QPU_WADDR_RSQRT = 20,
        V3D_QPU_WADDR_EXP = 21,
        V3D_QPU_WADDR_LOG = 22,
        V3D_QPU_WADDR_SIN = 23,
        V3D_QPU_WADDR_RSQRT2 = 24,
        V3D_QPU_WADDR_TMUC = 32,
        V3D_QPU_WADDR_TMUS = 33,
        V3D_QPU_WADDR_TMUT = 34,
        V3D_QPU_WADDR_TMUR = 35,
        V3D_QPU_WADDR_TMUI = 36,
        V3D_QPU_WADDR_TMUB = 37,
        V3D_QPU_WADDR_TMUDREF = 38,
        V3D_QPU_WADDR_TMUOFF = 39,
        V3D_QPU_WADDR_TMUSCM = 40,
        V3D_QPU_WADDR_TMUSF = 41,
        V3D_QPU_WADDR_TMUSLOD = 42,
        V3D_QPU_WADDR_TMUHS = 43,
        V3D_QPU_WADDR_TMUHSCM = 44,
        V3D_QPU_WADDR_TMUHSF = 45,
        V3D_QPU_WADDR_TMUHSLOD = 46,
        V3D_QPU_WADDR_R5REP = 55,
};
134 
135 struct v3d_qpu_flags {
136         enum v3d_qpu_cond ac, mc;
137         enum v3d_qpu_pf apf, mpf;
138         enum v3d_qpu_uf auf, muf;
139 };
140 
/**
 * Operation identifiers for the add member of struct v3d_qpu_alu_instr.
 *
 * These identify operations in the unpacked instruction form.  Values are
 * assigned sequentially from 0 in declaration order.  v3d_qpu_add_op_name(),
 * v3d_qpu_add_op_has_dst() and v3d_qpu_add_op_num_src() take these values.
 */
enum v3d_qpu_add_op {
        V3D_QPU_A_FADD = 0,
        V3D_QPU_A_FADDNF,
        V3D_QPU_A_VFPACK,
        V3D_QPU_A_ADD,
        V3D_QPU_A_SUB,
        V3D_QPU_A_FSUB,
        V3D_QPU_A_MIN,
        V3D_QPU_A_MAX,
        V3D_QPU_A_UMIN,
        V3D_QPU_A_UMAX,
        V3D_QPU_A_SHL,
        V3D_QPU_A_SHR,
        V3D_QPU_A_ASR,
        V3D_QPU_A_ROR,
        V3D_QPU_A_FMIN,
        V3D_QPU_A_FMAX,
        V3D_QPU_A_VFMIN,
        V3D_QPU_A_AND,
        V3D_QPU_A_OR,
        V3D_QPU_A_XOR,
        V3D_QPU_A_VADD,
        V3D_QPU_A_VSUB,
        V3D_QPU_A_NOT,
        V3D_QPU_A_NEG,
        V3D_QPU_A_FLAPUSH,
        V3D_QPU_A_FLBPUSH,
        V3D_QPU_A_FLPOP,
        V3D_QPU_A_RECIP,
        V3D_QPU_A_SETMSF,
        V3D_QPU_A_SETREVF,
        V3D_QPU_A_NOP,
        V3D_QPU_A_TIDX,
        V3D_QPU_A_EIDX,
        V3D_QPU_A_LR,
        V3D_QPU_A_VFLA,
        V3D_QPU_A_VFLNA,
        V3D_QPU_A_VFLB,
        V3D_QPU_A_VFLNB,
        V3D_QPU_A_FXCD,
        V3D_QPU_A_XCD,
        V3D_QPU_A_FYCD,
        V3D_QPU_A_YCD,
        V3D_QPU_A_MSF,
        V3D_QPU_A_REVF,
        V3D_QPU_A_VDWWT,
        V3D_QPU_A_IID,
        V3D_QPU_A_SAMPID,
        V3D_QPU_A_BARRIERID,
        V3D_QPU_A_TMUWT,
        V3D_QPU_A_VPMSETUP,
        V3D_QPU_A_VPMWT,
        V3D_QPU_A_FLAFIRST,
        V3D_QPU_A_FLNAFIRST,
        V3D_QPU_A_LDVPMV_IN,
        V3D_QPU_A_LDVPMV_OUT,
        V3D_QPU_A_LDVPMD_IN,
        V3D_QPU_A_LDVPMD_OUT,
        V3D_QPU_A_LDVPMP,
        V3D_QPU_A_RSQRT,
        V3D_QPU_A_EXP,
        V3D_QPU_A_LOG,
        V3D_QPU_A_SIN,
        V3D_QPU_A_RSQRT2,
        V3D_QPU_A_LDVPMG_IN,
        V3D_QPU_A_LDVPMG_OUT,
        V3D_QPU_A_FCMP,
        V3D_QPU_A_VFMAX,
        V3D_QPU_A_FROUND,
        V3D_QPU_A_FTOIN,
        V3D_QPU_A_FTRUNC,
        V3D_QPU_A_FTOIZ,
        V3D_QPU_A_FFLOOR,
        V3D_QPU_A_FTOUZ,
        V3D_QPU_A_FCEIL,
        V3D_QPU_A_FTOC,
        V3D_QPU_A_FDX,
        V3D_QPU_A_FDY,
        V3D_QPU_A_STVPMV,
        V3D_QPU_A_STVPMD,
        V3D_QPU_A_STVPMP,
        V3D_QPU_A_ITOF,
        V3D_QPU_A_CLZ,
        V3D_QPU_A_UTOF,
};
226 
/**
 * Operation identifiers for the mul member of struct v3d_qpu_alu_instr.
 *
 * v3d_qpu_mul_op_name(), v3d_qpu_mul_op_has_dst() and
 * v3d_qpu_mul_op_num_src() take these values.
 */
enum v3d_qpu_mul_op {
        V3D_QPU_M_ADD    = 0,
        V3D_QPU_M_SUB    = 1,
        V3D_QPU_M_UMUL24 = 2,
        V3D_QPU_M_VFMUL  = 3,
        V3D_QPU_M_SMUL24 = 4,
        V3D_QPU_M_MULTOP = 5,
        V3D_QPU_M_FMOV   = 6,
        V3D_QPU_M_MOV    = 7,
        V3D_QPU_M_NOP    = 8,
        V3D_QPU_M_FMUL   = 9,
};
239 
/**
 * Output packing mode of an ALU operation (the output_pack members of
 * struct v3d_qpu_alu_instr).
 */
enum v3d_qpu_output_pack {
        V3D_QPU_PACK_NONE = 0,

        /**
         * Result is converted to a 16-bit float and written to the low 16
         * bits of the destination; the high 16 bits are left unmodified.
         */
        V3D_QPU_PACK_L = 1,

        /**
         * Result is converted to a 16-bit float and written to the high 16
         * bits of the destination; the low 16 bits are left unmodified.
         */
        V3D_QPU_PACK_H = 2,
};
253 
/**
 * Input unpacking mode of an ALU operand (the a_unpack / b_unpack members of
 * struct v3d_qpu_alu_instr).
 */
enum v3d_qpu_input_unpack {
        /**
         * No input unpacking.  This is deliberately 0 so that a newly created
         * instruction defaults to no-op unpacking; it does not match the
         * value of the field in the packed QPU instruction.
         */
        V3D_QPU_UNPACK_NONE = 0,

        /** Absolute value.  Only available for some operations. */
        V3D_QPU_UNPACK_ABS = 1,

        /** Convert low 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_L = 2,

        /** Convert high 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_H = 3,

        /** Convert to 16f and replicate it to the high bits. */
        V3D_QPU_UNPACK_REPLICATE_32F_16 = 4,

        /** Replicate low 16 bits to high. */
        V3D_QPU_UNPACK_REPLICATE_L_16 = 5,

        /** Replicate high 16 bits to low. */
        V3D_QPU_UNPACK_REPLICATE_H_16 = 6,

        /** Swap high and low 16 bits. */
        V3D_QPU_UNPACK_SWAP_16 = 7,
};
280 
/**
 * Operand source selector for the a and b inputs of an ALU operation (see
 * struct v3d_qpu_alu_instr and v3d_qpu_uses_mux()).
 */
enum v3d_qpu_mux {
        V3D_QPU_MUX_R0 = 0,
        V3D_QPU_MUX_R1 = 1,
        V3D_QPU_MUX_R2 = 2,
        V3D_QPU_MUX_R3 = 3,
        V3D_QPU_MUX_R4 = 4,
        V3D_QPU_MUX_R5 = 5,
        V3D_QPU_MUX_A  = 6,
        V3D_QPU_MUX_B  = 7,
};
291 
292 struct v3d_qpu_alu_instr {
293         struct {
294                 enum v3d_qpu_add_op op;
295                 enum v3d_qpu_mux a, b;
296                 uint8_t waddr;
297                 bool magic_write;
298                 enum v3d_qpu_output_pack output_pack;
299                 enum v3d_qpu_input_unpack a_unpack;
300                 enum v3d_qpu_input_unpack b_unpack;
301         } add;
302 
303         struct {
304                 enum v3d_qpu_mul_op op;
305                 enum v3d_qpu_mux a, b;
306                 uint8_t waddr;
307                 bool magic_write;
308                 enum v3d_qpu_output_pack output_pack;
309                 enum v3d_qpu_input_unpack a_unpack;
310                 enum v3d_qpu_input_unpack b_unpack;
311         } mul;
312 };
313 
/**
 * Condition selector of a branch instruction (the cond member of struct
 * v3d_qpu_branch_instr).  v3d_qpu_branch_cond_name() returns a string for
 * each value.
 */
enum v3d_qpu_branch_cond {
        V3D_QPU_BRANCH_COND_ALWAYS = 0,
        V3D_QPU_BRANCH_COND_A0     = 1,
        V3D_QPU_BRANCH_COND_NA0    = 2,
        V3D_QPU_BRANCH_COND_ALLA   = 3,
        V3D_QPU_BRANCH_COND_ANYNA  = 4,
        V3D_QPU_BRANCH_COND_ANYA   = 5,
        V3D_QPU_BRANCH_COND_ALLNA  = 6,
};
323 
/**
 * Multisample-flag handling selector of a branch instruction (the msfign
 * member of struct v3d_qpu_branch_instr).
 */
enum v3d_qpu_msfign {
        /** Ignore multisample flags when determining branch condition. */
        V3D_QPU_MSFIGN_NONE = 0,

        /**
         * A lane (a pixel in the FS, a vertex in the VS) with no multisample
         * flags set has its condition ignored when the branch condition is
         * computed.
         */
        V3D_QPU_MSFIGN_P = 1,

        /**
         * A 2x2 quad in the FS with no multisample flags set has its a/b
         * conditions ignored.
         */
        V3D_QPU_MSFIGN_Q = 2,
};
339 
/**
 * Destination computation selector of a branch instruction; used for both
 * the bdi and bdu members of struct v3d_qpu_branch_instr.
 */
enum v3d_qpu_branch_dest {
        V3D_QPU_BRANCH_DEST_ABS      = 0,
        V3D_QPU_BRANCH_DEST_REL      = 1,
        V3D_QPU_BRANCH_DEST_LINK_REG = 2,
        V3D_QPU_BRANCH_DEST_REGFILE  = 3,
};
346 
/**
 * Branch member of the union in struct v3d_qpu_instr.
 */
struct v3d_qpu_branch_instr {
        enum v3d_qpu_branch_cond cond;
        enum v3d_qpu_msfign msfign;

        /** Selects how to compute the new IP if the branch is taken. */
        enum v3d_qpu_branch_dest bdi;

        /**
         * Selects how to compute the new uniforms pointer if the branch is
         * taken.  (ABS/REL implicitly load a uniform and use that)
         */
        enum v3d_qpu_branch_dest bdu;

        /**
         * If set, then bdu determines how the uniform stream will branch,
         * otherwise the uniform stream is left as is.
         */
        bool ub;

        /* NOTE(review): presumably the register-file address consulted by
         * the REGFILE destination mode -- confirm against the pack code.
         */
        uint8_t raddr_a;

        /* NOTE(review): presumably the branch offset or address used by the
         * ABS/REL destination modes -- confirm against the pack code.
         */
        uint32_t offset;
};
370 
/**
 * Kind of instruction stored in a struct v3d_qpu_instr (its type member).
 */
enum v3d_qpu_instr_type {
        V3D_QPU_INSTR_TYPE_ALU    = 0,
        V3D_QPU_INSTR_TYPE_BRANCH = 1,
};
375 
/**
 * Unpacked form of a single QPU instruction.
 *
 * v3d_qpu_instr_pack() and v3d_qpu_instr_unpack() take this struct together
 * with the 64-bit packed instruction.
 */
struct v3d_qpu_instr {
        /* Presumably selects which member of the anonymous union below is
         * meaningful (alu vs. branch) -- TODO confirm.
         */
        enum v3d_qpu_instr_type type;

        struct v3d_qpu_sig sig;
        uint8_t sig_addr;
        bool sig_magic; /* If the signal writes to a magic address */
        uint8_t raddr_a;
        uint8_t raddr_b;
        struct v3d_qpu_flags flags;

        /* Anonymous union: the two payloads share storage, so only one of
         * them can hold valid data at a time.
         */
        union {
                struct v3d_qpu_alu_instr alu;
                struct v3d_qpu_branch_instr branch;
        };
};
391 
/*
 * String lookups.  Each function takes one enumerator and returns a
 * const char *; presumably this is the name used when printing instructions
 * -- TODO confirm.  Only the magic-waddr lookup also takes the device info.
 */
const char *v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
                                     enum v3d_qpu_waddr waddr);
const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op);
const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op);
const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond);
const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf);
const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf);
const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack);
const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);

/* Maps a condition to another condition; takes its argument by value. */
enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;

/*
 * Per-operation properties of add and mul operations: whether the operation
 * has a destination, and how many sources it has.
 */
bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);
int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op);
410 
/*
 * Pack/unpack entry points.  Every function returns bool and writes its
 * result through the trailing pointer argument; presumably false means the
 * value could not be represented for the given device -- TODO confirm.
 */

/* Signals <-> 32-bit packed signal value. */
bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                      const struct v3d_qpu_sig *sig,
                      uint32_t *packed_sig);
bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                        uint32_t packed_sig,
                        struct v3d_qpu_sig *sig);

/* Flags <-> 32-bit packed value. */
bool v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_flags *cond,
                        uint32_t *packed_cond);
bool v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                          uint32_t packed_cond,
                          struct v3d_qpu_flags *cond);

/* 32-bit value <-> packed small immediate. */
bool v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                            uint32_t value,
                            uint32_t *packed_small_immediate);
bool v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                              uint32_t packed_small_immediate,
                              uint32_t *small_immediate);

/* Whole instruction <-> 64-bit packed instruction. */
bool v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr,
                        uint64_t *packed_instr);
bool v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                          uint64_t packed_instr,
                          struct v3d_qpu_instr *instr);
445 
446 bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
447 bool v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
448                                 enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
449 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
450 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
451 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
452 bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
453 bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
454 bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
455 bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
456 bool v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
457                         const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
458 bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
459                                  const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
460 bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
461                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
462 bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
463                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
464 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
465                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
466 bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
467                           const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
468 bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
469 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
470 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
471 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
472 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
473 bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
474 bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
475 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
476 bool v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
477                           const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
478 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
479                                 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
480 bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
481 bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
482 
483 bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
484 #endif
485