• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
27 
28 static inline struct qpu_reg
qpu_reg(int index)29 qpu_reg(int index)
30 {
31         struct qpu_reg reg = {
32                 .magic = false,
33                 .index = index,
34         };
35         return reg;
36 }
37 
38 static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41         struct qpu_reg reg = {
42                 .magic = true,
43                 .index = waddr,
44         };
45         return reg;
46 }
47 
48 static inline struct qpu_reg
qpu_acc(int acc)49 qpu_acc(int acc)
50 {
51         return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52 }
53 
54 struct v3d_qpu_instr
v3d_qpu_nop(void)55 v3d_qpu_nop(void)
56 {
57         struct v3d_qpu_instr instr = {
58                 .type = V3D_QPU_INSTR_TYPE_ALU,
59                 .alu = {
60                         .add = {
61                                 .op = V3D_QPU_A_NOP,
62                                 .waddr = V3D_QPU_WADDR_NOP,
63                                 .magic_write = true,
64                         },
65                         .mul = {
66                                 .op = V3D_QPU_M_NOP,
67                                 .waddr = V3D_QPU_WADDR_NOP,
68                                 .magic_write = true,
69                         },
70                 }
71         };
72 
73         return instr;
74 }
75 
76 static struct qinst *
vir_nop(void)77 vir_nop(void)
78 {
79         struct qreg undef = vir_nop_reg();
80         struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81 
82         return qinst;
83 }
84 
85 static struct qinst *
new_qpu_nop_before(struct qinst * inst)86 new_qpu_nop_before(struct qinst *inst)
87 {
88         struct qinst *q = vir_nop();
89 
90         list_addtail(&q->link, &inst->link);
91 
92         return q;
93 }
94 
95 /**
96  * Allocates the src register (accumulator or register file) into the RADDR
97  * fields of the instruction.
98  */
99 static void
set_src(struct v3d_qpu_instr * instr,enum v3d_qpu_mux * mux,struct qpu_reg src)100 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
101 {
102         if (src.smimm) {
103                 assert(instr->sig.small_imm);
104                 *mux = V3D_QPU_MUX_B;
105                 return;
106         }
107 
108         if (src.magic) {
109                 assert(src.index >= V3D_QPU_WADDR_R0 &&
110                        src.index <= V3D_QPU_WADDR_R5);
111                 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
112                 return;
113         }
114 
115         if (instr->alu.add.a != V3D_QPU_MUX_A &&
116             instr->alu.add.b != V3D_QPU_MUX_A &&
117             instr->alu.mul.a != V3D_QPU_MUX_A &&
118             instr->alu.mul.b != V3D_QPU_MUX_A) {
119                 instr->raddr_a = src.index;
120                 *mux = V3D_QPU_MUX_A;
121         } else {
122                 if (instr->raddr_a == src.index) {
123                         *mux = V3D_QPU_MUX_A;
124                 } else {
125                         assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
126                                  instr->alu.add.b == V3D_QPU_MUX_B &&
127                                  instr->alu.mul.a == V3D_QPU_MUX_B &&
128                                  instr->alu.mul.b == V3D_QPU_MUX_B) ||
129                                src.index == instr->raddr_b);
130 
131                         instr->raddr_b = src.index;
132                         *mux = V3D_QPU_MUX_B;
133                 }
134         }
135 }
136 
/*
 * Returns true if @qinst is a mul-unit MOV from a register to itself with
 * no signals, packing, conditions, or flag updates, so the instruction
 * can be dropped entirely without changing program behavior.
 */
static bool
is_no_op_mov(struct qinst *qinst)
{
        /* All-zero signal struct, for checking that no sig bits are set. */
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                /* Magic-waddr dest: only accumulators r0-r4 are
                 * considered (NOTE(review): r5 is deliberately excluded
                 * here even though muxes cover r0-r5 — presumably because
                 * r5 writes have special semantics; confirm).
                 */
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                /* The source mux must select the same accumulator. */
                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                /* Register-file dest: the source must come through a
                 * raddr, and that raddr must name the same register.
                 */
                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* No packing or flags updates, or we need to execute the
         * instruction.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}
190 
/*
 * Lowers every VIR instruction in @block to its final QPU register
 * encoding: resolves each QFILE_* source and the destination into a
 * physical register, accumulator, or magic waddr (using the register
 * allocation in @temp_registers), fills in the mux/waddr fields of the
 * packed instruction, and deletes MOVs that turned into no-ops.  Also
 * recounts c->num_uniforms for the instructions that survive.
 */
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        /* Tracks the last VPM row read, to assert reads stay in order. */
        int last_vpm_read_index = -1;

        /* _safe iteration: no-op MOVs may be removed mid-loop. */
        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                /* Translate each source file/index into a qpu_reg. */
                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                /* Value unused; any readable reg works. */
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                /* set_src() routes this via mux B. */
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                /* VPM reads must be in row order. */
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                /* Emit the ldvpm signal on a NOP before
                                 * this instruction; the value lands in r3.
                                 */
                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;
                        }
                }

                /* Translate the destination into a qpu_reg. */
                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                /* ldunif implicitly writes r5; any other
                                 * dest needs ldunifrf with an explicit
                                 * sig address (4.x-only feature).
                                 */
                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        qinst->qpu.sig.ldunif = false;
                                        qinst->qpu.sig.ldunifrf = true;
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                       &qinst->qpu.sig)) {
                                /* Other address-writing signals take the
                                 * dest via the sig address fields.
                                 */
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                /* Add-unit op: wire srcs/dst to .add. */
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                /* Mul-unit op (or full NOP): wire to .mul. */
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                /* A MOV that became reg-to-itself after
                                 * allocation does nothing: delete it.
                                 */
                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}
331 
332 static bool
reads_uniform(const struct v3d_device_info * devinfo,uint64_t instruction)333 reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
334 {
335         struct v3d_qpu_instr qpu;
336         ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
337         assert(ok);
338 
339         if (qpu.sig.ldunif ||
340             qpu.sig.ldunifrf ||
341             qpu.sig.ldtlbu ||
342             qpu.sig.wrtmuc) {
343                 return true;
344         }
345 
346         if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
347                 return true;
348 
349         if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
350                 if (qpu.alu.add.magic_write &&
351                     v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
352                         return true;
353                 }
354 
355                 if (qpu.alu.mul.magic_write &&
356                     v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
357                         return true;
358                 }
359         }
360 
361         return false;
362 }
363 
364 static void
v3d_dump_qpu(struct v3d_compile * c)365 v3d_dump_qpu(struct v3d_compile *c)
366 {
367         fprintf(stderr, "%s prog %d/%d QPU:\n",
368                 vir_get_stage_name(c),
369                 c->program_id, c->variant_id);
370 
371         int next_uniform = 0;
372         for (int i = 0; i < c->qpu_inst_count; i++) {
373                 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
374                 fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
375 
376                 /* We can only do this on 4.x, because we're not tracking TMU
377                  * implicit uniforms here on 3.x.
378                  */
379                 if (c->devinfo->ver >= 40 &&
380                     reads_uniform(c->devinfo, c->qpu_insts[i])) {
381                         fprintf(stderr, " (");
382                         vir_dump_uniform(c->uniform_contents[next_uniform],
383                                          c->uniform_data[next_uniform]);
384                         fprintf(stderr, ")");
385                         next_uniform++;
386                 }
387                 fprintf(stderr, "\n");
388                 ralloc_free((void *)str);
389         }
390 
391         /* Make sure our dumping lined up. */
392         if (c->devinfo->ver >= 40)
393                 assert(next_uniform == c->num_uniforms);
394 
395         fprintf(stderr, "\n");
396 }
397 
/*
 * Final backend stage: converts the compiled VIR program in @c to packed
 * 64-bit QPU instructions, consuming (and freeing) the register
 * allocation in @temp_registers.  Generates register encodings per
 * block, schedules, packs, optionally dumps, and validates.  On pack
 * failure, sets c->compilation_result to V3D_COMPILATION_FAILED and
 * returns early.
 */
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will be actually loaded by the
         * generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

        /* Pack each scheduled instruction into its 64-bit encoding. */
        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->compilation_result = V3D_COMPILATION_FAILED;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        /* Dump when QPU debug (or the per-stage flag) is enabled. */
        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        /* Ownership of temp_registers ends here. */
        free(temp_registers);
}
435