• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"
27 
28 static inline struct qpu_reg
qpu_reg(int index)29 qpu_reg(int index)
30 {
31         struct qpu_reg reg = {
32                 .magic = false,
33                 .index = index,
34         };
35         return reg;
36 }
37 
38 static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41         struct qpu_reg reg = {
42                 .magic = true,
43                 .index = waddr,
44         };
45         return reg;
46 }
47 
48 struct v3d_qpu_instr
v3d_qpu_nop(void)49 v3d_qpu_nop(void)
50 {
51         struct v3d_qpu_instr instr = {
52                 .type = V3D_QPU_INSTR_TYPE_ALU,
53                 .alu = {
54                         .add = {
55                                 .op = V3D_QPU_A_NOP,
56                                 .waddr = V3D_QPU_WADDR_NOP,
57                                 .magic_write = true,
58                         },
59                         .mul = {
60                                 .op = V3D_QPU_M_NOP,
61                                 .waddr = V3D_QPU_WADDR_NOP,
62                                 .magic_write = true,
63                         },
64                 }
65         };
66 
67         return instr;
68 }
69 
70 static struct qinst *
vir_nop(void)71 vir_nop(void)
72 {
73         struct qreg undef = vir_nop_reg();
74         struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
75 
76         return qinst;
77 }
78 
79 static struct qinst *
new_qpu_nop_before(struct qinst * inst)80 new_qpu_nop_before(struct qinst *inst)
81 {
82         struct qinst *q = vir_nop();
83 
84         list_addtail(&q->link, &inst->link);
85 
86         return q;
87 }
88 
/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 *
 * Small immediates go through mux B (the small_imm signal must already be
 * set), accumulators map directly to their input muxes, and register-file
 * reads are assigned to one of the two physical read ports (raddr_a /
 * raddr_b), sharing a port when two sources read the same register.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                /* Small immediates are encoded in the raddr_b field and
                 * read through mux B.
                 */
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                /* Magic sources here are the accumulators r0-r5, whose
                 * waddrs map one-to-one onto the R0-R5 input muxes.
                 */
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        /* Register-file read: prefer port A if no source uses it yet. */
        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        /* Port A already reads this register; share it. */
                        *mux = V3D_QPU_MUX_A;
                } else {
                        /* Fall back to port B.  If every mux already uses
                         * port B, overwriting raddr_b is only legal when it
                         * already holds this same register.
                         */
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}
130 
/**
 * Returns true if qinst is a lone MUL MOV from a location to itself, with
 * no signals, packing, or condition/flag side effects, so that deleting it
 * cannot change the program's behavior.
 */
static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                /* Only accumulators r0-r4 are considered here (r5 is
                 * excluded, presumably because it is not a general
                 * read/write accumulator — NOTE(review): confirm).
                 */
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                /* Source mux must name the same accumulator being written. */
                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                /* Resolve the register-file address read through the mux;
                 * any non-regfile mux means this isn't a self-MOV.
                 */
                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* No packing or flags updates, or we need to execute the
         * instruction.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}
184 
185 static void
v3d_generate_code_block(struct v3d_compile * c,struct qblock * block,struct qpu_reg * temp_registers)186 v3d_generate_code_block(struct v3d_compile *c,
187                         struct qblock *block,
188                         struct qpu_reg *temp_registers)
189 {
190         int last_vpm_read_index = -1;
191 
192         vir_for_each_inst_safe(qinst, block) {
193 #if 0
194                 fprintf(stderr, "translating qinst to qpu: ");
195                 vir_dump_inst(c, qinst);
196                 fprintf(stderr, "\n");
197 #endif
198 
199                 struct qinst *temp;
200 
201                 if (vir_has_uniform(qinst))
202                         c->num_uniforms++;
203 
204                 int nsrc = vir_get_nsrc(qinst);
205                 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
206                 for (int i = 0; i < nsrc; i++) {
207                         int index = qinst->src[i].index;
208                         switch (qinst->src[i].file) {
209                         case QFILE_REG:
210                                 src[i] = qpu_reg(qinst->src[i].index);
211                                 break;
212                         case QFILE_MAGIC:
213                                 src[i] = qpu_magic(qinst->src[i].index);
214                                 break;
215                         case QFILE_NULL:
216                                 /* QFILE_NULL is an undef, so we can load
217                                  * anything. Using reg 0
218                                  */
219                                 src[i] = qpu_reg(0);
220                                 break;
221                         case QFILE_LOAD_IMM:
222                                 assert(!"not reached");
223                                 break;
224                         case QFILE_TEMP:
225                                 src[i] = temp_registers[index];
226                                 break;
227                         case QFILE_SMALL_IMM:
228                                 src[i].smimm = true;
229                                 break;
230 
231                         case QFILE_VPM:
232                                 assert((int)qinst->src[i].index >=
233                                        last_vpm_read_index);
234                                 (void)last_vpm_read_index;
235                                 last_vpm_read_index = qinst->src[i].index;
236 
237                                 temp = new_qpu_nop_before(qinst);
238                                 temp->qpu.sig.ldvpm = true;
239 
240                                 src[i] = qpu_magic(V3D_QPU_WADDR_R3);
241                                 break;
242                         }
243                 }
244 
245                 struct qpu_reg dst;
246                 switch (qinst->dst.file) {
247                 case QFILE_NULL:
248                         dst = qpu_magic(V3D_QPU_WADDR_NOP);
249                         break;
250 
251                 case QFILE_REG:
252                         dst = qpu_reg(qinst->dst.index);
253                         break;
254 
255                 case QFILE_MAGIC:
256                         dst = qpu_magic(qinst->dst.index);
257                         break;
258 
259                 case QFILE_TEMP:
260                         dst = temp_registers[qinst->dst.index];
261                         break;
262 
263                 case QFILE_VPM:
264                         dst = qpu_magic(V3D_QPU_WADDR_VPM);
265                         break;
266 
267                 case QFILE_SMALL_IMM:
268                 case QFILE_LOAD_IMM:
269                         assert(!"not reached");
270                         break;
271                 }
272 
273                 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
274                         if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
275                                 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
276                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
277 
278                                 if (!dst.magic ||
279                                     dst.index != V3D_QPU_WADDR_R5) {
280                                         assert(c->devinfo->ver >= 40);
281 
282                                         if (qinst->qpu.sig.ldunif) {
283                                            qinst->qpu.sig.ldunif = false;
284                                            qinst->qpu.sig.ldunifrf = true;
285                                         } else {
286                                            qinst->qpu.sig.ldunifa = false;
287                                            qinst->qpu.sig.ldunifarf = true;
288                                         }
289                                         qinst->qpu.sig_addr = dst.index;
290                                         qinst->qpu.sig_magic = dst.magic;
291                                 }
292                         } else if (v3d_qpu_sig_writes_address(c->devinfo,
293                                                        &qinst->qpu.sig)) {
294                                 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
295                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
296 
297                                 qinst->qpu.sig_addr = dst.index;
298                                 qinst->qpu.sig_magic = dst.magic;
299                         } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
300                                 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
301                                 if (nsrc >= 1) {
302                                         set_src(&qinst->qpu,
303                                                 &qinst->qpu.alu.add.a, src[0]);
304                                 }
305                                 if (nsrc >= 2) {
306                                         set_src(&qinst->qpu,
307                                                 &qinst->qpu.alu.add.b, src[1]);
308                                 }
309 
310                                 qinst->qpu.alu.add.waddr = dst.index;
311                                 qinst->qpu.alu.add.magic_write = dst.magic;
312                         } else {
313                                 if (nsrc >= 1) {
314                                         set_src(&qinst->qpu,
315                                                 &qinst->qpu.alu.mul.a, src[0]);
316                                 }
317                                 if (nsrc >= 2) {
318                                         set_src(&qinst->qpu,
319                                                 &qinst->qpu.alu.mul.b, src[1]);
320                                 }
321 
322                                 qinst->qpu.alu.mul.waddr = dst.index;
323                                 qinst->qpu.alu.mul.magic_write = dst.magic;
324 
325                                 if (is_no_op_mov(qinst)) {
326                                         vir_remove_instruction(c, qinst);
327                                         continue;
328                                 }
329                         }
330                 } else {
331                         assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
332                 }
333         }
334 }
335 
336 static bool
reads_uniform(const struct v3d_device_info * devinfo,uint64_t instruction)337 reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
338 {
339         struct v3d_qpu_instr qpu;
340         ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
341         assert(ok);
342 
343         if (qpu.sig.ldunif ||
344             qpu.sig.ldunifrf ||
345             qpu.sig.ldtlbu ||
346             qpu.sig.wrtmuc) {
347                 return true;
348         }
349 
350         if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
351                 return true;
352 
353         if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
354                 if (qpu.alu.add.magic_write &&
355                     v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
356                         return true;
357                 }
358 
359                 if (qpu.alu.mul.magic_write &&
360                     v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
361                         return true;
362                 }
363         }
364 
365         return false;
366 }
367 
368 static void
v3d_dump_qpu(struct v3d_compile * c)369 v3d_dump_qpu(struct v3d_compile *c)
370 {
371         fprintf(stderr, "%s prog %d/%d QPU:\n",
372                 vir_get_stage_name(c),
373                 c->program_id, c->variant_id);
374 
375         int next_uniform = 0;
376         for (int i = 0; i < c->qpu_inst_count; i++) {
377                 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
378                 fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
379 
380                 /* We can only do this on 4.x, because we're not tracking TMU
381                  * implicit uniforms here on 3.x.
382                  */
383                 if (c->devinfo->ver >= 40 &&
384                     reads_uniform(c->devinfo, c->qpu_insts[i])) {
385                         fprintf(stderr, " (");
386                         vir_dump_uniform(c->uniform_contents[next_uniform],
387                                          c->uniform_data[next_uniform]);
388                         fprintf(stderr, ")");
389                         next_uniform++;
390                 }
391                 fprintf(stderr, "\n");
392                 ralloc_free((void *)str);
393         }
394 
395         /* Make sure our dumping lined up. */
396         if (c->devinfo->ver >= 40)
397                 assert(next_uniform == c->num_uniforms);
398 
399         fprintf(stderr, "\n");
400 }
401 
402 void
v3d_vir_to_qpu(struct v3d_compile * c,struct qpu_reg * temp_registers)403 v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
404 {
405         /* Reset the uniform count to how many will be actually loaded by the
406          * generated QPU code.
407          */
408         c->num_uniforms = 0;
409 
410         vir_for_each_block(block, c)
411                 v3d_generate_code_block(c, block, temp_registers);
412 
413         v3d_qpu_schedule_instructions(c);
414 
415         c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
416         int i = 0;
417         vir_for_each_inst_inorder(inst, c) {
418                 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
419                                              &c->qpu_insts[i++]);
420                 if (!ok) {
421                         fprintf(stderr, "Failed to pack instruction %d:\n", i);
422                         vir_dump_inst(c, inst);
423                         fprintf(stderr, "\n");
424                         c->compilation_result = V3D_COMPILATION_FAILED;
425                         return;
426                 }
427 
428                 if (v3d_qpu_is_nop(&inst->qpu))
429                         c->nop_count++;
430         }
431         assert(i == c->qpu_inst_count);
432 
433         if (V3D_DEBUG & (V3D_DEBUG_QPU |
434                          v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
435                 v3d_dump_qpu(c);
436         }
437 
438         qpu_validate(c);
439 
440         free(temp_registers);
441 }
442