/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
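        /* Small immediates are encoded in the raddr_b field (repurposed by
         * the small_imm signal), so the operand can only be read through
         * mux B.
         */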
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

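        /* A register-file source has to come through one of the two read
         * ports, raddr_a or raddr_b.  Claim port A if nothing is using it
         * yet; otherwise reuse whichever port already holds this register,
         * falling back to port B.
         */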
108
109 if (instr->alu.add.a != V3D_QPU_MUX_A &&
110 instr->alu.add.b != V3D_QPU_MUX_A &&
111 instr->alu.mul.a != V3D_QPU_MUX_A &&
112 instr->alu.mul.b != V3D_QPU_MUX_A) {
113 instr->raddr_a = src.index;
114 *mux = V3D_QPU_MUX_A;
115 } else {
116 if (instr->raddr_a == src.index) {
117 *mux = V3D_QPU_MUX_A;
118 } else {
119 assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
120 instr->alu.add.b == V3D_QPU_MUX_B &&
121 instr->alu.mul.a == V3D_QPU_MUX_B &&
122 instr->alu.mul.b == V3D_QPU_MUX_B) ||
123 src.index == instr->raddr_b);
124
125 instr->raddr_b = src.index;
126 *mux = V3D_QPU_MUX_B;
127 }
128 }
129 }
130
131 static bool
is_no_op_mov(struct qinst * qinst)132 is_no_op_mov(struct qinst *qinst)
133 {
134 static const struct v3d_qpu_sig no_sig = {0};
135
136 /* Make sure it's just a lone MOV. */
137 if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
138 qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
139 qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
140 memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
141 return false;
142 }
143
144 /* Check if it's a MOV from a register to itself. */
145 enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
146 if (qinst->qpu.alu.mul.magic_write) {
147 if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
148 return false;
149
150 if (qinst->qpu.alu.mul.a !=
151 V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
152 return false;
153 }
154 } else {
155 int raddr;
156
157 switch (qinst->qpu.alu.mul.a) {
158 case V3D_QPU_MUX_A:
159 raddr = qinst->qpu.raddr_a;
160 break;
161 case V3D_QPU_MUX_B:
162 raddr = qinst->qpu.raddr_b;
163 break;
164 default:
165 return false;
166 }
167 if (raddr != waddr)
168 return false;
169 }
170
        /* If the MOV packs, unpacks, or updates flags, it has a visible
         * effect and we still need to execute the instruction.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
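        /* VPM reads must be generated in order; track the last index read
         * so the assert below can catch out-of-order reads.
         */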
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                                /* QFILE_NULL is an undef, so we can load
                                 * anything.  Use register 0.
                                 */
                                src[i] = qpu_reg(0);
                                break;
                        case QFILE_LOAD_IMM:
                                assert(!"not reached");
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

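                                /* Emit a nop with the ldvpm signal before
                                 * this instruction; the VPM value lands in
                                 * r3, which we then read as the source.
                                 */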
                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_magic(V3D_QPU_WADDR_R3);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

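                                /* ldunif/ldunifa load into r5.  For any
                                 * other destination we need the
                                 * ldunifrf/ldunifarf variants, which take
                                 * an explicit address and only exist on
                                 * v4.x and later.
                                 */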
                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        if (qinst->qpu.sig.ldunif) {
                                                qinst->qpu.sig.ldunif = false;
                                                qinst->qpu.sig.ldunifrf = true;
                                        } else {
                                                qinst->qpu.sig.ldunifa = false;
                                                qinst->qpu.sig.ldunifarf = true;
                                        }
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
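                                /* Other address-writing signals (such as
                                 * ldtmu on v4.x) also take their
                                 * destination from the sig_addr/sig_magic
                                 * fields rather than a waddr.
                                 */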
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.ldtlbu ||
            qpu.sig.wrtmuc) {
                return true;
        }

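        /* In this backend every branch consumes one uniform slot (its
         * branch target).
         */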
        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by
         * the generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

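        /* Pack each scheduled instruction into its 64-bit QPU encoding,
         * failing the compile if an instruction can't be encoded.
         */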
        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->compilation_result = V3D_COMPILATION_FAILED;
                        return;
                }

                if (v3d_qpu_is_nop(&inst->qpu))
                        c->nop_count++;
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

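        /* Run the QPU validator over the final program to catch encoding
         * and sequencing mistakes.
         */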
        qpu_validate(c);

        free(temp_registers);
}