1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "aco_builder.h"
26 #include "aco_ir.h"
27
28 #include "common/ac_shader_util.h"
29 #include "common/sid.h"
30
31 #include <array>
32
33 namespace aco {
34
35 const std::array<const char*, num_reduce_ops> reduce_ops = []()
__anon1a4d769d0102() 36 {
37 std::array<const char*, num_reduce_ops> ret{};
38 ret[iadd8] = "iadd8";
39 ret[iadd16] = "iadd16";
40 ret[iadd32] = "iadd32";
41 ret[iadd64] = "iadd64";
42 ret[imul8] = "imul8";
43 ret[imul16] = "imul16";
44 ret[imul32] = "imul32";
45 ret[imul64] = "imul64";
46 ret[fadd16] = "fadd16";
47 ret[fadd32] = "fadd32";
48 ret[fadd64] = "fadd64";
49 ret[fmul16] = "fmul16";
50 ret[fmul32] = "fmul32";
51 ret[fmul64] = "fmul64";
52 ret[imin8] = "imin8";
53 ret[imin16] = "imin16";
54 ret[imin32] = "imin32";
55 ret[imin64] = "imin64";
56 ret[imax8] = "imax8";
57 ret[imax16] = "imax16";
58 ret[imax32] = "imax32";
59 ret[imax64] = "imax64";
60 ret[umin8] = "umin8";
61 ret[umin16] = "umin16";
62 ret[umin32] = "umin32";
63 ret[umin64] = "umin64";
64 ret[umax8] = "umax8";
65 ret[umax16] = "umax16";
66 ret[umax32] = "umax32";
67 ret[umax64] = "umax64";
68 ret[fmin16] = "fmin16";
69 ret[fmin32] = "fmin32";
70 ret[fmin64] = "fmin64";
71 ret[fmax16] = "fmax16";
72 ret[fmax32] = "fmax32";
73 ret[fmax64] = "fmax64";
74 ret[iand8] = "iand8";
75 ret[iand16] = "iand16";
76 ret[iand32] = "iand32";
77 ret[iand64] = "iand64";
78 ret[ior8] = "ior8";
79 ret[ior16] = "ior16";
80 ret[ior32] = "ior32";
81 ret[ior64] = "ior64";
82 ret[ixor8] = "ixor8";
83 ret[ixor16] = "ixor16";
84 ret[ixor32] = "ixor32";
85 ret[ixor64] = "ixor64";
86 return ret;
87 }();
88
89 static void
print_reg_class(const RegClass rc,FILE * output)90 print_reg_class(const RegClass rc, FILE* output)
91 {
92 if (rc.is_subdword()) {
93 fprintf(output, " v%ub: ", rc.bytes());
94 } else if (rc.type() == RegType::sgpr) {
95 fprintf(output, " s%u: ", rc.size());
96 } else if (rc.is_linear()) {
97 fprintf(output, " lv%u: ", rc.size());
98 } else {
99 fprintf(output, " v%u: ", rc.size());
100 }
101 }
102
103 void
print_physReg(PhysReg reg,unsigned bytes,FILE * output,unsigned flags)104 print_physReg(PhysReg reg, unsigned bytes, FILE* output, unsigned flags)
105 {
106 if (reg == 106) {
107 fprintf(output, bytes > 4 ? "vcc" : "vcc_lo");
108 } else if (reg == 107) {
109 fprintf(output, "vcc_hi");
110 } else if (reg == 124) {
111 fprintf(output, "m0");
112 } else if (reg == 125) {
113 fprintf(output, "null");
114 } else if (reg == 126) {
115 fprintf(output, bytes > 4 ? "exec" : "exec_lo");
116 } else if (reg == 127) {
117 fprintf(output, "exec_hi");
118 } else if (reg == 253) {
119 fprintf(output, "scc");
120 } else {
121 bool is_vgpr = reg / 256;
122 unsigned r = reg % 256;
123 unsigned size = DIV_ROUND_UP(bytes, 4);
124 if (size == 1 && (flags & print_no_ssa)) {
125 fprintf(output, "%c%d", is_vgpr ? 'v' : 's', r);
126 } else {
127 fprintf(output, "%c[%d", is_vgpr ? 'v' : 's', r);
128 if (size > 1)
129 fprintf(output, "-%d]", r + size - 1);
130 else
131 fprintf(output, "]");
132 }
133 if (reg.byte() || bytes % 4)
134 fprintf(output, "[%d:%d]", reg.byte() * 8, (reg.byte() + bytes) * 8);
135 }
136 }
137
/* Prints the value of an inline constant given its operand encoding.
 * Encodings outside the ranges handled below print nothing. */
static void
print_constant(uint8_t reg, FILE* output)
{
   /* 128..192 are printed as the integers 0..64. */
   if (reg >= 128 && reg <= 192) {
      fprintf(output, "%d", reg - 128);
      return;
   }

   /* 193..208 are printed as the integers -1..-16. */
   if (reg > 192 && reg <= 208) {
      fprintf(output, "%d", 192 - reg);
      return;
   }

   /* 240..248 are printed as a fixed set of floating-point values. */
   static const char* const float_names[] = {
      "0.5", "-0.5", "1.0", "-1.0", "2.0", "-2.0", "4.0", "-4.0", "1/(2*PI)",
   };
   if (reg >= 240 && reg <= 248)
      fprintf(output, "%s", float_names[reg - 240]);
}
161
162 void
aco_print_operand(const Operand * operand,FILE * output,unsigned flags)163 aco_print_operand(const Operand* operand, FILE* output, unsigned flags)
164 {
165 if (operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1)) {
166 if (operand->bytes() == 1)
167 fprintf(output, "0x%.2x", operand->constantValue());
168 else if (operand->bytes() == 2)
169 fprintf(output, "0x%.4x", operand->constantValue());
170 else
171 fprintf(output, "0x%x", operand->constantValue());
172 } else if (operand->isConstant()) {
173 print_constant(operand->physReg().reg(), output);
174 } else if (operand->isUndefined()) {
175 print_reg_class(operand->regClass(), output);
176 fprintf(output, "undef");
177 } else {
178 if (operand->isLateKill())
179 fprintf(output, "(latekill)");
180 if (operand->is16bit())
181 fprintf(output, "(is16bit)");
182 if (operand->is24bit())
183 fprintf(output, "(is24bit)");
184 if ((flags & print_kill) && operand->isKill())
185 fprintf(output, "(kill)");
186
187 if (!(flags & print_no_ssa))
188 fprintf(output, "%%%d%s", operand->tempId(), operand->isFixed() ? ":" : "");
189
190 if (operand->isFixed())
191 print_physReg(operand->physReg(), operand->bytes(), output, flags);
192 }
193 }
194
195 static void
print_definition(const Definition * definition,FILE * output,unsigned flags)196 print_definition(const Definition* definition, FILE* output, unsigned flags)
197 {
198 if (!(flags & print_no_ssa))
199 print_reg_class(definition->regClass(), output);
200 if (definition->isPrecise())
201 fprintf(output, "(precise)");
202 if (definition->isNUW())
203 fprintf(output, "(nuw)");
204 if (definition->isNoCSE())
205 fprintf(output, "(noCSE)");
206 if ((flags & print_kill) && definition->isKill())
207 fprintf(output, "(kill)");
208 if (!(flags & print_no_ssa))
209 fprintf(output, "%%%d%s", definition->tempId(), definition->isFixed() ? ":" : "");
210
211 if (definition->isFixed())
212 print_physReg(definition->physReg(), definition->bytes(), output, flags);
213 }
214
215 static void
print_storage(storage_class storage,FILE * output)216 print_storage(storage_class storage, FILE* output)
217 {
218 fprintf(output, " storage:");
219 int printed = 0;
220 if (storage & storage_buffer)
221 printed += fprintf(output, "%sbuffer", printed ? "," : "");
222 if (storage & storage_gds)
223 printed += fprintf(output, "%sgds", printed ? "," : "");
224 if (storage & storage_image)
225 printed += fprintf(output, "%simage", printed ? "," : "");
226 if (storage & storage_shared)
227 printed += fprintf(output, "%sshared", printed ? "," : "");
228 if (storage & storage_task_payload)
229 printed += fprintf(output, "%stask_payload", printed ? "," : "");
230 if (storage & storage_vmem_output)
231 printed += fprintf(output, "%svmem_output", printed ? "," : "");
232 if (storage & storage_scratch)
233 printed += fprintf(output, "%sscratch", printed ? "," : "");
234 if (storage & storage_vgpr_spill)
235 printed += fprintf(output, "%svgpr_spill", printed ? "," : "");
236 }
237
238 static void
print_semantics(memory_semantics sem,FILE * output)239 print_semantics(memory_semantics sem, FILE* output)
240 {
241 fprintf(output, " semantics:");
242 int printed = 0;
243 if (sem & semantic_acquire)
244 printed += fprintf(output, "%sacquire", printed ? "," : "");
245 if (sem & semantic_release)
246 printed += fprintf(output, "%srelease", printed ? "," : "");
247 if (sem & semantic_volatile)
248 printed += fprintf(output, "%svolatile", printed ? "," : "");
249 if (sem & semantic_private)
250 printed += fprintf(output, "%sprivate", printed ? "," : "");
251 if (sem & semantic_can_reorder)
252 printed += fprintf(output, "%sreorder", printed ? "," : "");
253 if (sem & semantic_atomic)
254 printed += fprintf(output, "%satomic", printed ? "," : "");
255 if (sem & semantic_rmw)
256 printed += fprintf(output, "%srmw", printed ? "," : "");
257 }
258
259 static void
print_scope(sync_scope scope,FILE * output,const char * prefix="scope")260 print_scope(sync_scope scope, FILE* output, const char* prefix = "scope")
261 {
262 fprintf(output, " %s:", prefix);
263 switch (scope) {
264 case scope_invocation: fprintf(output, "invocation"); break;
265 case scope_subgroup: fprintf(output, "subgroup"); break;
266 case scope_workgroup: fprintf(output, "workgroup"); break;
267 case scope_queuefamily: fprintf(output, "queuefamily"); break;
268 case scope_device: fprintf(output, "device"); break;
269 }
270 }
271
272 static void
print_sync(memory_sync_info sync,FILE * output)273 print_sync(memory_sync_info sync, FILE* output)
274 {
275 if (sync.storage)
276 print_storage(sync.storage, output);
277 if (sync.semantics)
278 print_semantics(sync.semantics, output);
279 if (sync.scope != scope_invocation)
280 print_scope(sync.scope, output);
281 }
282
283 static void
print_instr_format_specific(enum amd_gfx_level gfx_level,const Instruction * instr,FILE * output)284 print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output)
285 {
286 switch (instr->format) {
287 case Format::SOPK: {
288 const SOPK_instruction& sopk = instr->sopk();
289 fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm);
290 break;
291 }
292 case Format::SOPP: {
293 uint16_t imm = instr->sopp().imm;
294 switch (instr->opcode) {
295 case aco_opcode::s_waitcnt: {
296 wait_imm unpacked(gfx_level, imm);
297 if (unpacked.vm != wait_imm::unset_counter)
298 fprintf(output, " vmcnt(%d)", unpacked.vm);
299 if (unpacked.exp != wait_imm::unset_counter)
300 fprintf(output, " expcnt(%d)", unpacked.exp);
301 if (unpacked.lgkm != wait_imm::unset_counter)
302 fprintf(output, " lgkmcnt(%d)", unpacked.lgkm);
303 break;
304 }
305 case aco_opcode::s_waitcnt_depctr: {
306 unsigned va_vdst = (imm >> 12) & 0xf;
307 unsigned va_sdst = (imm >> 9) & 0x7;
308 unsigned va_ssrc = (imm >> 8) & 0x1;
309 unsigned hold_cnt = (imm >> 7) & 0x1;
310 unsigned vm_vsrc = (imm >> 2) & 0x7;
311 unsigned va_vcc = (imm >> 1) & 0x1;
312 unsigned sa_sdst = imm & 0x1;
313 if (va_vdst != 0xf)
314 fprintf(output, " va_vdst(%d)", va_vdst);
315 if (va_sdst != 0x7)
316 fprintf(output, " va_sdst(%d)", va_sdst);
317 if (va_ssrc != 0x1)
318 fprintf(output, " va_ssrc(%d)", va_ssrc);
319 if (hold_cnt != 0x1)
320 fprintf(output, " holt_cnt(%d)", hold_cnt);
321 if (vm_vsrc != 0x7)
322 fprintf(output, " vm_vsrc(%d)", vm_vsrc);
323 if (va_vcc != 0x1)
324 fprintf(output, " va_vcc(%d)", va_vcc);
325 if (sa_sdst != 0x1)
326 fprintf(output, " sa_sdst(%d)", sa_sdst);
327 break;
328 }
329 case aco_opcode::s_delay_alu: {
330 unsigned delay[2] = {imm & 0xfu, (imm >> 7) & 0xfu};
331 unsigned skip = (imm >> 4) & 0x3;
332 for (unsigned i = 0; i < 2; i++) {
333 if (i == 1 && skip) {
334 if (skip == 1)
335 fprintf(output, " next");
336 else
337 fprintf(output, " skip_%u", skip - 1);
338 }
339
340 alu_delay_wait wait = (alu_delay_wait)delay[i];
341 if (wait >= alu_delay_wait::VALU_DEP_1 && wait <= alu_delay_wait::VALU_DEP_4)
342 fprintf(output, " valu_dep_%u", delay[i]);
343 else if (wait >= alu_delay_wait::TRANS32_DEP_1 && wait <= alu_delay_wait::TRANS32_DEP_3)
344 fprintf(output, " trans32_dep_%u",
345 delay[i] - (unsigned)alu_delay_wait::TRANS32_DEP_1 + 1);
346 else if (wait == alu_delay_wait::FMA_ACCUM_CYCLE_1)
347 fprintf(output, " fma_accum_cycle_1");
348 else if (wait >= alu_delay_wait::SALU_CYCLE_1 && wait <= alu_delay_wait::SALU_CYCLE_3)
349 fprintf(output, " salu_cycle_%u",
350 delay[i] - (unsigned)alu_delay_wait::SALU_CYCLE_1 + 1);
351 }
352 break;
353 }
354 case aco_opcode::s_endpgm:
355 case aco_opcode::s_endpgm_saved:
356 case aco_opcode::s_endpgm_ordered_ps_done:
357 case aco_opcode::s_wakeup:
358 case aco_opcode::s_barrier:
359 case aco_opcode::s_icache_inv:
360 case aco_opcode::s_ttracedata:
361 case aco_opcode::s_set_gpr_idx_off: {
362 break;
363 }
364 case aco_opcode::s_sendmsg: {
365 unsigned id = imm & sendmsg_id_mask;
366 static_assert(sendmsg_gs == sendmsg_hs_tessfactor);
367 static_assert(sendmsg_gs_done == sendmsg_dealloc_vgprs);
368 switch (id) {
369 case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
370 case sendmsg_gs:
371 if (gfx_level >= GFX11)
372 fprintf(output, " sendmsg(hs_tessfactor)");
373 else
374 fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
375 imm & 0x20 ? ", emit" : "", imm >> 8);
376 break;
377 case sendmsg_gs_done:
378 if (gfx_level >= GFX11)
379 fprintf(output, " sendmsg(dealloc_vgprs)");
380 else
381 fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
382 imm & 0x20 ? ", emit" : "", imm >> 8);
383 break;
384 case sendmsg_save_wave: fprintf(output, " sendmsg(save_wave)"); break;
385 case sendmsg_stall_wave_gen: fprintf(output, " sendmsg(stall_wave_gen)"); break;
386 case sendmsg_halt_waves: fprintf(output, " sendmsg(halt_waves)"); break;
387 case sendmsg_ordered_ps_done: fprintf(output, " sendmsg(ordered_ps_done)"); break;
388 case sendmsg_early_prim_dealloc: fprintf(output, " sendmsg(early_prim_dealloc)"); break;
389 case sendmsg_gs_alloc_req: fprintf(output, " sendmsg(gs_alloc_req)"); break;
390 case sendmsg_get_doorbell: fprintf(output, " sendmsg(get_doorbell)"); break;
391 case sendmsg_get_ddid: fprintf(output, " sendmsg(get_ddid)"); break;
392 default: fprintf(output, " imm:%u", imm);
393 }
394 break;
395 }
396 case aco_opcode::s_wait_event: {
397 if (!(imm & wait_event_imm_dont_wait_export_ready))
398 fprintf(output, " export_ready");
399 break;
400 }
401 default: {
402 if (imm)
403 fprintf(output, " imm:%u", imm);
404 break;
405 }
406 }
407 if (instr->sopp().block != -1)
408 fprintf(output, " block:BB%d", instr->sopp().block);
409 break;
410 }
411 case Format::SOP1: {
412 if (instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
413 instr->opcode == aco_opcode::s_sendmsg_rtn_b64) {
414 unsigned id = instr->operands[0].constantValue();
415 switch (id) {
416 case sendmsg_rtn_get_doorbell: fprintf(output, " sendmsg(rtn_get_doorbell)"); break;
417 case sendmsg_rtn_get_ddid: fprintf(output, " sendmsg(rtn_get_ddid)"); break;
418 case sendmsg_rtn_get_tma: fprintf(output, " sendmsg(rtn_get_tma)"); break;
419 case sendmsg_rtn_get_realtime: fprintf(output, " sendmsg(rtn_get_realtime)"); break;
420 case sendmsg_rtn_save_wave: fprintf(output, " sendmsg(rtn_save_wave)"); break;
421 case sendmsg_rtn_get_tba: fprintf(output, " sendmsg(rtn_get_tba)"); break;
422 default: break;
423 }
424 break;
425 }
426 break;
427 }
428 case Format::SMEM: {
429 const SMEM_instruction& smem = instr->smem();
430 if (smem.glc)
431 fprintf(output, " glc");
432 if (smem.dlc)
433 fprintf(output, " dlc");
434 if (smem.nv)
435 fprintf(output, " nv");
436 print_sync(smem.sync, output);
437 break;
438 }
439 case Format::VINTERP_INREG: {
440 const VINTERP_inreg_instruction& vinterp = instr->vinterp_inreg();
441 if (vinterp.wait_exp != 7)
442 fprintf(output, " wait_exp:%u", vinterp.wait_exp);
443 break;
444 }
445 case Format::VINTRP: {
446 const VINTRP_instruction& vintrp = instr->vintrp();
447 fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
448 break;
449 }
450 case Format::DS: {
451 const DS_instruction& ds = instr->ds();
452 if (ds.offset0)
453 fprintf(output, " offset0:%u", ds.offset0);
454 if (ds.offset1)
455 fprintf(output, " offset1:%u", ds.offset1);
456 if (ds.gds)
457 fprintf(output, " gds");
458 print_sync(ds.sync, output);
459 break;
460 }
461 case Format::LDSDIR: {
462 const LDSDIR_instruction& ldsdir = instr->ldsdir();
463 if (instr->opcode == aco_opcode::lds_param_load)
464 fprintf(output, " attr%u.%c", ldsdir.attr, "xyzw"[ldsdir.attr_chan]);
465 if (ldsdir.wait_vdst != 15)
466 fprintf(output, " wait_vdst:%u", ldsdir.wait_vdst);
467 print_sync(ldsdir.sync, output);
468 break;
469 }
470 case Format::MUBUF: {
471 const MUBUF_instruction& mubuf = instr->mubuf();
472 if (mubuf.offset)
473 fprintf(output, " offset:%u", mubuf.offset);
474 if (mubuf.offen)
475 fprintf(output, " offen");
476 if (mubuf.idxen)
477 fprintf(output, " idxen");
478 if (mubuf.addr64)
479 fprintf(output, " addr64");
480 if (mubuf.glc)
481 fprintf(output, " glc");
482 if (mubuf.dlc)
483 fprintf(output, " dlc");
484 if (mubuf.slc)
485 fprintf(output, " slc");
486 if (mubuf.tfe)
487 fprintf(output, " tfe");
488 if (mubuf.lds)
489 fprintf(output, " lds");
490 if (mubuf.disable_wqm)
491 fprintf(output, " disable_wqm");
492 print_sync(mubuf.sync, output);
493 break;
494 }
495 case Format::MIMG: {
496 const MIMG_instruction& mimg = instr->mimg();
497 unsigned identity_dmask =
498 !instr->definitions.empty() ? (1 << instr->definitions[0].size()) - 1 : 0xf;
499 if ((mimg.dmask & identity_dmask) != identity_dmask)
500 fprintf(output, " dmask:%s%s%s%s", mimg.dmask & 0x1 ? "x" : "",
501 mimg.dmask & 0x2 ? "y" : "", mimg.dmask & 0x4 ? "z" : "",
502 mimg.dmask & 0x8 ? "w" : "");
503 switch (mimg.dim) {
504 case ac_image_1d: fprintf(output, " 1d"); break;
505 case ac_image_2d: fprintf(output, " 2d"); break;
506 case ac_image_3d: fprintf(output, " 3d"); break;
507 case ac_image_cube: fprintf(output, " cube"); break;
508 case ac_image_1darray: fprintf(output, " 1darray"); break;
509 case ac_image_2darray: fprintf(output, " 2darray"); break;
510 case ac_image_2dmsaa: fprintf(output, " 2dmsaa"); break;
511 case ac_image_2darraymsaa: fprintf(output, " 2darraymsaa"); break;
512 }
513 if (mimg.unrm)
514 fprintf(output, " unrm");
515 if (mimg.glc)
516 fprintf(output, " glc");
517 if (mimg.dlc)
518 fprintf(output, " dlc");
519 if (mimg.slc)
520 fprintf(output, " slc");
521 if (mimg.tfe)
522 fprintf(output, " tfe");
523 if (mimg.da)
524 fprintf(output, " da");
525 if (mimg.lwe)
526 fprintf(output, " lwe");
527 if (mimg.r128)
528 fprintf(output, " r128");
529 if (mimg.a16)
530 fprintf(output, " a16");
531 if (mimg.d16)
532 fprintf(output, " d16");
533 if (mimg.disable_wqm)
534 fprintf(output, " disable_wqm");
535 print_sync(mimg.sync, output);
536 break;
537 }
538 case Format::EXP: {
539 const Export_instruction& exp = instr->exp();
540 unsigned identity_mask = exp.compressed ? 0x5 : 0xf;
541 if ((exp.enabled_mask & identity_mask) != identity_mask)
542 fprintf(output, " en:%c%c%c%c", exp.enabled_mask & 0x1 ? 'r' : '*',
543 exp.enabled_mask & 0x2 ? 'g' : '*', exp.enabled_mask & 0x4 ? 'b' : '*',
544 exp.enabled_mask & 0x8 ? 'a' : '*');
545 if (exp.compressed)
546 fprintf(output, " compr");
547 if (exp.done)
548 fprintf(output, " done");
549 if (exp.valid_mask)
550 fprintf(output, " vm");
551
552 if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7)
553 fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT);
554 else if (exp.dest == V_008DFC_SQ_EXP_MRTZ)
555 fprintf(output, " mrtz");
556 else if (exp.dest == V_008DFC_SQ_EXP_NULL)
557 fprintf(output, " null");
558 else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3)
559 fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS);
560 else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31)
561 fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM);
562 break;
563 }
564 case Format::PSEUDO_BRANCH: {
565 const Pseudo_branch_instruction& branch = instr->branch();
566 /* Note: BB0 cannot be a branch target */
567 if (branch.target[0] != 0)
568 fprintf(output, " BB%d", branch.target[0]);
569 if (branch.target[1] != 0)
570 fprintf(output, ", BB%d", branch.target[1]);
571 break;
572 }
573 case Format::PSEUDO_REDUCTION: {
574 const Pseudo_reduction_instruction& reduce = instr->reduction();
575 fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]);
576 if (reduce.cluster_size)
577 fprintf(output, " cluster_size:%u", reduce.cluster_size);
578 break;
579 }
580 case Format::PSEUDO_BARRIER: {
581 const Pseudo_barrier_instruction& barrier = instr->barrier();
582 print_sync(barrier.sync, output);
583 print_scope(barrier.exec_scope, output, "exec_scope");
584 break;
585 }
586 case Format::FLAT:
587 case Format::GLOBAL:
588 case Format::SCRATCH: {
589 const FLAT_instruction& flat = instr->flatlike();
590 if (flat.offset)
591 fprintf(output, " offset:%d", flat.offset);
592 if (flat.glc)
593 fprintf(output, " glc");
594 if (flat.dlc)
595 fprintf(output, " dlc");
596 if (flat.slc)
597 fprintf(output, " slc");
598 if (flat.lds)
599 fprintf(output, " lds");
600 if (flat.nv)
601 fprintf(output, " nv");
602 if (flat.disable_wqm)
603 fprintf(output, " disable_wqm");
604 print_sync(flat.sync, output);
605 break;
606 }
607 case Format::MTBUF: {
608 const MTBUF_instruction& mtbuf = instr->mtbuf();
609 fprintf(output, " dfmt:");
610 switch (mtbuf.dfmt) {
611 case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;
612 case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;
613 case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;
614 case V_008F0C_BUF_DATA_FORMAT_32: fprintf(output, "32"); break;
615 case V_008F0C_BUF_DATA_FORMAT_16_16: fprintf(output, "16_16"); break;
616 case V_008F0C_BUF_DATA_FORMAT_10_11_11: fprintf(output, "10_11_11"); break;
617 case V_008F0C_BUF_DATA_FORMAT_11_11_10: fprintf(output, "11_11_10"); break;
618 case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: fprintf(output, "10_10_10_2"); break;
619 case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: fprintf(output, "2_10_10_10"); break;
620 case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: fprintf(output, "8_8_8_8"); break;
621 case V_008F0C_BUF_DATA_FORMAT_32_32: fprintf(output, "32_32"); break;
622 case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: fprintf(output, "16_16_16_16"); break;
623 case V_008F0C_BUF_DATA_FORMAT_32_32_32: fprintf(output, "32_32_32"); break;
624 case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: fprintf(output, "32_32_32_32"); break;
625 case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;
626 }
627 fprintf(output, " nfmt:");
628 switch (mtbuf.nfmt) {
629 case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;
630 case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;
631 case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;
632 case V_008F0C_BUF_NUM_FORMAT_SSCALED: fprintf(output, "sscaled"); break;
633 case V_008F0C_BUF_NUM_FORMAT_UINT: fprintf(output, "uint"); break;
634 case V_008F0C_BUF_NUM_FORMAT_SINT: fprintf(output, "sint"); break;
635 case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;
636 case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;
637 }
638 if (mtbuf.offset)
639 fprintf(output, " offset:%u", mtbuf.offset);
640 if (mtbuf.offen)
641 fprintf(output, " offen");
642 if (mtbuf.idxen)
643 fprintf(output, " idxen");
644 if (mtbuf.glc)
645 fprintf(output, " glc");
646 if (mtbuf.dlc)
647 fprintf(output, " dlc");
648 if (mtbuf.slc)
649 fprintf(output, " slc");
650 if (mtbuf.tfe)
651 fprintf(output, " tfe");
652 if (mtbuf.disable_wqm)
653 fprintf(output, " disable_wqm");
654 print_sync(mtbuf.sync, output);
655 break;
656 }
657 default: {
658 break;
659 }
660 }
661 if (instr->isVALU()) {
662 const VALU_instruction& valu = instr->valu();
663 switch (valu.omod) {
664 case 1: fprintf(output, " *2"); break;
665 case 2: fprintf(output, " *4"); break;
666 case 3: fprintf(output, " *0.5"); break;
667 }
668 if (valu.clamp)
669 fprintf(output, " clamp");
670 if (valu.opsel & (1 << 3))
671 fprintf(output, " opsel_hi");
672 }
673
674 bool bound_ctrl = false, fetch_inactive = false;
675
676 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
677 instr->opcode == aco_opcode::v_permlanex16_b32) {
678 fetch_inactive = instr->valu().opsel[0];
679 bound_ctrl = instr->valu().opsel[1];
680 } else if (instr->isDPP16()) {
681 const DPP16_instruction& dpp = instr->dpp16();
682 if (dpp.dpp_ctrl <= 0xff) {
683 fprintf(output, " quad_perm:[%d,%d,%d,%d]", dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,
684 (dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);
685 } else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) {
686 fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf);
687 } else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) {
688 fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf);
689 } else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) {
690 fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf);
691 } else if (dpp.dpp_ctrl == dpp_wf_sl1) {
692 fprintf(output, " wave_shl:1");
693 } else if (dpp.dpp_ctrl == dpp_wf_rl1) {
694 fprintf(output, " wave_rol:1");
695 } else if (dpp.dpp_ctrl == dpp_wf_sr1) {
696 fprintf(output, " wave_shr:1");
697 } else if (dpp.dpp_ctrl == dpp_wf_rr1) {
698 fprintf(output, " wave_ror:1");
699 } else if (dpp.dpp_ctrl == dpp_row_mirror) {
700 fprintf(output, " row_mirror");
701 } else if (dpp.dpp_ctrl == dpp_row_half_mirror) {
702 fprintf(output, " row_half_mirror");
703 } else if (dpp.dpp_ctrl == dpp_row_bcast15) {
704 fprintf(output, " row_bcast:15");
705 } else if (dpp.dpp_ctrl == dpp_row_bcast31) {
706 fprintf(output, " row_bcast:31");
707 } else if (dpp.dpp_ctrl >= dpp_row_share(0) && dpp.dpp_ctrl <= dpp_row_share(15)) {
708 fprintf(output, " row_share:%d", dpp.dpp_ctrl & 0xf);
709 } else if (dpp.dpp_ctrl >= dpp_row_xmask(0) && dpp.dpp_ctrl <= dpp_row_xmask(15)) {
710 fprintf(output, " row_xmask:%d", dpp.dpp_ctrl & 0xf);
711 } else {
712 fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl);
713 }
714 if (dpp.row_mask != 0xf)
715 fprintf(output, " row_mask:0x%.1x", dpp.row_mask);
716 if (dpp.bank_mask != 0xf)
717 fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);
718 bound_ctrl = dpp.bound_ctrl;
719 fetch_inactive = dpp.fetch_inactive;
720 } else if (instr->isDPP8()) {
721 const DPP8_instruction& dpp = instr->dpp8();
722 fprintf(output, " dpp8:[");
723 for (unsigned i = 0; i < 8; i++)
724 fprintf(output, "%s%u", i ? "," : "", (dpp.lane_sel >> (i * 3)) & 0x7);
725 fprintf(output, "]");
726 fetch_inactive = dpp.fetch_inactive;
727 } else if (instr->isSDWA()) {
728 const SDWA_instruction& sdwa = instr->sdwa();
729 if (!instr->isVOPC()) {
730 char sext = sdwa.dst_sel.sign_extend() ? 's' : 'u';
731 unsigned offset = sdwa.dst_sel.offset();
732 if (instr->definitions[0].isFixed())
733 offset += instr->definitions[0].physReg().byte();
734 switch (sdwa.dst_sel.size()) {
735 case 1: fprintf(output, " dst_sel:%cbyte%u", sext, offset); break;
736 case 2: fprintf(output, " dst_sel:%cword%u", sext, offset >> 1); break;
737 case 4: fprintf(output, " dst_sel:dword"); break;
738 default: break;
739 }
740 if (instr->definitions[0].bytes() < 4)
741 fprintf(output, " dst_preserve");
742 }
743 for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
744 char sext = sdwa.sel[i].sign_extend() ? 's' : 'u';
745 unsigned offset = sdwa.sel[i].offset();
746 if (instr->operands[i].isFixed())
747 offset += instr->operands[i].physReg().byte();
748 switch (sdwa.sel[i].size()) {
749 case 1: fprintf(output, " src%d_sel:%cbyte%u", i, sext, offset); break;
750 case 2: fprintf(output, " src%d_sel:%cword%u", i, sext, offset >> 1); break;
751 case 4: fprintf(output, " src%d_sel:dword", i); break;
752 default: break;
753 }
754 }
755 }
756
757 if (bound_ctrl)
758 fprintf(output, " bound_ctrl:1");
759 if (fetch_inactive)
760 fprintf(output, " fi");
761 }
762
763 void
print_vopd_instr(enum amd_gfx_level gfx_level,const Instruction * instr,FILE * output,unsigned flags)764 print_vopd_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
765 unsigned flags)
766 {
767 unsigned opy_start = get_vopd_opy_start(instr);
768
769 if (!instr->definitions.empty()) {
770 print_definition(&instr->definitions[0], output, flags);
771 fprintf(output, " = ");
772 }
773 fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
774 for (unsigned i = 0; i < MIN2(instr->operands.size(), opy_start); ++i) {
775 fprintf(output, i ? ", " : " ");
776 aco_print_operand(&instr->operands[i], output, flags);
777 }
778
779 fprintf(output, " ::");
780
781 if (instr->definitions.size() > 1) {
782 print_definition(&instr->definitions[1], output, flags);
783 fprintf(output, " = ");
784 }
785 fprintf(output, "%s", instr_info.name[(int)instr->vopd().opy]);
786 for (unsigned i = opy_start; i < instr->operands.size(); ++i) {
787 fprintf(output, i > opy_start ? ", " : " ");
788 aco_print_operand(&instr->operands[i], output, flags);
789 }
790 }
791
792 void
aco_print_instr(enum amd_gfx_level gfx_level,const Instruction * instr,FILE * output,unsigned flags)793 aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
794 unsigned flags)
795 {
796 if (instr->isVOPD()) {
797 print_vopd_instr(gfx_level, instr, output, flags);
798 return;
799 }
800
801 if (!instr->definitions.empty()) {
802 for (unsigned i = 0; i < instr->definitions.size(); ++i) {
803 print_definition(&instr->definitions[i], output, flags);
804 if (i + 1 != instr->definitions.size())
805 fprintf(output, ", ");
806 }
807 fprintf(output, " = ");
808 }
809 fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
810 if (instr->operands.size()) {
811 const unsigned num_operands = instr->operands.size();
812 bitarray8 abs = 0;
813 bitarray8 neg = 0;
814 bitarray8 neg_lo = 0;
815 bitarray8 neg_hi = 0;
816 bitarray8 opsel = 0;
817 bitarray8 f2f32 = 0;
818 bitarray8 opsel_lo = 0;
819 bitarray8 opsel_hi = -1;
820
821 if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
822 instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
823 instr->opcode == aco_opcode::v_fma_mixhi_f16) {
824 const VALU_instruction& vop3p = instr->valu();
825 abs = vop3p.abs;
826 neg = vop3p.neg;
827 f2f32 = vop3p.opsel_hi;
828 opsel = f2f32 & vop3p.opsel_lo;
829 } else if (instr->isVOP3P()) {
830 const VALU_instruction& vop3p = instr->valu();
831 neg = vop3p.neg_lo & vop3p.neg_hi;
832 neg_lo = vop3p.neg_lo & ~neg;
833 neg_hi = vop3p.neg_hi & ~neg;
834 opsel_lo = vop3p.opsel_lo;
835 opsel_hi = vop3p.opsel_hi;
836 } else if (instr->isVALU() && instr->opcode != aco_opcode::v_permlane16_b32 &&
837 instr->opcode != aco_opcode::v_permlanex16_b32) {
838 const VALU_instruction& valu = instr->valu();
839 abs = valu.abs;
840 neg = valu.neg;
841 opsel = valu.opsel;
842 }
843 for (unsigned i = 0; i < num_operands; ++i) {
844 if (i)
845 fprintf(output, ", ");
846 else
847 fprintf(output, " ");
848
849 if (i < 3) {
850 if (neg[i])
851 fprintf(output, "-");
852 if (abs[i])
853 fprintf(output, "|");
854 if (opsel[i])
855 fprintf(output, "hi(");
856 else if (f2f32[i])
857 fprintf(output, "lo(");
858 }
859
860 aco_print_operand(&instr->operands[i], output, flags);
861
862 if (i < 3) {
863 if (f2f32[i] || opsel[i])
864 fprintf(output, ")");
865 if (abs[i])
866 fprintf(output, "|");
867
868 if (opsel_lo[i] || !opsel_hi[i])
869 fprintf(output, ".%c%c", opsel_lo[i] ? 'y' : 'x', opsel_hi[i] ? 'y' : 'x');
870
871 if (neg_lo[i])
872 fprintf(output, "*[-1,1]");
873 if (neg_hi[i])
874 fprintf(output, "*[1,-1]");
875 }
876 }
877 }
878 print_instr_format_specific(gfx_level, instr, output);
879 }
880
881 static void
print_block_kind(uint16_t kind,FILE * output)882 print_block_kind(uint16_t kind, FILE* output)
883 {
884 if (kind & block_kind_uniform)
885 fprintf(output, "uniform, ");
886 if (kind & block_kind_top_level)
887 fprintf(output, "top-level, ");
888 if (kind & block_kind_loop_preheader)
889 fprintf(output, "loop-preheader, ");
890 if (kind & block_kind_loop_header)
891 fprintf(output, "loop-header, ");
892 if (kind & block_kind_loop_exit)
893 fprintf(output, "loop-exit, ");
894 if (kind & block_kind_continue)
895 fprintf(output, "continue, ");
896 if (kind & block_kind_break)
897 fprintf(output, "break, ");
898 if (kind & block_kind_continue_or_break)
899 fprintf(output, "continue_or_break, ");
900 if (kind & block_kind_branch)
901 fprintf(output, "branch, ");
902 if (kind & block_kind_merge)
903 fprintf(output, "merge, ");
904 if (kind & block_kind_invert)
905 fprintf(output, "invert, ");
906 if (kind & block_kind_uses_discard)
907 fprintf(output, "discard, ");
908 if (kind & block_kind_resume)
909 fprintf(output, "resume, ");
910 if (kind & block_kind_export_end)
911 fprintf(output, "export_end, ");
912 if (kind & block_kind_end_with_regs)
913 fprintf(output, "end_with_regs, ");
914 }
915
916 static void
print_stage(Stage stage,FILE * output)917 print_stage(Stage stage, FILE* output)
918 {
919 fprintf(output, "ACO shader stage: SW (");
920
921 u_foreach_bit (s, (uint32_t)stage.sw) {
922 switch ((SWStage)(1 << s)) {
923 case SWStage::VS: fprintf(output, "VS"); break;
924 case SWStage::GS: fprintf(output, "GS"); break;
925 case SWStage::TCS: fprintf(output, "TCS"); break;
926 case SWStage::TES: fprintf(output, "TES"); break;
927 case SWStage::FS: fprintf(output, "FS"); break;
928 case SWStage::CS: fprintf(output, "CS"); break;
929 case SWStage::TS: fprintf(output, "TS"); break;
930 case SWStage::MS: fprintf(output, "MS"); break;
931 case SWStage::RT: fprintf(output, "RT"); break;
932 default: unreachable("invalid SW stage");
933 }
934 if (stage.num_sw_stages() > 1)
935 fprintf(output, "+");
936 }
937
938 fprintf(output, "), HW (");
939
940 switch (stage.hw) {
941 case AC_HW_LOCAL_SHADER: fprintf(output, "LOCAL_SHADER"); break;
942 case AC_HW_HULL_SHADER: fprintf(output, "HULL_SHADER"); break;
943 case AC_HW_EXPORT_SHADER: fprintf(output, "EXPORT_SHADER"); break;
944 case AC_HW_LEGACY_GEOMETRY_SHADER: fprintf(output, "LEGACY_GEOMETRY_SHADER"); break;
945 case AC_HW_VERTEX_SHADER: fprintf(output, "VERTEX_SHADER"); break;
946 case AC_HW_NEXT_GEN_GEOMETRY_SHADER: fprintf(output, "NEXT_GEN_GEOMETRY_SHADER"); break;
947 case AC_HW_PIXEL_SHADER: fprintf(output, "PIXEL_SHADER"); break;
948 case AC_HW_COMPUTE_SHADER: fprintf(output, "COMPUTE_SHADER"); break;
949 default: unreachable("invalid HW stage");
950 }
951
952 fprintf(output, ")\n");
953 }
954
955 void
aco_print_block(enum amd_gfx_level gfx_level,const Block * block,FILE * output,unsigned flags,const live & live_vars)956 aco_print_block(enum amd_gfx_level gfx_level, const Block* block, FILE* output, unsigned flags,
957 const live& live_vars)
958 {
959 fprintf(output, "BB%d\n", block->index);
960 fprintf(output, "/* logical preds: ");
961 for (unsigned pred : block->logical_preds)
962 fprintf(output, "BB%d, ", pred);
963 fprintf(output, "/ linear preds: ");
964 for (unsigned pred : block->linear_preds)
965 fprintf(output, "BB%d, ", pred);
966 fprintf(output, "/ kind: ");
967 print_block_kind(block->kind, output);
968 fprintf(output, "*/\n");
969
970 if (flags & print_live_vars) {
971 fprintf(output, "\tlive out:");
972 for (unsigned id : live_vars.live_out[block->index])
973 fprintf(output, " %%%d", id);
974 fprintf(output, "\n");
975
976 RegisterDemand demand = block->register_demand;
977 fprintf(output, "\tdemand: %u vgpr, %u sgpr\n", demand.vgpr, demand.sgpr);
978 }
979
980 unsigned index = 0;
981 for (auto const& instr : block->instructions) {
982 fprintf(output, "\t");
983 if (flags & print_live_vars) {
984 RegisterDemand demand = live_vars.register_demand[block->index][index];
985 fprintf(output, "(%3u vgpr, %3u sgpr) ", demand.vgpr, demand.sgpr);
986 }
987 if (flags & print_perf_info)
988 fprintf(output, "(%3u clk) ", instr->pass_flags);
989
990 aco_print_instr(gfx_level, instr.get(), output, flags);
991 fprintf(output, "\n");
992 index++;
993 }
994 }
995
996 void
aco_print_program(const Program * program,FILE * output,const live & live_vars,unsigned flags)997 aco_print_program(const Program* program, FILE* output, const live& live_vars, unsigned flags)
998 {
999 switch (program->progress) {
1000 case CompilationProgress::after_isel: fprintf(output, "After Instruction Selection:\n"); break;
1001 case CompilationProgress::after_spilling:
1002 fprintf(output, "After Spilling:\n");
1003 flags |= print_kill;
1004 break;
1005 case CompilationProgress::after_ra: fprintf(output, "After RA:\n"); break;
1006 }
1007
1008 print_stage(program->stage, output);
1009
1010 for (Block const& block : program->blocks)
1011 aco_print_block(program->gfx_level, &block, output, flags, live_vars);
1012
1013 if (program->constant_data.size()) {
1014 fprintf(output, "\n/* constant data */\n");
1015 for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
1016 fprintf(output, "[%06d] ", i);
1017 unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
1018 for (unsigned j = 0; j < line_size; j += 4) {
1019 unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
1020 uint32_t v = 0;
1021 memcpy(&v, &program->constant_data[i + j], size);
1022 fprintf(output, " %08x", v);
1023 }
1024 fprintf(output, "\n");
1025 }
1026 }
1027
1028 fprintf(output, "\n");
1029 }
1030
1031 void
aco_print_program(const Program * program,FILE * output,unsigned flags)1032 aco_print_program(const Program* program, FILE* output, unsigned flags)
1033 {
1034 aco_print_program(program, output, live(), flags);
1035 }
1036
1037 } // namespace aco
1038