1 /*
2 * Copyright © 2018 Google
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "aco_interface.h"
8
9 #include "aco_ir.h"
10
11 #include "util/memstream.h"
12
13 #include "ac_gpu_info.h"
14 #include <array>
15 #include <iostream>
16 #include <vector>
17
18 using namespace aco;
19
20 namespace {
21
22 static const std::array<aco_compiler_statistic_info, aco_num_statistics> statistic_infos = []()
__anon28ce5aa60202() 23 {
24 std::array<aco_compiler_statistic_info, aco_num_statistics> ret{};
25 ret[aco_statistic_hash] =
26 aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
27 ret[aco_statistic_instructions] =
28 aco_compiler_statistic_info{"Instructions", "Instruction count"};
29 ret[aco_statistic_copies] =
30 aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
31 ret[aco_statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"};
32 ret[aco_statistic_latency] =
33 aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
34 ret[aco_statistic_inv_throughput] = aco_compiler_statistic_info{
35 "Inverse Throughput", "Estimated busy cycles to execute one wave"};
36 ret[aco_statistic_vmem_clauses] = aco_compiler_statistic_info{
37 "VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
38 ret[aco_statistic_smem_clauses] = aco_compiler_statistic_info{
39 "SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
40 ret[aco_statistic_sgpr_presched] =
41 aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
42 ret[aco_statistic_vgpr_presched] =
43 aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
44 ret[aco_statistic_valu] = aco_compiler_statistic_info{"VALU", "Number of VALU instructions"};
45 ret[aco_statistic_salu] = aco_compiler_statistic_info{"SALU", "Number of SALU instructions"};
46 ret[aco_statistic_vmem] = aco_compiler_statistic_info{"VMEM", "Number of VMEM instructions"};
47 ret[aco_statistic_smem] = aco_compiler_statistic_info{"SMEM", "Number of SMEM instructions"};
48 ret[aco_statistic_vopd] = aco_compiler_statistic_info{"VOPD", "Number of VOPD instructions"};
49 return ret;
50 }();
51
52 static void
validate(Program * program)53 validate(Program* program)
54 {
55 if (!(debug_flags & DEBUG_VALIDATE_IR))
56 return;
57
58 ASSERTED bool is_valid = validate_ir(program);
59 assert(is_valid);
60 }
61
62 static std::string
get_disasm_string(Program * program,std::vector<uint32_t> & code,unsigned exec_size)63 get_disasm_string(Program* program, std::vector<uint32_t>& code, unsigned exec_size)
64 {
65 std::string disasm;
66
67 char* data = NULL;
68 size_t disasm_size = 0;
69 struct u_memstream mem;
70 if (u_memstream_open(&mem, &data, &disasm_size)) {
71 FILE* const memf = u_memstream_get(&mem);
72 if (check_print_asm_support(program)) {
73 print_asm(program, code, exec_size / 4u, memf);
74 } else {
75 fprintf(memf, "Shader disassembly is not supported in the current configuration"
76 #if !AMD_LLVM_AVAILABLE
77 " (LLVM not available)"
78 #endif
79 ", falling back to print_program.\n\n");
80 aco_print_program(program, memf);
81 }
82 fputc(0, memf);
83 u_memstream_close(&mem);
84 disasm = std::string(data, data + disasm_size);
85 free(data);
86 }
87
88 return disasm;
89 }
90
91 static std::string
aco_postprocess_shader(const struct aco_compiler_options * options,std::unique_ptr<Program> & program)92 aco_postprocess_shader(const struct aco_compiler_options* options,
93 std::unique_ptr<Program>& program)
94 {
95 std::string llvm_ir;
96
97 if (options->dump_preoptir)
98 aco_print_program(program.get(), stderr);
99
100 ASSERTED bool is_valid = validate_cfg(program.get());
101 assert(is_valid);
102
103 dominator_tree(program.get());
104 if (program->should_repair_ssa)
105 repair_ssa(program.get());
106 lower_phis(program.get());
107
108 if (program->gfx_level <= GFX7)
109 lower_subdword(program.get());
110
111 validate(program.get());
112
113 /* Optimization */
114 if (!options->optimisations_disabled) {
115 if (!(debug_flags & DEBUG_NO_VN))
116 value_numbering(program.get());
117 if (!(debug_flags & DEBUG_NO_OPT))
118 optimize(program.get());
119
120 /* Optimization may move SGPR uses down, requiring further SSA repair. */
121 if (program->should_repair_ssa && repair_ssa(program.get()))
122 lower_phis(program.get());
123 }
124
125 /* cleanup and exec mask handling */
126 setup_reduce_temp(program.get());
127 insert_exec_mask(program.get());
128 validate(program.get());
129
130 /* spilling and scheduling */
131 live_var_analysis(program.get());
132 if (program->collect_statistics)
133 collect_presched_stats(program.get());
134 spill(program.get());
135
136 if (options->record_ir) {
137 char* data = NULL;
138 size_t size = 0;
139 u_memstream mem;
140 if (u_memstream_open(&mem, &data, &size)) {
141 FILE* const memf = u_memstream_get(&mem);
142 aco_print_program(program.get(), memf);
143 fputc(0, memf);
144 u_memstream_close(&mem);
145 }
146
147 llvm_ir = std::string(data, data + size);
148 free(data);
149 }
150
151 if ((debug_flags & DEBUG_LIVE_INFO) && options->dump_ir)
152 aco_print_program(program.get(), stderr, print_live_vars | print_kill);
153
154 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED))
155 schedule_program(program.get());
156 validate(program.get());
157
158 /* Register Allocation */
159 register_allocation(program.get());
160
161 if (validate_ra(program.get())) {
162 aco_print_program(program.get(), stderr);
163 abort();
164 } else if (options->dump_ir) {
165 aco_print_program(program.get(), stderr);
166 }
167
168 validate(program.get());
169
170 /* Optimization */
171 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_OPT)) {
172 optimize_postRA(program.get());
173 validate(program.get());
174 }
175
176 /* Lower to HW Instructions */
177 ssa_elimination(program.get());
178 lower_to_hw_instr(program.get());
179 lower_branches(program.get());
180 validate(program.get());
181
182 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED_VOPD))
183 schedule_vopd(program.get());
184
185 /* Schedule hardware instructions for ILP */
186 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED_ILP))
187 schedule_ilp(program.get());
188
189 insert_waitcnt(program.get());
190 insert_NOPs(program.get());
191 if (program->gfx_level >= GFX11)
192 insert_delay_alu(program.get());
193
194 if (program->gfx_level >= GFX10)
195 form_hard_clauses(program.get());
196
197 if (program->gfx_level >= GFX11)
198 combine_delay_alu(program.get());
199
200 if (program->collect_statistics || (debug_flags & DEBUG_PERF_INFO))
201 collect_preasm_stats(program.get());
202
203 return llvm_ir;
204 }
205
206 typedef void(select_shader_part_callback)(Program* program, void* pinfo, ac_shader_config* config,
207 const struct aco_compiler_options* options,
208 const struct aco_shader_info* info,
209 const struct ac_shader_args* args);
210
211 static void
aco_compile_shader_part(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * args,select_shader_part_callback select_shader_part,void * pinfo,aco_shader_part_callback * build_binary,void ** binary,bool is_prolog=false)212 aco_compile_shader_part(const struct aco_compiler_options* options,
213 const struct aco_shader_info* info, const struct ac_shader_args* args,
214 select_shader_part_callback select_shader_part, void* pinfo,
215 aco_shader_part_callback* build_binary, void** binary,
216 bool is_prolog = false)
217 {
218 init();
219
220 ac_shader_config config = {0};
221 std::unique_ptr<Program> program{new Program};
222
223 program->collect_statistics = options->record_stats;
224 if (program->collect_statistics)
225 memset(program->statistics, 0, sizeof(program->statistics));
226
227 program->debug.func = options->debug.func;
228 program->debug.private_data = options->debug.private_data;
229
230 program->is_prolog = is_prolog;
231 program->is_epilog = !is_prolog;
232
233 /* Instruction selection */
234 select_shader_part(program.get(), pinfo, &config, options, info, args);
235
236 aco_postprocess_shader(options, program);
237
238 /* assembly */
239 std::vector<uint32_t> code;
240 bool append_endpgm = !(options->is_opengl && is_prolog);
241 unsigned exec_size = emit_program(program.get(), code, NULL, append_endpgm);
242
243 std::string disasm;
244 if (options->record_asm)
245 disasm = get_disasm_string(program.get(), code, exec_size);
246
247 (*build_binary)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
248 disasm.data(), disasm.size());
249 }
250
251 } /* end namespace */
252
253 void
aco_compile_shader(const struct aco_compiler_options * options,const struct aco_shader_info * info,unsigned shader_count,struct nir_shader * const * shaders,const struct ac_shader_args * args,aco_callback * build_binary,void ** binary)254 aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
255 unsigned shader_count, struct nir_shader* const* shaders,
256 const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
257 {
258 init();
259
260 ac_shader_config config = {0};
261 std::unique_ptr<Program> program{new Program};
262
263 program->collect_statistics = options->record_stats;
264 if (program->collect_statistics)
265 memset(program->statistics, 0, sizeof(program->statistics));
266
267 program->debug.func = options->debug.func;
268 program->debug.private_data = options->debug.private_data;
269
270 /* Instruction Selection */
271 select_program(program.get(), shader_count, shaders, &config, options, info, args);
272
273 std::string llvm_ir = aco_postprocess_shader(options, program);
274
275 /* assembly */
276 std::vector<uint32_t> code;
277 std::vector<struct aco_symbol> symbols;
278 /* OpenGL combine multi shader parts into one continous code block,
279 * so only last part need the s_endpgm instruction.
280 */
281 bool append_endpgm = !(options->is_opengl && info->ps.has_epilog);
282 unsigned exec_size = emit_program(program.get(), code, &symbols, append_endpgm);
283
284 if (program->collect_statistics)
285 collect_postasm_stats(program.get(), code);
286
287 std::string disasm;
288 if (options->record_asm)
289 disasm = get_disasm_string(program.get(), code, exec_size);
290
291 size_t stats_size = 0;
292 if (program->collect_statistics)
293 stats_size = aco_num_statistics * sizeof(uint32_t);
294
295 (*build_binary)(binary, &config, llvm_ir.c_str(), llvm_ir.size(), disasm.c_str(), disasm.size(),
296 program->statistics, stats_size, exec_size, code.data(), code.size(),
297 symbols.data(), symbols.size(), program->debug_info.data(),
298 program->debug_info.size());
299 }
300
301 void
aco_compile_rt_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * in_args,const struct ac_shader_args * out_args,aco_callback * build_prolog,void ** binary)302 aco_compile_rt_prolog(const struct aco_compiler_options* options,
303 const struct aco_shader_info* info, const struct ac_shader_args* in_args,
304 const struct ac_shader_args* out_args, aco_callback* build_prolog,
305 void** binary)
306 {
307 init();
308
309 /* create program */
310 ac_shader_config config = {0};
311 std::unique_ptr<Program> program{new Program};
312 program->collect_statistics = false;
313 program->debug.func = NULL;
314 program->debug.private_data = NULL;
315
316 select_rt_prolog(program.get(), &config, options, info, in_args, out_args);
317 validate(program.get());
318 insert_waitcnt(program.get());
319 insert_NOPs(program.get());
320 if (program->gfx_level >= GFX11)
321 insert_delay_alu(program.get());
322 if (program->gfx_level >= GFX10)
323 form_hard_clauses(program.get());
324 if (program->gfx_level >= GFX11)
325 combine_delay_alu(program.get());
326
327 if (options->dump_ir)
328 aco_print_program(program.get(), stderr);
329
330 /* assembly */
331 std::vector<uint32_t> code;
332 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
333 unsigned exec_size = emit_program(program.get(), code);
334
335 std::string disasm;
336 if (options->record_asm)
337 disasm = get_disasm_string(program.get(), code, exec_size);
338
339 (*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
340 exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
341 }
342
343 void
aco_compile_vs_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_vs_prolog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_prolog,void ** binary)344 aco_compile_vs_prolog(const struct aco_compiler_options* options,
345 const struct aco_shader_info* info, const struct aco_vs_prolog_info* pinfo,
346 const struct ac_shader_args* args, aco_shader_part_callback* build_prolog,
347 void** binary)
348 {
349 init();
350
351 /* create program */
352 ac_shader_config config = {0};
353 std::unique_ptr<Program> program{new Program};
354 program->collect_statistics = false;
355 program->debug.func = NULL;
356 program->debug.private_data = NULL;
357
358 /* create IR */
359 select_vs_prolog(program.get(), pinfo, &config, options, info, args);
360 validate(program.get());
361 insert_NOPs(program.get());
362 if (program->gfx_level >= GFX10)
363 form_hard_clauses(program.get());
364
365 if (options->dump_ir)
366 aco_print_program(program.get(), stderr);
367
368 /* assembly */
369 std::vector<uint32_t> code;
370 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
371 unsigned exec_size = emit_program(program.get(), code);
372
373 std::string disasm;
374 if (options->record_asm)
375 disasm = get_disasm_string(program.get(), code, exec_size);
376
377 (*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
378 disasm.data(), disasm.size());
379 }
380
381 void
aco_compile_ps_epilog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_ps_epilog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_epilog,void ** binary)382 aco_compile_ps_epilog(const struct aco_compiler_options* options,
383 const struct aco_shader_info* info, const struct aco_ps_epilog_info* pinfo,
384 const struct ac_shader_args* args, aco_shader_part_callback* build_epilog,
385 void** binary)
386 {
387 aco_compile_shader_part(options, info, args, select_ps_epilog, (void*)pinfo, build_epilog,
388 binary);
389 }
390
391 void
aco_compile_ps_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_ps_prolog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_prolog,void ** binary)392 aco_compile_ps_prolog(const struct aco_compiler_options* options,
393 const struct aco_shader_info* info, const struct aco_ps_prolog_info* pinfo,
394 const struct ac_shader_args* args, aco_shader_part_callback* build_prolog,
395 void** binary)
396 {
397 aco_compile_shader_part(options, info, args, select_ps_prolog, (void*)pinfo, build_prolog,
398 binary, true);
399 }
400
401 void
aco_compile_trap_handler(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * args,aco_callback * build_binary,void ** binary)402 aco_compile_trap_handler(const struct aco_compiler_options* options,
403 const struct aco_shader_info* info, const struct ac_shader_args* args,
404 aco_callback* build_binary, void** binary)
405 {
406 init();
407
408 ac_shader_config config = {0};
409 std::unique_ptr<Program> program{new Program};
410 program->collect_statistics = false;
411 program->debug.func = NULL;
412 program->debug.private_data = NULL;
413
414 select_trap_handler_shader(program.get(), &config, options, info, args);
415
416 if (options->dump_preoptir)
417 aco_print_program(program.get(), stderr);
418 validate(program.get());
419
420 insert_exec_mask(program.get());
421 validate(program.get());
422
423 lower_to_hw_instr(program.get());
424 lower_branches(program.get());
425 validate(program.get());
426
427 insert_waitcnt(program.get());
428 insert_NOPs(program.get());
429
430 /* assembly */
431 std::vector<uint32_t> code;
432 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
433 unsigned exec_size = emit_program(program.get(), code);
434
435 std::string disasm;
436 if (options->record_asm)
437 disasm = get_disasm_string(program.get(), code, exec_size);
438
439 (*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
440 exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
441 }
442
443 uint64_t
aco_get_codegen_flags()444 aco_get_codegen_flags()
445 {
446 init();
447 /* Exclude flags which don't affect code generation. */
448 uint64_t exclude =
449 DEBUG_VALIDATE_IR | DEBUG_VALIDATE_RA | DEBUG_PERF_INFO | DEBUG_LIVE_INFO |
450 DEBUG_NO_VALIDATE_IR | DEBUG_VALIDATE_LIVE_VARS;
451 return debug_flags & ~exclude;
452 }
453
454 bool
aco_is_gpu_supported(const struct radeon_info * info)455 aco_is_gpu_supported(const struct radeon_info* info)
456 {
457 switch (info->gfx_level) {
458 case GFX6:
459 case GFX7:
460 case GFX8:
461 return true;
462 case GFX9:
463 return info->has_graphics; /* no CDNA support */
464 case GFX10:
465 case GFX10_3:
466 case GFX11:
467 case GFX11_5:
468 case GFX12:
469 return true;
470 default:
471 return false;
472 }
473 }
474
475 bool
aco_nir_op_supports_packed_math_16bit(const nir_alu_instr * alu)476 aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
477 {
478 switch (alu->op) {
479 case nir_op_f2f16: {
480 nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
481 unsigned execution_mode = shader->info.float_controls_execution_mode;
482 return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
483 nir_is_rounding_mode_rtz(execution_mode, 16);
484 }
485 case nir_op_fadd:
486 case nir_op_fsub:
487 case nir_op_fmul:
488 case nir_op_ffma:
489 case nir_op_fdiv:
490 case nir_op_flrp:
491 case nir_op_fabs:
492 case nir_op_fneg:
493 case nir_op_fsat:
494 case nir_op_fmin:
495 case nir_op_fmax:
496 case nir_op_f2f16_rtz:
497 case nir_op_iabs:
498 case nir_op_iadd:
499 case nir_op_iadd_sat:
500 case nir_op_uadd_sat:
501 case nir_op_isub:
502 case nir_op_isub_sat:
503 case nir_op_usub_sat:
504 case nir_op_ineg:
505 case nir_op_imul:
506 case nir_op_imin:
507 case nir_op_imax:
508 case nir_op_umin:
509 case nir_op_umax: return true;
510 case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */
511 case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */
512 case nir_op_ushr:
513 default: return false;
514 }
515 }
516
517 const aco_compiler_statistic_info* aco_statistic_infos = statistic_infos.data();
518
519 void
aco_print_asm(const struct radeon_info * info,unsigned wave_size,uint32_t * binary,unsigned num_dw)520 aco_print_asm(const struct radeon_info *info, unsigned wave_size,
521 uint32_t *binary, unsigned num_dw)
522 {
523 std::vector<uint32_t> binarray(binary, binary + num_dw);
524 aco::Program prog;
525
526 prog.gfx_level = info->gfx_level;
527 prog.family = info->family;
528 prog.wave_size = wave_size;
529 prog.blocks.push_back(aco::Block());
530
531 aco::print_asm(&prog, binarray, num_dw, stderr);
532 }
533