1 /*
2 * Copyright © 2018 Google
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "aco_interface.h"
8
9 #include "aco_ir.h"
10
11 #include "util/memstream.h"
12
13 #include "ac_gpu_info.h"
14 #include <array>
15 #include <iostream>
16 #include <vector>
17
18 using namespace aco;
19
20 namespace {
21
22 static const std::array<aco_compiler_statistic_info, aco_num_statistics> statistic_infos = []()
__anon5dbb34e90202() 23 {
24 std::array<aco_compiler_statistic_info, aco_num_statistics> ret{};
25 ret[aco_statistic_hash] =
26 aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
27 ret[aco_statistic_instructions] =
28 aco_compiler_statistic_info{"Instructions", "Instruction count"};
29 ret[aco_statistic_copies] =
30 aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
31 ret[aco_statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"};
32 ret[aco_statistic_latency] =
33 aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
34 ret[aco_statistic_inv_throughput] = aco_compiler_statistic_info{
35 "Inverse Throughput", "Estimated busy cycles to execute one wave"};
36 ret[aco_statistic_vmem_clauses] = aco_compiler_statistic_info{
37 "VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
38 ret[aco_statistic_smem_clauses] = aco_compiler_statistic_info{
39 "SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
40 ret[aco_statistic_sgpr_presched] =
41 aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
42 ret[aco_statistic_vgpr_presched] =
43 aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
44 ret[aco_statistic_valu] = aco_compiler_statistic_info{"VALU", "Number of VALU instructions"};
45 ret[aco_statistic_salu] = aco_compiler_statistic_info{"SALU", "Number of SALU instructions"};
46 ret[aco_statistic_vmem] = aco_compiler_statistic_info{"VMEM", "Number of VMEM instructions"};
47 ret[aco_statistic_smem] = aco_compiler_statistic_info{"SMEM", "Number of SMEM instructions"};
48 ret[aco_statistic_vopd] = aco_compiler_statistic_info{"VOPD", "Number of VOPD instructions"};
49 return ret;
50 }();
51
52 static void
validate(Program * program)53 validate(Program* program)
54 {
55 if (!(debug_flags & DEBUG_VALIDATE_IR))
56 return;
57
58 ASSERTED bool is_valid = validate_ir(program);
59 assert(is_valid);
60 }
61
62 static std::string
get_disasm_string(Program * program,std::vector<uint32_t> & code,unsigned exec_size)63 get_disasm_string(Program* program, std::vector<uint32_t>& code, unsigned exec_size)
64 {
65 std::string disasm;
66
67 char* data = NULL;
68 size_t disasm_size = 0;
69 struct u_memstream mem;
70 if (u_memstream_open(&mem, &data, &disasm_size)) {
71 FILE* const memf = u_memstream_get(&mem);
72 if (check_print_asm_support(program)) {
73 print_asm(program, code, exec_size / 4u, memf);
74 } else {
75 fprintf(memf, "Shader disassembly is not supported in the current configuration"
76 #if !AMD_LLVM_AVAILABLE
77 " (LLVM not available)"
78 #endif
79 ", falling back to print_program.\n\n");
80 aco_print_program(program, memf);
81 }
82 fputc(0, memf);
83 u_memstream_close(&mem);
84 disasm = std::string(data, data + disasm_size);
85 free(data);
86 }
87
88 return disasm;
89 }
90
91 static std::string
aco_postprocess_shader(const struct aco_compiler_options * options,std::unique_ptr<Program> & program)92 aco_postprocess_shader(const struct aco_compiler_options* options,
93 std::unique_ptr<Program>& program)
94 {
95 std::string llvm_ir;
96
97 if (options->dump_preoptir)
98 aco_print_program(program.get(), stderr);
99
100 ASSERTED bool is_valid = validate_cfg(program.get());
101 assert(is_valid);
102
103 dominator_tree(program.get());
104 if (program->should_repair_ssa)
105 repair_ssa(program.get());
106 lower_phis(program.get());
107
108 if (program->gfx_level <= GFX7)
109 lower_subdword(program.get());
110
111 validate(program.get());
112
113 /* Optimization */
114 if (!options->optimisations_disabled) {
115 if (!(debug_flags & DEBUG_NO_VN))
116 value_numbering(program.get());
117 if (!(debug_flags & DEBUG_NO_OPT))
118 optimize(program.get());
119
120 /* Optimization may move SGPR uses down, requiring further SSA repair. */
121 if (program->should_repair_ssa && repair_ssa(program.get()))
122 lower_phis(program.get());
123 }
124
125 /* cleanup and exec mask handling */
126 setup_reduce_temp(program.get());
127 insert_exec_mask(program.get());
128 validate(program.get());
129
130 /* spilling and scheduling */
131 live_var_analysis(program.get());
132 if (program->collect_statistics)
133 collect_presched_stats(program.get());
134 spill(program.get());
135
136 if (options->record_ir) {
137 char* data = NULL;
138 size_t size = 0;
139 u_memstream mem;
140 if (u_memstream_open(&mem, &data, &size)) {
141 FILE* const memf = u_memstream_get(&mem);
142 aco_print_program(program.get(), memf);
143 fputc(0, memf);
144 u_memstream_close(&mem);
145 }
146
147 llvm_ir = std::string(data, data + size);
148 free(data);
149 }
150
151 if ((debug_flags & DEBUG_LIVE_INFO) && options->dump_ir)
152 aco_print_program(program.get(), stderr, print_live_vars | print_kill);
153
154 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED))
155 schedule_program(program.get());
156 validate(program.get());
157
158 /* Register Allocation */
159 register_allocation(program.get());
160
161 if (validate_ra(program.get())) {
162 aco_print_program(program.get(), stderr);
163 abort();
164 } else if (options->dump_ir) {
165 aco_print_program(program.get(), stderr);
166 }
167
168 validate(program.get());
169
170 /* Optimization */
171 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_OPT)) {
172 optimize_postRA(program.get());
173 validate(program.get());
174 }
175
176 ssa_elimination(program.get());
177 jump_threading(program.get());
178
179 /* Lower to HW Instructions */
180 lower_to_hw_instr(program.get());
181 lower_branches(program.get());
182 validate(program.get());
183
184 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED_VOPD))
185 schedule_vopd(program.get());
186
187 /* Schedule hardware instructions for ILP */
188 if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED_ILP))
189 schedule_ilp(program.get());
190
191 insert_waitcnt(program.get());
192 insert_NOPs(program.get());
193 if (program->gfx_level >= GFX11)
194 insert_delay_alu(program.get());
195
196 if (program->gfx_level >= GFX10)
197 form_hard_clauses(program.get());
198
199 if (program->gfx_level >= GFX11)
200 combine_delay_alu(program.get());
201
202 if (program->collect_statistics || (debug_flags & DEBUG_PERF_INFO))
203 collect_preasm_stats(program.get());
204
205 return llvm_ir;
206 }
207
208 typedef void(select_shader_part_callback)(Program* program, void* pinfo, ac_shader_config* config,
209 const struct aco_compiler_options* options,
210 const struct aco_shader_info* info,
211 const struct ac_shader_args* args);
212
213 static void
aco_compile_shader_part(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * args,select_shader_part_callback select_shader_part,void * pinfo,aco_shader_part_callback * build_binary,void ** binary,bool is_prolog=false)214 aco_compile_shader_part(const struct aco_compiler_options* options,
215 const struct aco_shader_info* info, const struct ac_shader_args* args,
216 select_shader_part_callback select_shader_part, void* pinfo,
217 aco_shader_part_callback* build_binary, void** binary,
218 bool is_prolog = false)
219 {
220 init();
221
222 ac_shader_config config = {0};
223 std::unique_ptr<Program> program{new Program};
224
225 program->collect_statistics = options->record_stats;
226 if (program->collect_statistics)
227 memset(program->statistics, 0, sizeof(program->statistics));
228
229 program->debug.func = options->debug.func;
230 program->debug.private_data = options->debug.private_data;
231
232 program->is_prolog = is_prolog;
233 program->is_epilog = !is_prolog;
234
235 /* Instruction selection */
236 select_shader_part(program.get(), pinfo, &config, options, info, args);
237
238 aco_postprocess_shader(options, program);
239
240 /* assembly */
241 std::vector<uint32_t> code;
242 bool append_endpgm = !(options->is_opengl && is_prolog);
243 unsigned exec_size = emit_program(program.get(), code, NULL, append_endpgm);
244
245 std::string disasm;
246 if (options->record_asm)
247 disasm = get_disasm_string(program.get(), code, exec_size);
248
249 (*build_binary)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
250 disasm.data(), disasm.size());
251 }
252
253 } /* end namespace */
254
255 void
aco_compile_shader(const struct aco_compiler_options * options,const struct aco_shader_info * info,unsigned shader_count,struct nir_shader * const * shaders,const struct ac_shader_args * args,aco_callback * build_binary,void ** binary)256 aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
257 unsigned shader_count, struct nir_shader* const* shaders,
258 const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
259 {
260 init();
261
262 ac_shader_config config = {0};
263 std::unique_ptr<Program> program{new Program};
264
265 program->collect_statistics = options->record_stats;
266 if (program->collect_statistics)
267 memset(program->statistics, 0, sizeof(program->statistics));
268
269 program->debug.func = options->debug.func;
270 program->debug.private_data = options->debug.private_data;
271
272 /* Instruction Selection */
273 select_program(program.get(), shader_count, shaders, &config, options, info, args);
274
275 std::string llvm_ir = aco_postprocess_shader(options, program);
276
277 /* assembly */
278 std::vector<uint32_t> code;
279 std::vector<struct aco_symbol> symbols;
280 /* OpenGL combine multi shader parts into one continous code block,
281 * so only last part need the s_endpgm instruction.
282 */
283 bool append_endpgm = !(options->is_opengl && info->ps.has_epilog);
284 unsigned exec_size = emit_program(program.get(), code, &symbols, append_endpgm);
285
286 if (program->collect_statistics)
287 collect_postasm_stats(program.get(), code);
288
289 std::string disasm;
290 if (options->record_asm)
291 disasm = get_disasm_string(program.get(), code, exec_size);
292
293 size_t stats_size = 0;
294 if (program->collect_statistics)
295 stats_size = aco_num_statistics * sizeof(uint32_t);
296
297 (*build_binary)(binary, &config, llvm_ir.c_str(), llvm_ir.size(), disasm.c_str(), disasm.size(),
298 program->statistics, stats_size, exec_size, code.data(), code.size(),
299 symbols.data(), symbols.size(), program->debug_info.data(),
300 program->debug_info.size());
301 }
302
303 void
aco_compile_rt_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * in_args,const struct ac_shader_args * out_args,aco_callback * build_prolog,void ** binary)304 aco_compile_rt_prolog(const struct aco_compiler_options* options,
305 const struct aco_shader_info* info, const struct ac_shader_args* in_args,
306 const struct ac_shader_args* out_args, aco_callback* build_prolog,
307 void** binary)
308 {
309 init();
310
311 /* create program */
312 ac_shader_config config = {0};
313 std::unique_ptr<Program> program{new Program};
314 program->collect_statistics = false;
315 program->debug.func = NULL;
316 program->debug.private_data = NULL;
317
318 select_rt_prolog(program.get(), &config, options, info, in_args, out_args);
319 validate(program.get());
320 insert_waitcnt(program.get());
321 insert_NOPs(program.get());
322 if (program->gfx_level >= GFX11)
323 insert_delay_alu(program.get());
324 if (program->gfx_level >= GFX10)
325 form_hard_clauses(program.get());
326 if (program->gfx_level >= GFX11)
327 combine_delay_alu(program.get());
328
329 if (options->dump_ir)
330 aco_print_program(program.get(), stderr);
331
332 /* assembly */
333 std::vector<uint32_t> code;
334 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
335 unsigned exec_size = emit_program(program.get(), code);
336
337 std::string disasm;
338 if (options->record_asm)
339 disasm = get_disasm_string(program.get(), code, exec_size);
340
341 (*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
342 exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
343 }
344
345 void
aco_compile_vs_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_vs_prolog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_prolog,void ** binary)346 aco_compile_vs_prolog(const struct aco_compiler_options* options,
347 const struct aco_shader_info* info, const struct aco_vs_prolog_info* pinfo,
348 const struct ac_shader_args* args, aco_shader_part_callback* build_prolog,
349 void** binary)
350 {
351 init();
352
353 /* create program */
354 ac_shader_config config = {0};
355 std::unique_ptr<Program> program{new Program};
356 program->collect_statistics = false;
357 program->debug.func = NULL;
358 program->debug.private_data = NULL;
359
360 /* create IR */
361 select_vs_prolog(program.get(), pinfo, &config, options, info, args);
362 validate(program.get());
363 insert_NOPs(program.get());
364 if (program->gfx_level >= GFX10)
365 form_hard_clauses(program.get());
366
367 if (options->dump_ir)
368 aco_print_program(program.get(), stderr);
369
370 /* assembly */
371 std::vector<uint32_t> code;
372 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
373 unsigned exec_size = emit_program(program.get(), code);
374
375 std::string disasm;
376 if (options->record_asm)
377 disasm = get_disasm_string(program.get(), code, exec_size);
378
379 (*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
380 disasm.data(), disasm.size());
381 }
382
383 void
aco_compile_ps_epilog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_ps_epilog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_epilog,void ** binary)384 aco_compile_ps_epilog(const struct aco_compiler_options* options,
385 const struct aco_shader_info* info, const struct aco_ps_epilog_info* pinfo,
386 const struct ac_shader_args* args, aco_shader_part_callback* build_epilog,
387 void** binary)
388 {
389 aco_compile_shader_part(options, info, args, select_ps_epilog, (void*)pinfo, build_epilog,
390 binary);
391 }
392
393 void
aco_compile_ps_prolog(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct aco_ps_prolog_info * pinfo,const struct ac_shader_args * args,aco_shader_part_callback * build_prolog,void ** binary)394 aco_compile_ps_prolog(const struct aco_compiler_options* options,
395 const struct aco_shader_info* info, const struct aco_ps_prolog_info* pinfo,
396 const struct ac_shader_args* args, aco_shader_part_callback* build_prolog,
397 void** binary)
398 {
399 aco_compile_shader_part(options, info, args, select_ps_prolog, (void*)pinfo, build_prolog,
400 binary, true);
401 }
402
403 void
aco_compile_trap_handler(const struct aco_compiler_options * options,const struct aco_shader_info * info,const struct ac_shader_args * args,aco_callback * build_binary,void ** binary)404 aco_compile_trap_handler(const struct aco_compiler_options* options,
405 const struct aco_shader_info* info, const struct ac_shader_args* args,
406 aco_callback* build_binary, void** binary)
407 {
408 init();
409
410 ac_shader_config config = {0};
411 std::unique_ptr<Program> program{new Program};
412 program->collect_statistics = false;
413 program->debug.func = NULL;
414 program->debug.private_data = NULL;
415
416 select_trap_handler_shader(program.get(), &config, options, info, args);
417
418 if (options->dump_preoptir)
419 aco_print_program(program.get(), stderr);
420 validate(program.get());
421
422 insert_exec_mask(program.get());
423 validate(program.get());
424
425 lower_to_hw_instr(program.get());
426 lower_branches(program.get());
427 validate(program.get());
428
429 insert_waitcnt(program.get());
430 insert_NOPs(program.get());
431
432 /* assembly */
433 std::vector<uint32_t> code;
434 code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
435 unsigned exec_size = emit_program(program.get(), code);
436
437 std::string disasm;
438 if (options->record_asm)
439 disasm = get_disasm_string(program.get(), code, exec_size);
440
441 (*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
442 exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
443 }
444
445 uint64_t
aco_get_codegen_flags()446 aco_get_codegen_flags()
447 {
448 init();
449 /* Exclude flags which don't affect code generation. */
450 uint64_t exclude =
451 DEBUG_VALIDATE_IR | DEBUG_VALIDATE_RA | DEBUG_PERF_INFO | DEBUG_LIVE_INFO;
452 return debug_flags & ~exclude;
453 }
454
455 bool
aco_is_gpu_supported(const struct radeon_info * info)456 aco_is_gpu_supported(const struct radeon_info* info)
457 {
458 switch (info->gfx_level) {
459 case GFX6:
460 case GFX7:
461 case GFX8:
462 return true;
463 case GFX9:
464 return info->has_graphics; /* no CDNA support */
465 case GFX10:
466 case GFX10_3:
467 case GFX11:
468 case GFX11_5:
469 case GFX12:
470 return true;
471 default:
472 return false;
473 }
474 }
475
476 bool
aco_nir_op_supports_packed_math_16bit(const nir_alu_instr * alu)477 aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
478 {
479 switch (alu->op) {
480 case nir_op_f2f16: {
481 nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
482 unsigned execution_mode = shader->info.float_controls_execution_mode;
483 return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
484 nir_is_rounding_mode_rtz(execution_mode, 16);
485 }
486 case nir_op_fadd:
487 case nir_op_fsub:
488 case nir_op_fmul:
489 case nir_op_ffma:
490 case nir_op_fdiv:
491 case nir_op_flrp:
492 case nir_op_fabs:
493 case nir_op_fneg:
494 case nir_op_fsat:
495 case nir_op_fmin:
496 case nir_op_fmax:
497 case nir_op_f2f16_rtz:
498 case nir_op_iabs:
499 case nir_op_iadd:
500 case nir_op_iadd_sat:
501 case nir_op_uadd_sat:
502 case nir_op_isub:
503 case nir_op_isub_sat:
504 case nir_op_usub_sat:
505 case nir_op_ineg:
506 case nir_op_imul:
507 case nir_op_imin:
508 case nir_op_imax:
509 case nir_op_umin:
510 case nir_op_umax: return true;
511 case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */
512 case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */
513 case nir_op_ushr:
514 default: return false;
515 }
516 }
517
518 const aco_compiler_statistic_info* aco_statistic_infos = statistic_infos.data();
519
520 void
aco_print_asm(const struct radeon_info * info,unsigned wave_size,uint32_t * binary,unsigned num_dw)521 aco_print_asm(const struct radeon_info *info, unsigned wave_size,
522 uint32_t *binary, unsigned num_dw)
523 {
524 std::vector<uint32_t> binarray(binary, binary + num_dw);
525 aco::Program prog;
526
527 prog.gfx_level = info->gfx_level;
528 prog.family = info->family;
529 prog.wave_size = wave_size;
530 prog.blocks.push_back(aco::Block());
531
532 aco::print_asm(&prog, binarray, num_dw, stderr);
533 }
534