1 /*
2 * Copyright 2023 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "ac_hw_stage.h"
28 #include "aco_interface.h"
29
30 static void
si_aco_compiler_debug(void * private_data,enum aco_compiler_debug_level level,const char * message)31 si_aco_compiler_debug(void *private_data, enum aco_compiler_debug_level level,
32 const char *message)
33 {
34 struct util_debug_callback *debug = private_data;
35
36 util_debug_message(debug, SHADER_INFO, "%s\n", message);
37 }
38
39 static void
si_fill_aco_options(struct si_screen * screen,gl_shader_stage stage,struct aco_compiler_options * options,struct util_debug_callback * debug)40 si_fill_aco_options(struct si_screen *screen, gl_shader_stage stage,
41 struct aco_compiler_options *options,
42 struct util_debug_callback *debug)
43 {
44 options->dump_ir = si_can_dump_shader(screen, stage, SI_DUMP_ACO_IR);
45 options->dump_preoptir = si_can_dump_shader(screen, stage, SI_DUMP_INIT_ACO_IR);
46 options->record_asm = si_can_dump_shader(screen, stage, SI_DUMP_ASM) ||
47 screen->options.debug_disassembly;
48 options->record_ir = screen->record_llvm_ir;
49 options->is_opengl = true;
50
51 options->has_ls_vgpr_init_bug = screen->info.has_ls_vgpr_init_bug;
52 options->load_grid_size_from_user_sgpr = true;
53 options->family = screen->info.family;
54 options->gfx_level = screen->info.gfx_level;
55 options->address32_hi = screen->info.address32_hi;
56
57 options->debug.func = si_aco_compiler_debug;
58 options->debug.private_data = debug;
59 }
60
61 static void
si_fill_aco_shader_info(struct si_shader * shader,struct aco_shader_info * info,struct si_shader_args * args)62 si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
63 struct si_shader_args *args)
64 {
65 const struct si_shader_selector *sel = shader->selector;
66 const union si_shader_key *key = &shader->key;
67 const enum amd_gfx_level gfx_level = sel->screen->info.gfx_level;
68 gl_shader_stage stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : sel->stage;
69
70 info->wave_size = shader->wave_size;
71 info->workgroup_size = si_get_max_workgroup_size(shader);
72 info->merged_shader_compiled_separately = !shader->is_gs_copy_shader &&
73 si_is_multi_part_shader(shader) && !shader->is_monolithic;
74
75 info->image_2d_view_of_3d = gfx_level == GFX9;
76 info->hw_stage = si_select_hw_stage(stage, key, gfx_level);
77
78 if (stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg && !key->ge.as_es) {
79 info->has_ngg_culling = si_shader_culling_enabled(shader);
80 info->has_ngg_early_prim_export = gfx10_ngg_export_prim_early(shader);
81 }
82
83 switch (stage) {
84 case MESA_SHADER_TESS_CTRL:
85 info->vs.tcs_in_out_eq = key->ge.opt.same_patch_vertices;
86 info->vs.any_tcs_inputs_via_lds = sel->info.tcs_inputs_via_lds ||
87 (!shader->key.ge.opt.same_patch_vertices &&
88 sel->info.tcs_inputs_via_temp);
89 info->tcs.tcs_offchip_layout = args->tcs_offchip_layout;
90 break;
91 case MESA_SHADER_FRAGMENT:
92 info->ps.num_inputs = si_get_ps_num_interp(shader);
93 info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena;
94 info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
95 info->ps.alpha_reference = args->alpha_reference;
96 info->ps.has_prolog = !shader->is_monolithic;
97 info->ps.has_epilog = !shader->is_monolithic;
98 break;
99 default:
100 break;
101 }
102 }
103
104 static void
si_aco_build_shader_binary(void ** data,const struct ac_shader_config * config,const char * llvm_ir_str,unsigned llvm_ir_size,const char * disasm_str,unsigned disasm_size,uint32_t * statistics,uint32_t stats_size,uint32_t exec_size,const uint32_t * code,uint32_t code_dw,const struct aco_symbol * symbols,unsigned num_symbols,const struct ac_shader_debug_info * debug_info,unsigned debug_info_count)105 si_aco_build_shader_binary(void **data, const struct ac_shader_config *config,
106 const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str,
107 unsigned disasm_size, uint32_t *statistics, uint32_t stats_size,
108 uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
109 const struct aco_symbol *symbols, unsigned num_symbols,
110 const struct ac_shader_debug_info *debug_info, unsigned debug_info_count)
111 {
112 struct si_shader *shader = (struct si_shader *)data;
113
114 unsigned code_size = code_dw * 4;
115 char *buffer = MALLOC(code_size + disasm_size);
116 memcpy(buffer, code, code_size);
117
118 shader->binary.type = SI_SHADER_BINARY_RAW;
119 shader->binary.code_buffer = buffer;
120 shader->binary.code_size = code_size;
121 shader->binary.exec_size = exec_size;
122
123 if (disasm_size) {
124 memcpy(buffer + code_size, disasm_str, disasm_size);
125 shader->binary.disasm_string = buffer + code_size;
126 shader->binary.disasm_size = disasm_size;
127 }
128
129 if (llvm_ir_size) {
130 shader->binary.llvm_ir_string = MALLOC(llvm_ir_size);
131 memcpy(shader->binary.llvm_ir_string, llvm_ir_str, llvm_ir_size);
132 }
133
134 if (num_symbols) {
135 unsigned symbol_size = num_symbols * sizeof(*symbols);
136 void *data = MALLOC(symbol_size);
137 memcpy(data, symbols, symbol_size);
138 shader->binary.symbols = data;
139 shader->binary.num_symbols = num_symbols;
140 }
141
142 shader->config = *config;
143 }
144
145 bool
si_aco_compile_shader(struct si_shader * shader,struct si_shader_args * args,struct nir_shader * nir,struct util_debug_callback * debug)146 si_aco_compile_shader(struct si_shader *shader,
147 struct si_shader_args *args,
148 struct nir_shader *nir,
149 struct util_debug_callback *debug)
150 {
151 const struct si_shader_selector *sel = shader->selector;
152
153 struct aco_compiler_options options = {0};
154 si_fill_aco_options(sel->screen, nir->info.stage, &options, debug);
155
156 struct aco_shader_info info = {0};
157 si_fill_aco_shader_info(shader, &info, args);
158
159 nir_shader *shaders[2];
160 unsigned num_shaders = 0;
161
162 bool free_nir = false;
163 struct si_shader prev_shader = {};
164 struct si_shader_args prev_args;
165
166 /* For merged shader stage. */
167 if (shader->is_monolithic && sel->screen->info.gfx_level >= GFX9 &&
168 (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_GEOMETRY)) {
169 shaders[num_shaders++] =
170 si_get_prev_stage_nir_shader(shader, &prev_shader, &prev_args, &free_nir);
171
172 args = &prev_args;
173 }
174
175 shaders[num_shaders++] = nir;
176
177 aco_compile_shader(&options, &info, num_shaders, shaders, &args->ac,
178 si_aco_build_shader_binary, (void **)shader);
179
180 if (free_nir)
181 ralloc_free(shaders[0]);
182
183 return true;
184 }
185
186 void
si_aco_resolve_symbols(struct si_shader * shader,uint32_t * code_for_write,const uint32_t * code_for_read,uint64_t scratch_va,uint32_t const_offset)187 si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code_for_write,
188 const uint32_t *code_for_read, uint64_t scratch_va, uint32_t const_offset)
189 {
190 const struct aco_symbol *symbols = (struct aco_symbol *)shader->binary.symbols;
191 const struct si_shader_selector *sel = shader->selector;
192 const union si_shader_key *key = &shader->key;
193
194 for (int i = 0; i < shader->binary.num_symbols; i++) {
195 uint32_t value = 0;
196
197 switch (symbols[i].id) {
198 case aco_symbol_scratch_addr_lo:
199 value = scratch_va;
200 break;
201 case aco_symbol_scratch_addr_hi:
202 value = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
203
204 if (sel->screen->info.gfx_level >= GFX11)
205 value |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
206 else
207 value |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
208 break;
209 case aco_symbol_lds_ngg_scratch_base:
210 assert(sel->stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg);
211 value = shader->gs_info.esgs_ring_size * 4;
212 if (sel->stage == MESA_SHADER_GEOMETRY)
213 value += shader->ngg.ngg_emit_size * 4;
214 value = ALIGN(value, 8);
215 break;
216 case aco_symbol_lds_ngg_gs_out_vertex_base:
217 assert(sel->stage == MESA_SHADER_GEOMETRY && key->ge.as_ngg);
218 value = shader->gs_info.esgs_ring_size * 4;
219 break;
220 case aco_symbol_const_data_addr:
221 if (!const_offset)
222 continue;
223 value = code_for_read[symbols[i].offset] + const_offset;
224 break;
225 default:
226 unreachable("invalid aco symbol");
227 break;
228 }
229
230 code_for_write[symbols[i].offset] = value;
231 }
232 }
233
234 static void
si_aco_build_shader_part_binary(void ** priv_ptr,uint32_t num_sgprs,uint32_t num_vgprs,const uint32_t * code,uint32_t code_dw_size,const char * disasm_str,uint32_t disasm_size)235 si_aco_build_shader_part_binary(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
236 const uint32_t* code, uint32_t code_dw_size,
237 const char* disasm_str, uint32_t disasm_size)
238 {
239 struct si_shader_part *result = (struct si_shader_part *)priv_ptr;
240 unsigned code_size = code_dw_size * 4;
241
242 char *buffer = MALLOC(code_size + disasm_size);
243 memcpy(buffer, code, code_size);
244
245 result->binary.type = SI_SHADER_BINARY_RAW;
246 result->binary.code_buffer = buffer;
247 result->binary.code_size = code_size;
248 result->binary.exec_size = code_size;
249
250 if (disasm_size) {
251 memcpy(buffer + code_size, disasm_str, disasm_size);
252 result->binary.disasm_string = buffer + code_size;
253 result->binary.disasm_size = disasm_size;
254 }
255
256 result->config.num_sgprs = num_sgprs;
257 result->config.num_vgprs = num_vgprs;
258 }
259
260 static bool
si_aco_build_ps_prolog(struct aco_compiler_options * options,struct si_shader_part * result)261 si_aco_build_ps_prolog(struct aco_compiler_options *options,
262 struct si_shader_part *result)
263 {
264 const union si_shader_part_key *key = &result->key;
265
266 struct si_shader_args args;
267 si_get_ps_prolog_args(&args, key);
268
269 struct aco_ps_prolog_info pinfo = {
270 .poly_stipple = key->ps_prolog.states.poly_stipple,
271 .poly_stipple_buf_offset = SI_PS_CONST_POLY_STIPPLE * 16,
272
273 .bc_optimize_for_persp = key->ps_prolog.states.bc_optimize_for_persp,
274 .bc_optimize_for_linear = key->ps_prolog.states.bc_optimize_for_linear,
275 .force_persp_sample_interp = key->ps_prolog.states.force_persp_sample_interp,
276 .force_linear_sample_interp = key->ps_prolog.states.force_linear_sample_interp,
277 .force_persp_center_interp = key->ps_prolog.states.force_persp_center_interp,
278 .force_linear_center_interp = key->ps_prolog.states.force_linear_center_interp,
279
280 .samplemask_log_ps_iter = key->ps_prolog.states.samplemask_log_ps_iter,
281 .num_interp_inputs = key->ps_prolog.num_interp_inputs,
282 .colors_read = key->ps_prolog.colors_read,
283 .color_interp_vgpr_index[0] = key->ps_prolog.color_interp_vgpr_index[0],
284 .color_interp_vgpr_index[1] = key->ps_prolog.color_interp_vgpr_index[1],
285 .color_attr_index[0] = key->ps_prolog.color_attr_index[0],
286 .color_attr_index[1] = key->ps_prolog.color_attr_index[1],
287 .color_two_side = key->ps_prolog.states.color_two_side,
288 .needs_wqm = key->ps_prolog.wqm,
289
290 .internal_bindings = args.internal_bindings,
291 };
292
293 struct aco_shader_info info = {0};
294 info.hw_stage = AC_HW_PIXEL_SHADER;
295 info.workgroup_size = info.wave_size = key->ps_prolog.wave32 ? 32 : 64,
296
297 aco_compile_ps_prolog(options, &info, &pinfo, &args.ac,
298 si_aco_build_shader_part_binary, (void **)result);
299 return true;
300 }
301
302 static bool
si_aco_build_ps_epilog(struct aco_compiler_options * options,struct si_shader_part * result)303 si_aco_build_ps_epilog(struct aco_compiler_options *options,
304 struct si_shader_part *result)
305 {
306 const union si_shader_part_key *key = &result->key;
307
308 struct aco_ps_epilog_info pinfo = {
309 .spi_shader_col_format = key->ps_epilog.states.spi_shader_col_format,
310 .color_is_int8 = key->ps_epilog.states.color_is_int8,
311 .color_is_int10 = key->ps_epilog.states.color_is_int10,
312 .broadcast_last_cbuf = key->ps_epilog.states.last_cbuf,
313 .alpha_func = key->ps_epilog.states.alpha_func,
314 .alpha_to_one = key->ps_epilog.states.alpha_to_one,
315 .alpha_to_coverage_via_mrtz = key->ps_epilog.states.alpha_to_coverage_via_mrtz,
316 .clamp_color = key->ps_epilog.states.clamp_color,
317 .mrt0_is_dual_src = key->ps_epilog.states.dual_src_blend_swizzle,
318 /* rbplus_depth_only_opt only affects registers, not the shader */
319 .kill_depth = key->ps_epilog.states.kill_z,
320 .kill_stencil = key->ps_epilog.states.kill_stencil,
321 .kill_samplemask = key->ps_epilog.states.kill_samplemask,
322 .skip_null_export = options->gfx_level >= GFX10 && !key->ps_epilog.uses_discard,
323 .color_types = key->ps_epilog.color_types,
324 .color_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
325 };
326
327 struct si_shader_args args;
328 si_get_ps_epilog_args(&args, key, pinfo.colors, &pinfo.depth, &pinfo.stencil,
329 &pinfo.samplemask);
330 pinfo.alpha_reference = args.alpha_reference;
331
332 struct aco_shader_info info = {0};
333 info.hw_stage = AC_HW_PIXEL_SHADER;
334 info.workgroup_size = info.wave_size = key->ps_epilog.wave32 ? 32 : 64,
335
336 aco_compile_ps_epilog(options, &info, &pinfo, &args.ac,
337 si_aco_build_shader_part_binary, (void **)result);
338 return true;
339 }
340
341 bool
si_aco_build_shader_part(struct si_screen * screen,gl_shader_stage stage,bool prolog,struct util_debug_callback * debug,const char * name,struct si_shader_part * result)342 si_aco_build_shader_part(struct si_screen *screen, gl_shader_stage stage, bool prolog,
343 struct util_debug_callback *debug, const char *name,
344 struct si_shader_part *result)
345 {
346 struct aco_compiler_options options = {0};
347 si_fill_aco_options(screen, stage, &options, debug);
348
349 switch (stage) {
350 case MESA_SHADER_FRAGMENT:
351 if (prolog)
352 return si_aco_build_ps_prolog(&options, result);
353 else
354 return si_aco_build_ps_epilog(&options, result);
355 default:
356 unreachable("bad shader part");
357 }
358
359 return false;
360 }
361