• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "ac_hw_stage.h"
28 #include "aco_interface.h"
29 
30 static void
si_aco_compiler_debug(void * private_data,enum aco_compiler_debug_level level,const char * message)31 si_aco_compiler_debug(void *private_data, enum aco_compiler_debug_level level,
32                       const char *message)
33 {
34    struct util_debug_callback *debug = private_data;
35 
36    util_debug_message(debug, SHADER_INFO, "%s\n", message);
37 }
38 
39 static void
si_fill_aco_options(struct si_screen * screen,gl_shader_stage stage,struct aco_compiler_options * options,struct util_debug_callback * debug)40 si_fill_aco_options(struct si_screen *screen, gl_shader_stage stage,
41                     struct aco_compiler_options *options,
42                     struct util_debug_callback *debug)
43 {
44    options->dump_shader =
45       si_can_dump_shader(screen, stage, SI_DUMP_ACO_IR) ||
46       si_can_dump_shader(screen, stage, SI_DUMP_ASM);
47    options->dump_preoptir = si_can_dump_shader(screen, stage, SI_DUMP_INIT_ACO_IR);
48    options->record_ir = screen->record_llvm_ir;
49    options->is_opengl = true;
50 
51    options->has_ls_vgpr_init_bug = screen->info.has_ls_vgpr_init_bug;
52    options->load_grid_size_from_user_sgpr = true;
53    options->family = screen->info.family;
54    options->gfx_level = screen->info.gfx_level;
55    options->address32_hi = screen->info.address32_hi;
56 
57    options->debug.func = si_aco_compiler_debug;
58    options->debug.private_data = debug;
59 }
60 
61 static void
si_fill_aco_shader_info(struct si_shader * shader,struct aco_shader_info * info,struct si_shader_args * args)62 si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
63                         struct si_shader_args *args)
64 {
65    const struct si_shader_selector *sel = shader->selector;
66    const union si_shader_key *key = &shader->key;
67    const enum amd_gfx_level gfx_level = sel->screen->info.gfx_level;
68    gl_shader_stage stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : sel->stage;
69 
70    info->wave_size = shader->wave_size;
71    info->workgroup_size = si_get_max_workgroup_size(shader);
72    /* aco need non-zero value */
73    if (!info->workgroup_size)
74       info->workgroup_size = info->wave_size;
75 
76    info->merged_shader_compiled_separately = !shader->is_gs_copy_shader &&
77       si_is_multi_part_shader(shader) && !shader->is_monolithic;
78 
79    info->image_2d_view_of_3d = gfx_level == GFX9;
80    info->hw_stage = si_select_hw_stage(stage, key, gfx_level);
81 
82    if (stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg && !key->ge.as_es) {
83       info->has_ngg_culling = key->ge.opt.ngg_culling;
84       info->has_ngg_early_prim_export = gfx10_ngg_export_prim_early(shader);
85    }
86 
87    switch (stage) {
88    case MESA_SHADER_TESS_CTRL:
89       info->vs.tcs_in_out_eq = key->ge.opt.same_patch_vertices;
90       info->vs.tcs_temp_only_input_mask = sel->info.tcs_vgpr_only_inputs;
91       info->has_epilog = !shader->is_monolithic;
92       info->tcs.pass_tessfactors_by_reg = sel->info.tessfactors_are_def_in_all_invocs;
93       info->tcs.patch_stride = si_get_tcs_out_patch_stride(&sel->info);
94       info->tcs.tcs_offchip_layout = args->tcs_offchip_layout;
95       info->tcs.tes_offchip_addr = args->tes_offchip_addr;
96       info->tcs.vs_state_bits = args->vs_state_bits;
97       break;
98    case MESA_SHADER_FRAGMENT:
99       info->ps.num_interp = si_get_ps_num_interp(shader);
100       info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena;
101       info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
102       info->ps.alpha_reference = args->alpha_reference;
103       info->has_epilog = !shader->is_monolithic;
104       break;
105    default:
106       break;
107    }
108 }
109 
110 static void
si_aco_build_shader_binary(void ** data,const struct ac_shader_config * config,const char * llvm_ir_str,unsigned llvm_ir_size,const char * disasm_str,unsigned disasm_size,uint32_t * statistics,uint32_t stats_size,uint32_t exec_size,const uint32_t * code,uint32_t code_dw,const struct aco_symbol * symbols,unsigned num_symbols)111 si_aco_build_shader_binary(void **data, const struct ac_shader_config *config,
112                            const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str,
113                            unsigned disasm_size, uint32_t *statistics, uint32_t stats_size,
114                            uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
115                            const struct aco_symbol *symbols, unsigned num_symbols)
116 {
117    struct si_shader *shader = (struct si_shader *)data;
118 
119    unsigned code_size = code_dw * 4;
120    char *buffer = MALLOC(code_size + disasm_size);
121    memcpy(buffer, code, code_size);
122 
123    shader->binary.type = SI_SHADER_BINARY_RAW;
124    shader->binary.code_buffer = buffer;
125    shader->binary.code_size = code_size;
126    shader->binary.exec_size = exec_size;
127 
128    if (disasm_size) {
129       memcpy(buffer + code_size, disasm_str, disasm_size);
130       shader->binary.disasm_string = buffer + code_size;
131       shader->binary.disasm_size = disasm_size;
132    }
133 
134    if (llvm_ir_size) {
135       shader->binary.llvm_ir_string = MALLOC(llvm_ir_size);
136       memcpy(shader->binary.llvm_ir_string, llvm_ir_str, llvm_ir_size);
137    }
138 
139    if (num_symbols) {
140       unsigned symbol_size = num_symbols * sizeof(*symbols);
141       void *data = MALLOC(symbol_size);
142       memcpy(data, symbols, symbol_size);
143       shader->binary.symbols = data;
144       shader->binary.num_symbols = num_symbols;
145    }
146 
147    shader->config = *config;
148 }
149 
150 bool
si_aco_compile_shader(struct si_shader * shader,struct si_shader_args * args,struct nir_shader * nir,struct util_debug_callback * debug)151 si_aco_compile_shader(struct si_shader *shader,
152                       struct si_shader_args *args,
153                       struct nir_shader *nir,
154                       struct util_debug_callback *debug)
155 {
156    const struct si_shader_selector *sel = shader->selector;
157 
158    struct aco_compiler_options options = {0};
159    si_fill_aco_options(sel->screen, sel->stage, &options, debug);
160 
161    struct aco_shader_info info = {0};
162    si_fill_aco_shader_info(shader, &info, args);
163 
164    nir_shader *shaders[2];
165    unsigned num_shaders = 0;
166 
167    bool free_nir = false;
168    struct si_shader prev_shader = {};
169    struct si_shader_args prev_args;
170 
171    /* For merged shader stage. */
172    if (shader->is_monolithic && sel->screen->info.gfx_level >= GFX9 &&
173        (sel->stage == MESA_SHADER_TESS_CTRL || sel->stage == MESA_SHADER_GEOMETRY)) {
174 
175       shaders[num_shaders++] =
176          si_get_prev_stage_nir_shader(shader, &prev_shader, &prev_args, &free_nir);
177 
178       args = &prev_args;
179    }
180 
181    shaders[num_shaders++] = nir;
182 
183    aco_compile_shader(&options, &info, num_shaders, shaders, &args->ac,
184                       si_aco_build_shader_binary, (void **)shader);
185 
186    if (free_nir)
187       ralloc_free(shaders[0]);
188 
189    return true;
190 }
191 
192 void
si_aco_resolve_symbols(struct si_shader * shader,uint32_t * code_for_write,const uint32_t * code_for_read,uint64_t scratch_va,uint32_t const_offset)193 si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code_for_write,
194                        const uint32_t *code_for_read, uint64_t scratch_va, uint32_t const_offset)
195 {
196    const struct aco_symbol *symbols = (struct aco_symbol *)shader->binary.symbols;
197    const struct si_shader_selector *sel = shader->selector;
198    const union si_shader_key *key = &shader->key;
199 
200    for (int i = 0; i < shader->binary.num_symbols; i++) {
201       uint32_t value = 0;
202 
203       switch (symbols[i].id) {
204       case aco_symbol_scratch_addr_lo:
205          value = scratch_va;
206          break;
207       case aco_symbol_scratch_addr_hi:
208          value = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
209 
210          if (sel->screen->info.gfx_level >= GFX11)
211             value |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
212          else
213             value |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
214          break;
215       case aco_symbol_lds_ngg_scratch_base:
216          assert(sel->stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg);
217          value = shader->gs_info.esgs_ring_size * 4;
218          if (sel->stage == MESA_SHADER_GEOMETRY)
219             value += shader->ngg.ngg_emit_size * 4;
220          value = ALIGN(value, 8);
221          break;
222       case aco_symbol_lds_ngg_gs_out_vertex_base:
223          assert(sel->stage == MESA_SHADER_GEOMETRY && key->ge.as_ngg);
224          value = shader->gs_info.esgs_ring_size * 4;
225          break;
226       case aco_symbol_const_data_addr:
227          if (!const_offset)
228             continue;
229          value = code_for_read[symbols[i].offset] + const_offset;
230          break;
231       default:
232          unreachable("invalid aco symbol");
233          break;
234       }
235 
236       code_for_write[symbols[i].offset] = value;
237    }
238 }
239 
240 static void
si_aco_build_shader_part_binary(void ** priv_ptr,uint32_t num_sgprs,uint32_t num_vgprs,const uint32_t * code,uint32_t code_dw_size,const char * disasm_str,uint32_t disasm_size)241 si_aco_build_shader_part_binary(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
242                                 const uint32_t* code, uint32_t code_dw_size,
243                                 const char* disasm_str, uint32_t disasm_size)
244 {
245    struct si_shader_part *result = (struct si_shader_part *)priv_ptr;
246    unsigned code_size = code_dw_size * 4;
247 
248    char *buffer = MALLOC(code_size + disasm_size);
249    memcpy(buffer, code, code_size);
250 
251    result->binary.type = SI_SHADER_BINARY_RAW;
252    result->binary.code_buffer = buffer;
253    result->binary.code_size = code_size;
254    result->binary.exec_size = code_size;
255 
256    if (disasm_size) {
257       memcpy(buffer + code_size, disasm_str, disasm_size);
258       result->binary.disasm_string = buffer + code_size;
259       result->binary.disasm_size = disasm_size;
260    }
261 
262    result->config.num_sgprs = num_sgprs;
263    result->config.num_vgprs = num_vgprs;
264 }
265 
266 static bool
si_aco_build_tcs_epilog(struct si_screen * screen,struct aco_compiler_options * options,struct si_shader_part * result)267 si_aco_build_tcs_epilog(struct si_screen *screen,
268                         struct aco_compiler_options *options,
269                         struct si_shader_part *result)
270 {
271    const union si_shader_part_key *key = &result->key;
272 
273    struct si_shader_args args;
274    struct ac_arg rel_patch_id;
275    struct ac_arg invocation_id;
276    struct ac_arg tcs_out_current_patch_data_offset;
277    struct ac_arg tess_factors[6];
278    si_get_tcs_epilog_args(screen->info.gfx_level, &args, &rel_patch_id, &invocation_id,
279                           &tcs_out_current_patch_data_offset, tess_factors);
280 
281    struct aco_tcs_epilog_info einfo = {
282       .pass_tessfactors_by_reg = key->tcs_epilog.states.invoc0_tess_factors_are_def,
283       .tcs_out_patch_fits_subgroup = key->tcs_epilog.noop_s_barrier,
284       .primitive_mode = key->tcs_epilog.states.prim_mode,
285       .tess_offchip_ring_size = screen->hs.tess_offchip_ring_size,
286       .tes_reads_tessfactors = key->tcs_epilog.states.tes_reads_tess_factors,
287 
288       .rel_patch_id = rel_patch_id,
289       .invocation_id = invocation_id,
290       .tcs_out_current_patch_data_offset = tcs_out_current_patch_data_offset,
291       .tcs_out_lds_layout = args.tes_offchip_addr,
292       .tcs_offchip_layout = args.tcs_offchip_layout,
293    };
294    memcpy(einfo.tess_lvl_out, tess_factors, sizeof(einfo.tess_lvl_out));
295    memcpy(einfo.tess_lvl_in, tess_factors + 4, sizeof(einfo.tess_lvl_in));
296 
297    struct aco_shader_info info = {0};
298    info.hw_stage = AC_HW_HULL_SHADER;
299    info.wave_size = key->tcs_epilog.wave32 ? 32 : 64;
300    /* Set to >wave_size to keep p_barrier work. GFX6 has single wave for HS. */
301    info.workgroup_size = screen->info.gfx_level >= GFX7 ? 128 : info.wave_size;
302 
303    aco_compile_tcs_epilog(options, &info, &einfo, &args.ac,
304                           si_aco_build_shader_part_binary, (void **)result);
305    return true;
306 }
307 
308 static bool
si_aco_build_ps_prolog(struct aco_compiler_options * options,struct si_shader_part * result)309 si_aco_build_ps_prolog(struct aco_compiler_options *options,
310                        struct si_shader_part *result)
311 {
312    const union si_shader_part_key *key = &result->key;
313 
314    struct si_shader_args args;
315    si_get_ps_prolog_args(&args, key);
316 
317    struct aco_ps_prolog_info pinfo = {
318       .poly_stipple = key->ps_prolog.states.poly_stipple,
319       .poly_stipple_buf_offset = SI_PS_CONST_POLY_STIPPLE * 16,
320 
321       .bc_optimize_for_persp = key->ps_prolog.states.bc_optimize_for_persp,
322       .bc_optimize_for_linear = key->ps_prolog.states.bc_optimize_for_linear,
323       .force_persp_sample_interp = key->ps_prolog.states.force_persp_sample_interp,
324       .force_linear_sample_interp = key->ps_prolog.states.force_linear_sample_interp,
325       .force_persp_center_interp = key->ps_prolog.states.force_persp_center_interp,
326       .force_linear_center_interp = key->ps_prolog.states.force_linear_center_interp,
327 
328       .samplemask_log_ps_iter = key->ps_prolog.states.samplemask_log_ps_iter,
329       .num_interp_inputs = key->ps_prolog.num_interp_inputs,
330       .colors_read = key->ps_prolog.colors_read,
331       .color_interp_vgpr_index[0] = key->ps_prolog.color_interp_vgpr_index[0],
332       .color_interp_vgpr_index[1] = key->ps_prolog.color_interp_vgpr_index[1],
333       .color_attr_index[0] = key->ps_prolog.color_attr_index[0],
334       .color_attr_index[1] = key->ps_prolog.color_attr_index[1],
335       .color_two_side = key->ps_prolog.states.color_two_side,
336       .needs_wqm = key->ps_prolog.wqm,
337 
338       .internal_bindings = args.internal_bindings,
339    };
340 
341    struct aco_shader_info info = {0};
342    info.hw_stage = AC_HW_PIXEL_SHADER;
343    info.workgroup_size = info.wave_size = key->ps_prolog.wave32 ? 32 : 64,
344 
345    aco_compile_ps_prolog(options, &info, &pinfo, &args.ac,
346                          si_aco_build_shader_part_binary, (void **)result);
347    return true;
348 }
349 
350 static bool
si_aco_build_ps_epilog(struct aco_compiler_options * options,struct si_shader_part * result)351 si_aco_build_ps_epilog(struct aco_compiler_options *options,
352                        struct si_shader_part *result)
353 {
354    const union si_shader_part_key *key = &result->key;
355 
356    struct aco_ps_epilog_info pinfo = {
357       .spi_shader_col_format = key->ps_epilog.states.spi_shader_col_format,
358       .color_is_int8 = key->ps_epilog.states.color_is_int8,
359       .color_is_int10 = key->ps_epilog.states.color_is_int10,
360       .mrt0_is_dual_src = key->ps_epilog.states.dual_src_blend_swizzle,
361       .color_types = key->ps_epilog.color_types,
362       .clamp_color = key->ps_epilog.states.clamp_color,
363       .alpha_to_one = key->ps_epilog.states.alpha_to_one,
364       .alpha_to_coverage_via_mrtz = key->ps_epilog.states.alpha_to_coverage_via_mrtz,
365       .skip_null_export = options->gfx_level >= GFX10 && !key->ps_epilog.uses_discard,
366       .broadcast_last_cbuf = key->ps_epilog.states.last_cbuf,
367       .alpha_func = key->ps_epilog.states.alpha_func,
368    };
369 
370    struct si_shader_args args;
371    si_get_ps_epilog_args(&args, key, pinfo.colors, &pinfo.depth, &pinfo.stencil,
372                          &pinfo.samplemask);
373    pinfo.alpha_reference = args.alpha_reference;
374 
375    struct aco_shader_info info = {0};
376    info.hw_stage = AC_HW_PIXEL_SHADER;
377    info.workgroup_size = info.wave_size = key->ps_epilog.wave32 ? 32 : 64,
378 
379    aco_compile_ps_epilog(options, &info, &pinfo, &args.ac,
380                          si_aco_build_shader_part_binary, (void **)result);
381    return true;
382 }
383 
384 bool
si_aco_build_shader_part(struct si_screen * screen,gl_shader_stage stage,bool prolog,struct util_debug_callback * debug,const char * name,struct si_shader_part * result)385 si_aco_build_shader_part(struct si_screen *screen, gl_shader_stage stage, bool prolog,
386                          struct util_debug_callback *debug, const char *name,
387                          struct si_shader_part *result)
388 {
389    struct aco_compiler_options options = {0};
390    si_fill_aco_options(screen, stage, &options, debug);
391 
392    switch (stage) {
393    case MESA_SHADER_TESS_CTRL:
394       return si_aco_build_tcs_epilog(screen, &options, result);
395       break;
396    case MESA_SHADER_FRAGMENT:
397       if (prolog)
398          return si_aco_build_ps_prolog(&options, result);
399       else
400          return si_aco_build_ps_epilog(&options, result);
401    default:
402       unreachable("bad shader part");
403    }
404 
405    return false;
406 }
407