1 /*
2 * Copyright © 2010 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_fs_live_variables.h"
8 #include "brw_generator.h"
9 #include "brw_nir.h"
10 #include "brw_cfg.h"
11 #include "brw_private.h"
12 #include "intel_nir.h"
13 #include "shader_enums.h"
14 #include "dev/intel_debug.h"
15 #include "dev/intel_wa.h"
16
17 #include <memory>
18
19 static uint64_t
brw_bsr(const struct intel_device_info * devinfo,uint32_t offset,uint8_t simd_size,uint8_t local_arg_offset,uint8_t grf_used)20 brw_bsr(const struct intel_device_info *devinfo,
21 uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset,
22 uint8_t grf_used)
23 {
24 assert(offset % 64 == 0);
25 assert(simd_size == 8 || simd_size == 16);
26 assert(local_arg_offset % 8 == 0);
27
28 return ((uint64_t)ptl_register_blocks(grf_used) << 60) |
29 offset |
30 SET_BITS(simd_size == 8, 4, 4) |
31 SET_BITS(local_arg_offset / 8, 2, 0);
32 }
33
34 static bool
run_bs(fs_visitor & s,bool allow_spilling)35 run_bs(fs_visitor &s, bool allow_spilling)
36 {
37 assert(s.stage >= MESA_SHADER_RAYGEN && s.stage <= MESA_SHADER_CALLABLE);
38
39 s.payload_ = new bs_thread_payload(s);
40
41 nir_to_brw(&s);
42
43 if (s.failed)
44 return false;
45
46 /* TODO(RT): Perhaps rename this? */
47 s.emit_cs_terminate();
48
49 brw_calculate_cfg(s);
50
51 brw_optimize(s);
52
53 s.assign_curb_setup();
54
55 brw_lower_3src_null_dest(s);
56 brw_workaround_memory_fence_before_eot(s);
57 brw_workaround_emit_dummy_mov_instruction(s);
58
59 brw_allocate_registers(s, allow_spilling);
60
61 brw_workaround_source_arf_before_eot(s);
62
63 return !s.failed;
64 }
65
66 static uint8_t
compile_single_bs(const struct brw_compiler * compiler,struct brw_compile_bs_params * params,const struct brw_bs_prog_key * key,struct brw_bs_prog_data * prog_data,nir_shader * shader,brw_generator * g,struct brw_compile_stats * stats,int * prog_offset,uint64_t * bsr)67 compile_single_bs(const struct brw_compiler *compiler,
68 struct brw_compile_bs_params *params,
69 const struct brw_bs_prog_key *key,
70 struct brw_bs_prog_data *prog_data,
71 nir_shader *shader,
72 brw_generator *g,
73 struct brw_compile_stats *stats,
74 int *prog_offset,
75 uint64_t *bsr)
76 {
77 const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT);
78
79 prog_data->base.stage = shader->info.stage;
80 prog_data->max_stack_size = MAX2(prog_data->max_stack_size,
81 shader->scratch_size);
82
83 const unsigned max_dispatch_width = 16;
84 brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width);
85 brw_postprocess_nir(shader, compiler, debug_enabled,
86 key->base.robust_flags);
87
88 brw_simd_selection_state simd_state{
89 .devinfo = compiler->devinfo,
90 .prog_data = prog_data,
91
92 /* Since divergence is a lot more likely in RT than compute, it makes
93 * sense to limit ourselves to the smallest available SIMD for now.
94 */
95 .required_width = compiler->devinfo->ver >= 20 ? 16u : 8u,
96 };
97
98 std::unique_ptr<fs_visitor> v[2];
99
100 for (unsigned simd = 0; simd < ARRAY_SIZE(v); simd++) {
101 if (!brw_simd_should_compile(simd_state, simd))
102 continue;
103
104 const unsigned dispatch_width = 8u << simd;
105
106 if (dispatch_width == 8 && compiler->devinfo->ver >= 20)
107 continue;
108
109 v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base,
110 &key->base,
111 &prog_data->base, shader,
112 dispatch_width,
113 stats != NULL,
114 debug_enabled);
115
116 const bool allow_spilling = !brw_simd_any_compiled(simd_state);
117 if (run_bs(*v[simd], allow_spilling)) {
118 brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
119 } else {
120 simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx,
121 v[simd]->fail_msg);
122 if (simd > 0) {
123 brw_shader_perf_log(compiler, params->base.log_data,
124 "SIMD%u shader failed to compile: %s",
125 dispatch_width, v[simd]->fail_msg);
126 }
127 }
128 }
129
130 const int selected_simd = brw_simd_select(simd_state);
131 if (selected_simd < 0) {
132 params->base.error_str =
133 ralloc_asprintf(params->base.mem_ctx,
134 "Can't compile shader: "
135 "SIMD8 '%s' and SIMD16 '%s'.\n",
136 simd_state.error[0], simd_state.error[1]);
137 return 0;
138 }
139
140 assert(selected_simd < int(ARRAY_SIZE(v)));
141 fs_visitor *selected = v[selected_simd].get();
142 assert(selected);
143
144 const unsigned dispatch_width = selected->dispatch_width;
145
146 int offset = g->generate_code(selected->cfg, dispatch_width, selected->shader_stats,
147 selected->performance_analysis.require(), stats);
148 if (prog_offset)
149 *prog_offset = offset;
150 else
151 assert(offset == 0);
152
153 if (bsr)
154 *bsr = brw_bsr(compiler->devinfo, offset, dispatch_width, 0,
155 selected->grf_used);
156 else
157 prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
158 selected->grf_used);
159
160 return dispatch_width;
161 }
162
163 const unsigned *
brw_compile_bs(const struct brw_compiler * compiler,struct brw_compile_bs_params * params)164 brw_compile_bs(const struct brw_compiler *compiler,
165 struct brw_compile_bs_params *params)
166 {
167 nir_shader *shader = params->base.nir;
168 struct brw_bs_prog_data *prog_data = params->prog_data;
169 unsigned num_resume_shaders = params->num_resume_shaders;
170 nir_shader **resume_shaders = params->resume_shaders;
171 const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT);
172
173 prog_data->base.stage = shader->info.stage;
174 prog_data->base.ray_queries = shader->info.ray_queries;
175 prog_data->base.total_scratch = 0;
176
177 prog_data->max_stack_size = 0;
178 prog_data->num_resume_shaders = num_resume_shaders;
179
180 brw_generator g(compiler, ¶ms->base, &prog_data->base,
181 shader->info.stage);
182 if (unlikely(debug_enabled)) {
183 char *name = ralloc_asprintf(params->base.mem_ctx,
184 "%s %s shader %s",
185 shader->info.label ?
186 shader->info.label : "unnamed",
187 gl_shader_stage_name(shader->info.stage),
188 shader->info.name);
189 g.enable_debug(name);
190 }
191
192 prog_data->simd_size =
193 compile_single_bs(compiler, params, params->key, prog_data,
194 shader, &g, params->base.stats, NULL, NULL);
195 if (prog_data->simd_size == 0)
196 return NULL;
197
198 uint64_t *resume_sbt = ralloc_array(params->base.mem_ctx,
199 uint64_t, num_resume_shaders);
200 for (unsigned i = 0; i < num_resume_shaders; i++) {
201 if (INTEL_DEBUG(DEBUG_RT)) {
202 char *name = ralloc_asprintf(params->base.mem_ctx,
203 "%s %s resume(%u) shader %s",
204 shader->info.label ?
205 shader->info.label : "unnamed",
206 gl_shader_stage_name(shader->info.stage),
207 i, shader->info.name);
208 g.enable_debug(name);
209 }
210
211 /* TODO: Figure out shader stats etc. for resume shaders */
212 int offset = 0;
213 uint8_t simd_size =
214 compile_single_bs(compiler, params, params->key,
215 prog_data, resume_shaders[i], &g, NULL, &offset,
216 &resume_sbt[i]);
217 if (simd_size == 0)
218 return NULL;
219
220 assert(offset > 0);
221 }
222
223 /* We only have one constant data so we want to make sure they're all the
224 * same.
225 */
226 for (unsigned i = 0; i < num_resume_shaders; i++) {
227 assert(resume_shaders[i]->constant_data_size ==
228 shader->constant_data_size);
229 assert(memcmp(resume_shaders[i]->constant_data,
230 shader->constant_data,
231 shader->constant_data_size) == 0);
232 }
233
234 g.add_const_data(shader->constant_data, shader->constant_data_size);
235 g.add_resume_sbt(num_resume_shaders, resume_sbt);
236
237 return g.get_assembly();
238 }
239