/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "nir_phi_builder.h"
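
/* Returns true if the shader contains no load_scratch_base_ptr intrinsics.
 * Only used for the assert in brw_nir_lower_shader_returns() below: that pass
 * reserves space at the start of the per-thread scratch, which assumes no
 * load_scratch_base_ptr intrinsics have been emitted yet.
 */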
UNUSED static bool
no_load_scratch_base_ptr_intrinsic(nir_shader *shader)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic == nir_intrinsic_load_scratch_base_ptr)
               return false;
         }
      }
   }

   return true;
}

/** Insert the appropriate return instruction at the end of the shader */
void
brw_nir_lower_shader_returns(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* Reserve scratch space at the start of the shader's per-thread scratch
    * space for the return BINDLESS_SHADER_RECORD address and data payload.
    * When a shader is called, the calling shader will write the return BSR
    * address in this region of the callee's scratch space.
    *
    * We could also put it at the end of the caller's scratch space. However,
    * doing it this way means that a shader never accesses its caller's scratch
    * space unless given an explicit pointer (such as for ray payloads). It
    * also makes computing the address easier given that we want to apply an
    * alignment to the scratch offset to ensure we can make alignment
    * assumptions in the called shader.
    *
    * This isn't needed for ray-gen shaders because they end the thread and
    * never return to the calling trampoline shader.
    */
   assert(no_load_scratch_base_ptr_intrinsic(shader));
   if (shader->info.stage != MESA_SHADER_RAYGEN)
      shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE;

   nir_builder b = nir_builder_create(impl);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      switch (shader->info.stage) {
      case MESA_SHADER_RAYGEN:
         /* A raygen shader is always the root of the shader call tree. When
          * it ends, we retire the bindless stack ID and no further shaders
          * will be executed.
          */
         assert(impl->end_block->predecessors->entries == 1);
         brw_nir_btd_retire(&b);
         break;

      case MESA_SHADER_ANY_HIT:
         /* The default action of an any-hit shader is to accept the ray
          * intersection. Any-hit shaders may have more than one exit. Only
          * the final "normal" exit will actually need to accept the
          * intersection as any others should come from nir_jump_halt
          * instructions inserted after ignore_ray_intersection or
          * terminate_ray or the like. However, inserting an accept after
          * the ignore or terminate is safe because it'll get deleted later.
          */
         nir_accept_ray_intersection(&b);
         break;

      case MESA_SHADER_CALLABLE:
      case MESA_SHADER_MISS:
      case MESA_SHADER_CLOSEST_HIT:
         /* Callable, miss, and closest-hit shaders don't take any special
          * action at the end. They simply return to the previous shader
          * in the call stack.
          */
         assert(impl->end_block->predecessors->entries == 1);
         brw_nir_btd_return(&b);
         break;

      case MESA_SHADER_INTERSECTION:
         /* This will be handled by brw_nir_lower_intersection_shader */
         break;

      default:
         unreachable("Invalid callable shader stage");
      }
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}
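
/* Stores the "callee data" at the base of the called shader's stack: the
 * BINDLESS_SHADER_RECORD address of the resume shader for this call site,
 * followed by a pointer to the payload.  The BTD stack pointer is then
 * advanced past the caller's frame so this data lands at the start of the
 * callee's scratch space (see brw_nir_lower_shader_returns above).
 */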
static void
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
{
   uint32_t call_idx = nir_intrinsic_call_idx(call);
   uint32_t offset = nir_intrinsic_stack_size(call);

   /* First thing on the called shader's stack is the resume address
    * followed by a pointer to the payload.
    */
   nir_def *resume_record_addr =
      nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
                   call_idx * BRW_BTD_RESUME_SBT_STRIDE);
   /* By the time we get here, any remaining shader/function memory
    * pointers have been lowered to SSA values.
    */
   nir_def *payload_addr =
      nir_get_shader_call_payload_src(call)->ssa;
   brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
                            nir_vec2(b, resume_record_addr, payload_addr),
                            0xf /* write_mask */);
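
   /* Advance the BTD stack pointer by the caller's stack size so the data
    * written above sits at the start of the callee's scratch space.
    */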
   nir_btd_stack_push_intel(b, offset);
}
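
/* Lowers nir_intrinsic_rt_trace_ray.
 *
 * The resume address and payload pointer are written to the callee data
 * region, the per-ray memory record (brw_nir_rt_mem_ray_defs) is filled out
 * from the traceRayEXT() arguments, and a trace_ray_intel message is emitted
 * to kick off hardware traversal from the world (top-level) BVH.
 */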
static bool
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr,
                             void *data)
{
   struct brw_bs_prog_key *key = data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
   if (call->intrinsic != nir_intrinsic_rt_trace_ray)
      return false;

   b->cursor = nir_instr_remove(instr);

   store_resume_addr(b, call);
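
   /* The rt_trace_ray sources follow the traceRayEXT()/OpTraceRay operand
    * order.
    */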
   nir_def *as_addr = call->src[0].ssa;
   nir_def *ray_flags = call->src[1].ssa;
   /* From the SPIR-V spec:
    *
    *    "Only the 8 least-significant bits of Cull Mask are used by this
    *    instruction - other bits are ignored.
    *
    *    Only the 4 least-significant bits of SBT Offset and SBT Stride are
    *    used by this instruction - other bits are ignored.
    *
    *    Only the 16 least-significant bits of Miss Index are used by this
    *    instruction - other bits are ignored."
    */
   nir_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
   nir_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
   nir_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
   nir_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
   nir_def *ray_orig = call->src[6].ssa;
   nir_def *ray_t_min = call->src[7].ssa;
   nir_def *ray_dir = call->src[8].ssa;
   nir_def *ray_t_max = call->src[9].ssa;
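
   /* Traversal always begins at the root node of the top-level acceleration
    * structure, so resolve the AS address to its root node pointer.
    */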
   nir_def *root_node_ptr =
      brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

   /* The hardware packet requires an address to the first element of the
    * hit SBT.
    *
    * In order to calculate this, we must multiply the "SBT Offset"
    * provided to OpTraceRay by the SBT stride provided for the hit SBT in
    * the call to vkCmdTraceRaysKHR() and add that to the base address of
    * the hit SBT. This stride is not to be confused with the "SBT Stride"
    * provided to OpTraceRay which is in units of this stride. It's a
    * rather terrible overload of the word "stride". The hardware docs
    * call the SPIR-V stride value the "shader index multiplier", which is
    * a much more sane name.
    */
   nir_def *hit_sbt_stride_B =
      nir_load_ray_hit_sbt_stride_intel(b);
   nir_def *hit_sbt_offset_B =
      nir_imul(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
   nir_def *hit_sbt_addr =
      nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
               nir_u2u64(b, hit_sbt_offset_B));

   /* The hardware packet takes an address to the miss BSR. */
   nir_def *miss_sbt_stride_B =
      nir_load_ray_miss_sbt_stride_intel(b);
   nir_def *miss_sbt_offset_B =
      nir_imul(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
   nir_def *miss_sbt_addr =
      nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
               nir_u2u64(b, miss_sbt_offset_B));

   struct brw_nir_rt_mem_ray_defs ray_defs = {
      .root_node_ptr = root_node_ptr,
      /* Combine the ray flags handed to traceRayEXT() with the ray flags
       * derived at pipeline creation time from VkPipelineCreateFlags.
       */
      .ray_flags = nir_ior_imm(b, nir_u2u16(b, ray_flags), key->pipeline_ray_flags),
      .ray_mask = cull_mask,
      .hit_group_sr_base_ptr = hit_sbt_addr,
      .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
      .miss_sr_ptr = miss_sbt_addr,
      .orig = ray_orig,
      .t_near = ray_t_min,
      .dir = ray_dir,
      .t_far = ray_t_max,
      .shader_index_multiplier = sbt_stride,
      /* The instance leaf pointer is unused in the top level BVH traversal
       * since we always start from the root node. We can reuse that field to
       * store the ray_flags handed to traceRayEXT(). This will be reloaded
       * when the shader accesses gl_IncomingRayFlagsEXT (see
       * nir_intrinsic_load_ray_flags in brw_nir_lower_rt_intrinsics.c)
       */
      .inst_leaf_ptr = nir_u2u64(b, ray_flags),
   };
   brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
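
   /* Hand the ray off to hardware traversal.  This is fire-and-forget: the
    * resulting hit/miss shaders are dispatched through BTD rather than
    * returning a result here, hence .synchronous = false.
    */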
   nir_trace_ray_intel(b,
                       nir_load_btd_global_arg_addr_intel(b),
                       nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
                       nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                       .synchronous = false);
   return true;
}
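
/* Lowers nir_intrinsic_rt_execute_callable.
 *
 * The resume address and payload pointer are written to the callee data
 * region, then the callable shader selected by the SBT index is spawned via
 * BTD.
 */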
static bool
lower_shader_call_instr(struct nir_builder *b, nir_intrinsic_instr *call,
                        void *data)
{
   if (call->intrinsic != nir_intrinsic_rt_execute_callable)
      return false;

   b->cursor = nir_instr_remove(&call->instr);

   store_resume_addr(b, call);
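
   /* Byte offset of the selected entry in the callable SBT:
    * index * callable SBT stride.
    */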
   nir_def *sbt_offset32 =
      nir_imul(b, call->src[0].ssa,
               nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
   nir_def *sbt_addr =
      nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
               nir_u2u64(b, sbt_offset32));
   brw_nir_btd_spawn(b, sbt_addr);
   return true;
}
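
/** Lower the ray-tracing shader call intrinsics
 *
 * rt_trace_ray and rt_execute_callable are lowered to Intel BTD operations
 * here; rt_resume is left in place to be lowered later by
 * brw_nir_lower_rt_intrinsics().
 */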
bool
brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key)
{
   bool a = nir_shader_instructions_pass(shader,
                                         lower_shader_trace_ray_instr,
                                         nir_metadata_none,
                                         key);
   bool b = nir_shader_intrinsics_pass(shader, lower_shader_call_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
   return a || b;
}

/** Creates a trivial return shader
 *
 * In most cases this shader doesn't actually do anything. It just needs to
 * return to the caller.
 *
 * By default, our HW has the ability to handle the fact that a shader is not
 * available and will execute the next shader in the tracing call. For
 * instance, a RAYGEN shader traces a ray, the tracing generates a hit, but
 * there is no ANYHIT shader available. The HW should follow up by executing
 * the CLOSESTHIT shader.
 *
 * This default behavior can be changed through the RT_CTRL register
 * (privileged access) and when NULL shader checks are disabled, the HW will
 * instead call the call stack handler (this shader). This is what i915 is
 * doing as part of Wa_14013202645.
 *
 * In order to ensure the call to the CLOSESTHIT shader, this shader needs to
 * commit the ray and will not proceed with the BTD return. Similarly, when
 * the same thing happens with the INTERSECTION shader, we should just carry
 * on the ray traversal with the continue operation.
 */
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_CALLABLE];

   nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
                                                   nir_options,
                                                   "RT Trivial Return");
   nir_builder *b = &_b;

   ralloc_steal(mem_ctx, b->shader);
   nir_shader *nir = b->shader;

   NIR_PASS_V(nir, brw_nir_lower_shader_returns);

   return nir;
}