/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "nir_phi_builder.h"

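/* Sanity-check helper: returns true if the shader contains no
 * load_scratch_base_ptr intrinsics.  It is used in an assert below to make
 * sure the scratch base pointer has not yet been materialized when we grow
 * shader->scratch_size to reserve the callee data region.
 */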
UNUSED static bool
no_load_scratch_base_ptr_intrinsic(nir_shader *shader)
{
   nir_foreach_function(func, shader) {
      if (!func->impl)
         continue;

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic == nir_intrinsic_load_scratch_base_ptr)
               return false;
         }
      }
   }

   return true;
}

/** Insert the appropriate return instruction at the end of the shader */
void
brw_nir_lower_shader_returns(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* Reserve scratch space at the start of the shader's per-thread scratch
    * space for the return BINDLESS_SHADER_RECORD address and data payload.
    * When a shader is called, the calling shader will write the return BSR
    * address in this region of the callee's scratch space.
    *
    * We could also put it at the end of the caller's scratch space.  However,
    * doing it this way means that a shader never accesses its caller's
    * scratch space unless given an explicit pointer (such as for ray
    * payloads).  It also makes computing the address easier given that we
    * want to apply an alignment to the scratch offset to ensure we can make
    * alignment assumptions in the called shader.
    *
    * This isn't needed for ray-gen shaders because they end the thread and
    * never return to the calling trampoline shader.
    */
   assert(no_load_scratch_base_ptr_intrinsic(shader));
   if (shader->info.stage != MESA_SHADER_RAYGEN)
      shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE;

   nir_builder b;
   nir_builder_init(&b, impl);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      switch (shader->info.stage) {
      case MESA_SHADER_RAYGEN:
         /* A raygen shader is always the root of the shader call tree.  When
          * it ends, we retire the bindless stack ID and no further shaders
          * will be executed.
          */
         brw_nir_btd_retire(&b);
         break;

      case MESA_SHADER_ANY_HIT:
         /* The default action of an any-hit shader is to accept the ray
          * intersection.
          */
         nir_accept_ray_intersection(&b);
         break;

      case MESA_SHADER_CALLABLE:
      case MESA_SHADER_MISS:
      case MESA_SHADER_CLOSEST_HIT:
         /* Callable, miss, and closest-hit shaders don't take any special
          * action at the end.  They simply return back to the previous shader
          * in the call stack.
          */
         brw_nir_btd_return(&b);
         break;

      case MESA_SHADER_INTERSECTION:
         /* This will be handled by brw_nir_lower_intersection_shader */
         break;

      default:
         unreachable("Invalid callable shader stage");
      }

      assert(impl->end_block->predecessors->entries == 1);
      break;
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

static void
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
{
   uint32_t call_idx = nir_intrinsic_call_idx(call);
   uint32_t offset = nir_intrinsic_stack_size(call);

   /* First thing on the called shader's stack is the resume address
    * followed by a pointer to the payload.
    */
   nir_ssa_def *resume_record_addr =
      nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
                   call_idx * BRW_BTD_RESUME_SBT_STRIDE);
   /* By the time we get here, any remaining shader/function memory
    * pointers have been lowered to SSA values.
    */
   assert(nir_get_shader_call_payload_src(call)->is_ssa);
   nir_ssa_def *payload_addr =
      nir_get_shader_call_payload_src(call)->ssa;
   brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
                            nir_vec2(b, resume_record_addr, payload_addr),
                            0xf /* write_mask */);

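   /* Advance the per-thread stack pointer past the caller's stack so that
    * the resume record address and payload pointer we just wrote at `offset`
    * become the start of the callee's scratch space.
    */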
   nir_btd_stack_push_intel(b, offset);
}

static bool
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
   if (call->intrinsic != nir_intrinsic_rt_trace_ray)
      return false;

   b->cursor = nir_instr_remove(instr);

   store_resume_addr(b, call);

   nir_ssa_def *as_addr = call->src[0].ssa;
   nir_ssa_def *ray_flags = call->src[1].ssa;
   /* From the SPIR-V spec:
    *
    *    "Only the 8 least-significant bits of Cull Mask are used by this
    *    instruction - other bits are ignored.
    *
    *    Only the 4 least-significant bits of SBT Offset and SBT Stride are
    *    used by this instruction - other bits are ignored.
    *
    *    Only the 16 least-significant bits of Miss Index are used by this
    *    instruction - other bits are ignored."
    */
   nir_ssa_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
   nir_ssa_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
   nir_ssa_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
   nir_ssa_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
   nir_ssa_def *ray_orig = call->src[6].ssa;
   nir_ssa_def *ray_t_min = call->src[7].ssa;
   nir_ssa_def *ray_dir = call->src[8].ssa;
   nir_ssa_def *ray_t_max = call->src[9].ssa;

   nir_ssa_def *root_node_ptr =
      brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

   /* The hardware packet requires an address to the first element of the
    * hit SBT.
    *
    * In order to calculate this, we must multiply the "SBT Offset"
    * provided to OpTraceRay by the SBT stride provided for the hit SBT in
    * the call to vkCmdTraceRaysKHR() and add that to the base address of
    * the hit SBT.  This stride is not to be confused with the "SBT Stride"
    * provided to OpTraceRay, which is in units of this stride.  It's a
    * rather terrible overload of the word "stride".  The hardware docs
    * call the SPIR-V stride value the "shader index multiplier", which is
    * a much saner name.
    */
   nir_ssa_def *hit_sbt_stride_B =
      nir_load_ray_hit_sbt_stride_intel(b);
   nir_ssa_def *hit_sbt_offset_B =
      nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
   nir_ssa_def *hit_sbt_addr =
      nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
                  nir_u2u64(b, hit_sbt_offset_B));

   /* The hardware packet takes an address to the miss BSR. */
   nir_ssa_def *miss_sbt_stride_B =
      nir_load_ray_miss_sbt_stride_intel(b);
   nir_ssa_def *miss_sbt_offset_B =
      nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
   nir_ssa_def *miss_sbt_addr =
      nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
                  nir_u2u64(b, miss_sbt_offset_B));

   struct brw_nir_rt_mem_ray_defs ray_defs = {
      .root_node_ptr = root_node_ptr,
      .ray_flags = nir_u2u16(b, ray_flags),
      .ray_mask = cull_mask,
      .hit_group_sr_base_ptr = hit_sbt_addr,
      .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
      .miss_sr_ptr = miss_sbt_addr,
      .orig = ray_orig,
      .t_near = ray_t_min,
      .dir = ray_dir,
      .t_far = ray_t_max,
      .shader_index_multiplier = sbt_stride,
   };
   brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);

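   /* Fire off the actual trace.  The trace is asynchronous: this shader ends
    * here and execution later resumes through the resume record we stored in
    * store_resume_addr() above, once the ray traversal and any shaders it
    * invokes have completed.
    */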
   nir_trace_ray_intel(b,
                       nir_load_btd_global_arg_addr_intel(b),
                       nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
                       nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                       .synchronous = false);
   return true;
}

static bool
lower_shader_call_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
   if (call->intrinsic != nir_intrinsic_rt_execute_callable)
      return false;

   b->cursor = nir_instr_remove(instr);

   store_resume_addr(b, call);

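   /* The callable shader record address is the callable SBT base plus the
    * SBT index (src[0]) multiplied by the callable SBT stride supplied by
    * the driver.
    */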
   nir_ssa_def *sbt_offset32 =
      nir_imul(b, call->src[0].ssa,
               nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
   nir_ssa_def *sbt_addr =
      nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
               nir_u2u64(b, sbt_offset32));
   brw_nir_btd_spawn(b, sbt_addr);
   return true;
}
269 
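/** Lower rt_trace_ray and rt_execute_callable intrinsics to BTD operations.
 *
 * The results of the two instruction passes are combined with a
 * non-short-circuiting | so that both passes always run.
 */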
bool
brw_nir_lower_shader_calls(nir_shader *shader)
{
   return
      nir_shader_instructions_pass(shader,
                                   lower_shader_trace_ray_instr,
                                   nir_metadata_none,
                                   NULL) |
      nir_shader_instructions_pass(shader,
                                   lower_shader_call_instr,
                                   nir_metadata_block_index |
                                   nir_metadata_dominance,
                                   NULL);
}

/** Creates a trivial return shader
 *
 * In most cases this shader doesn't actually do anything.  It just needs to
 * return to the caller.
 *
 * By default, our HW handles the case where a shader is not available and
 * executes the next shader in the tracing call.  For instance, a RAYGEN
 * shader traces a ray and the traversal generates a hit, but there is no
 * ANYHIT shader available.  The HW should follow up by executing the
 * CLOSESTHIT shader.
 *
 * This default behavior can be changed through the RT_CTRL register
 * (privileged access) and when NULL shader checks are disabled, the HW will
 * instead call the call stack handler (this shader).  This is what i915 is
 * doing as part of Wa_14013202645.
 *
 * In order to ensure the call to the CLOSESTHIT shader, this shader needs to
 * commit the ray and not proceed with the BTD return.  Similarly, when the
 * same thing happens with the INTERSECTION shader, we should just carry on
 * the ray traversal with the continue operation.
 */
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_CALLABLE];

   nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
                                                   nir_options,
                                                   "RT Trivial Return");
   nir_builder *b = &_b;

   ralloc_steal(mem_ctx, b->shader);
   nir_shader *nir = b->shader;

   /* Workaround not needed on DG2-G10-C0+ & DG2-G11-B0+ */
   if ((compiler->devinfo->platform == INTEL_PLATFORM_DG2_G10 &&
        compiler->devinfo->revision < 8) ||
       (compiler->devinfo->platform == INTEL_PLATFORM_DG2_G11 &&
        compiler->devinfo->revision < 4)) {
      /* Reserve scratch space at the start of the shader's per-thread scratch
       * space for the return BINDLESS_SHADER_RECORD address and data payload.
       * When a shader is called, the calling shader will write the return BSR
       * address in this region of the callee's scratch space.
       */
      nir->scratch_size = BRW_BTD_STACK_CALLEE_DATA_SIZE;

      nir_function_impl *impl = nir_shader_get_entrypoint(nir);

      b->cursor = nir_before_block(nir_start_block(impl));

      nir_ssa_def *shader_type = nir_load_btd_shader_type_intel(b);

      nir_ssa_def *is_intersection_shader =
         nir_ieq_imm(b, shader_type, GEN_RT_BTD_SHADER_TYPE_INTERSECTION);
      nir_ssa_def *is_anyhit_shader =
         nir_ieq_imm(b, shader_type, GEN_RT_BTD_SHADER_TYPE_ANY_HIT);

      nir_ssa_def *needs_commit_or_continue =
         nir_ior(b, is_intersection_shader, is_anyhit_shader);

      nir_push_if(b, needs_commit_or_continue);
      {
         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit(b, &hit_in, false /* committed */);

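         /* Standing in for a missing INTERSECTION shader means traversal
          * should simply continue; standing in for a missing ANY_HIT shader
          * means the pending hit should be committed (see the function
          * comment above).
          */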
         nir_ssa_def *ray_op =
            nir_bcsel(b, is_intersection_shader,
                      nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                      nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT));
         nir_ssa_def *ray_level = hit_in.bvh_level;

         nir_trace_ray_intel(b,
                             nir_load_btd_global_arg_addr_intel(b),
                             ray_level, ray_op);
      }
      nir_push_else(b, NULL);
      {
         brw_nir_btd_return(b);
      }
      nir_pop_if(b, NULL);
   } else {
      NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   }

   return nir;
}