/*
 * Copyright (c) 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"

static nir_def *
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
{
   switch (b->shader->info.stage) {
   case MESA_SHADER_ANY_HIT:
      /* Any-hit shaders are always compiled into intersection shaders for
       * procedural geometry.  If we got here in an any-hit shader, it's for
       * triangles.
       */
      return nir_imm_false(b);

   case MESA_SHADER_INTERSECTION:
      return nir_imm_true(b);

   default:
      return nir_ieq_imm(b, hit->leaf_type,
                            BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
   }
}

static void
lower_rt_intrinsics_impl(nir_function_impl *impl,
                         const struct intel_device_info *devinfo)
{
   bool progress = false;

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   struct brw_nir_rt_globals_defs globals;
   brw_nir_rt_load_globals(b, &globals);

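   /* The SW hotzone is one 16-byte (4-dword) record per ray: component 0
    * holds the current SW stack offset and components 1-3 hold the launch
    * ID (see the scratch_base_ptr and load_ray_launch_id handling below).
    */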
   nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
   nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);

   gl_shader_stage stage = b->shader->info.stage;
   struct brw_nir_rt_mem_ray_defs world_ray_in = {};
   struct brw_nir_rt_mem_ray_defs object_ray_in = {};
   struct brw_nir_rt_mem_hit_defs hit_in = {};
   switch (stage) {
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_INTERSECTION:
      brw_nir_rt_load_mem_hit(b, &hit_in,
                              stage == MESA_SHADER_CLOSEST_HIT);
      brw_nir_rt_load_mem_ray(b, &object_ray_in,
                              BRW_RT_BVH_LEVEL_OBJECT);
      FALLTHROUGH;

   case MESA_SHADER_MISS:
      brw_nir_rt_load_mem_ray(b, &world_ray_in,
                              BRW_RT_BVH_LEVEL_WORLD);
      break;

   default:
      break;
   }

   nir_def *thread_stack_base_addr = brw_nir_rt_sw_stack_addr(b, devinfo);
   nir_def *stack_base_offset = nir_channel(b, hotzone, 0);
   nir_def *stack_base_addr =
      nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
   ASSERTED bool seen_scratch_base_ptr_load = false;
   ASSERTED bool found_resume = false;

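   /* Walk every instruction and, for each intrinsic we can resolve from the
    * globals, MemRay, or MemHit data loaded above, build its value right
    * after the intrinsic and rewrite all uses to point at it.
    */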
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         b->cursor = nir_after_instr(&intrin->instr);

         nir_def *sysval = NULL;
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_scratch_base_ptr:
            assert(nir_intrinsic_base(intrin) == 1);
            seen_scratch_base_ptr_load = true;
            sysval = stack_base_addr;
            break;

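         /* Before a BTD shader call, advance the SW stack offset stored in
          * the hotzone past this shader's frame so the callee's scratch
          * lands above it.
          */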
         case nir_intrinsic_btd_stack_push_intel: {
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               nir_def *child_stack_offset =
                  nir_iadd_imm(b, stack_base_offset, stack_size);
               nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
            }
            nir_instr_remove(instr);
            break;
         }

         case nir_intrinsic_rt_resume: {
            /* This is the first "interesting" instruction */
            assert(block == nir_start_block(impl));
            assert(!seen_scratch_base_ptr_load);
            found_resume = true;

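            /* Resuming after a shader call: rewind the hotzone's SW stack
             * offset to pop this shader's frame back off the stack.
             */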
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               stack_base_offset =
                  nir_iadd_imm(b, stack_base_offset, -stack_size);
               nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
               stack_base_addr = nir_iadd(b, thread_stack_base_addr,
                                          nir_u2u64(b, stack_base_offset));
            }
            nir_instr_remove(instr);
            break;
         }

         case nir_intrinsic_load_uniform: {
            /* We don't want to lower this in the launch trampoline. */
            if (stage == MESA_SHADER_COMPUTE)
               break;

            sysval = brw_nir_load_global_const(b, intrin,
                        nir_load_btd_global_arg_addr_intel(b),
                        BRW_RT_PUSH_CONST_OFFSET);

            break;
         }

         case nir_intrinsic_load_ray_launch_id:
            sysval = nir_channels(b, hotzone, 0xe);
            break;

         case nir_intrinsic_load_ray_launch_size:
            sysval = globals.launch_size;
            break;

         case nir_intrinsic_load_ray_world_origin:
            sysval = world_ray_in.orig;
            break;

         case nir_intrinsic_load_ray_world_direction:
            sysval = world_ray_in.dir;
            break;

         case nir_intrinsic_load_ray_object_origin:
            sysval = object_ray_in.orig;
            break;

         case nir_intrinsic_load_ray_object_direction:
            sysval = object_ray_in.dir;
            break;

         case nir_intrinsic_load_ray_t_min:
            /* It shouldn't matter which we pull this from */
            sysval = world_ray_in.t_near;
            break;

         case nir_intrinsic_load_ray_t_max:
            if (stage == MESA_SHADER_MISS)
               sysval = world_ray_in.t_far;
            else
               sysval = hit_in.t;
            break;

         case nir_intrinsic_load_primitive_id:
            sysval = brw_nir_rt_load_primitive_id_from_hit(b,
                                                           build_leaf_is_procedural(b, &hit_in),
                                                           &hit_in);
            break;

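         /* The instance index lives in the BVH instance leaf; chase
          * MemHit::inst_leaf_ptr to fetch it.  The transform-matrix and
          * custom-index queries below read the same leaf.
          */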
         case nir_intrinsic_load_instance_id: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.instance_index;
            break;
         }

         case nir_intrinsic_load_ray_object_to_world: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_ray_world_to_object: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_ray_hit_kind: {
            nir_def *tri_hit_kind =
               nir_bcsel(b, hit_in.front_face,
                            nir_imm_int(b, BRW_RT_HIT_KIND_FRONT_FACE),
                            nir_imm_int(b, BRW_RT_HIT_KIND_BACK_FACE));
            sysval = nir_bcsel(b, build_leaf_is_procedural(b, &hit_in),
                                  hit_in.aabb_hit_kind, tri_hit_kind);
            break;
         }

         case nir_intrinsic_load_ray_flags:
            /* We need to fetch the original ray flags we stored in the
             * leaf pointer, because the actual ray flags we get here
             * will include any flags passed on the pipeline at creation
             * time, and the spec for IncomingRayFlagsKHR says:
             *   Setting pipeline flags on the raytracing pipeline must not
             *   cause any corresponding flags to be set in variables with
             *   this decoration.
             */
            sysval = nir_u2u32(b, world_ray_in.inst_leaf_ptr);
            break;

         case nir_intrinsic_load_cull_mask:
            sysval = nir_u2u32(b, world_ray_in.ray_mask);
            break;

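         /* The geometry index sits in the low 29 bits of the second dword
          * of the primitive leaf header.
          */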
         case nir_intrinsic_load_ray_geometry_index: {
            nir_def *geometry_index_dw =
               nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                               1, 32);
            sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
            break;
         }

         case nir_intrinsic_load_ray_instance_custom_index: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.instance_id;
            break;
         }

         case nir_intrinsic_load_shader_record_ptr:
            /* We can't handle this intrinsic in resume shaders because the
             * handle we get there won't be from the original SBT.  The shader
             * call lowering/splitting pass should have ensured that this
             * value was spilled from the initial shader and unspilled in any
             * resume shaders that need it.
             */
            assert(!found_resume);
            sysval = nir_load_btd_local_arg_addr_intel(b);
            break;

         case nir_intrinsic_load_ray_base_mem_addr_intel:
            sysval = globals.base_mem_addr;
            break;

         case nir_intrinsic_load_ray_hw_stack_size_intel:
            sysval = nir_imul_imm(b, globals.hw_stack_size, 64);
            break;

         case nir_intrinsic_load_ray_sw_stack_size_intel:
            sysval = nir_imul_imm(b, globals.sw_stack_size, 64);
            break;

         case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
            sysval = globals.num_dss_rt_stacks;
            break;

         case nir_intrinsic_load_ray_hit_sbt_addr_intel:
            sysval = globals.hit_sbt_addr;
            break;

         case nir_intrinsic_load_ray_hit_sbt_stride_intel:
            sysval = globals.hit_sbt_stride;
            break;

         case nir_intrinsic_load_ray_miss_sbt_addr_intel:
            sysval = globals.miss_sbt_addr;
            break;

         case nir_intrinsic_load_ray_miss_sbt_stride_intel:
            sysval = globals.miss_sbt_stride;
            break;

         case nir_intrinsic_load_callable_sbt_addr_intel:
            sysval = globals.call_sbt_addr;
            break;

         case nir_intrinsic_load_callable_sbt_stride_intel:
            sysval = globals.call_sbt_stride;
            break;

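         /* The resume SBT address is not known when this pass runs; it is
          * assembled from two relocation constants that get patched in
          * later via shader relocations.
          */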
         case nir_intrinsic_load_btd_resume_sbt_addr_intel:
            sysval = nir_pack_64_2x32_split(b,
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW),
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH));
            break;

         case nir_intrinsic_load_leaf_procedural_intel:
            sysval = build_leaf_is_procedural(b, &hit_in);
            break;

         case nir_intrinsic_load_ray_triangle_vertex_positions: {
            struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
            brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
            sysval = pos.positions[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_leaf_opaque_intel: {
            if (stage == MESA_SHADER_INTERSECTION) {
               /* In intersection shaders, the opaque bit is passed to us in
                * the front_face bit.
                */
               sysval = hit_in.front_face;
            } else {
               nir_def *flags_dw =
                  nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                                  1, 32);
               sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
            }
            break;
         }

         default:
            continue;
         }

         progress = true;

         if (sysval) {
            nir_def_rewrite_uses(&intrin->def, sysval);
            nir_instr_remove(&intrin->instr);
         }
      }
   }

   nir_metadata_preserve(impl,
                         progress ?
                         nir_metadata_none :
                         (nir_metadata_block_index |
                          nir_metadata_dominance));
}

/** Lower ray-tracing system values and intrinsics
 *
 * In most 3D shader stages, intrinsics are a fairly thin wrapper around
 * hardware functionality and system values represent magic bits that come
 * into the shader from fixed-function hardware.  Ray-tracing, however, looks
 * a bit more like the OpenGL 1.0 world where the underlying hardware is
 * simple and most of the API implementation is software.
 *
 * In particular, most things that are treated as system values (or built-ins
 * in SPIR-V) don't get magically dropped into registers for us.  Instead, we
 * have to fetch them from the relevant data structures shared with the
 * ray-tracing hardware.  Most come from either the RT_DISPATCH_GLOBALS or
 * from one of the MemHit data structures.  Some, such as primitive_id,
 * require us to fetch the leaf address from the MemHit struct and then
 * manually read the data out of the BVH.  Instead of trying to emit all this
 * code deep in the back-end where we can't effectively optimize it, we lower
 * it all to global memory access in NIR.
 *
 * Once this pass is complete, the only real system values left are the two
 * argument pointer system values for BTD dispatch: btd_local_arg_addr and
 * btd_global_arg_addr.
 */
void
brw_nir_lower_rt_intrinsics(nir_shader *nir,
                            const struct intel_device_info *devinfo)
{
   nir_foreach_function_impl(impl, nir) {
      lower_rt_intrinsics_impl(impl, devinfo);
   }
}