/*
 * Copyright (c) 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"

#include "nir_deref.h"

#include "util/macros.h"

struct lowering_state {
   const struct intel_device_info *devinfo;

   nir_function_impl *impl;

   struct hash_table *queries;
   uint32_t n_queries;

   struct brw_nir_rt_globals_defs globals;
   nir_def *rq_globals;
};

struct brw_ray_query {
   nir_variable *opaque_var;
   nir_variable *internal_var;
   uint32_t id;
};

#define SIZEOF_QUERY_STATE (sizeof(uint32_t))

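/* With a single ray query in the shader, the HW ray query stack can be used
 * directly. With more than one live query, each query keeps a shadow copy of
 * its stack in global memory, which is filled into / spilled out of the HW
 * stack around each rq_proceed (see fill_query()/spill_query() below).
 */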
static bool
need_spill_fill(struct lowering_state *state)
{
   return state->n_queries > 1;
}

/**
 * This pass lowers the opaque RayQuery structures coming out of SPIR-V. Each
 * query variable is replaced by a 16-bit local variable that packs the
 * trace-ray control value and the current BVH level (ctrl << 2 | level).
 * When a shader holds more than one query, each query additionally gets a
 * shadow copy of the HW ray query stack in global memory, filled/spilled
 * around every nir_intrinsic_rq_proceed.
 */

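/* Assign a sequential id to each opaque ray query variable. An array of
 * queries consumes one id per element so that every element gets its own
 * shadow stack slot.
 */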
static void
register_opaque_var(nir_variable *opaque_var, struct lowering_state *state)
{
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry == NULL);

   struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
   rq->opaque_var = opaque_var;
   rq->id = state->n_queries;

   unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
   state->n_queries += MAX2(1, aoa_size);

   _mesa_hash_table_insert(state->queries, opaque_var, rq);
}

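/* Create a 16-bit internal variable, mirroring the array shape of the opaque
 * variable, to hold the packed trace control/BVH level state.
 */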
static void
create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
{
   const struct glsl_type *opaque_type = rq->opaque_var->type;
   const struct glsl_type *internal_type = glsl_uint16_t_type();

   while (glsl_type_is_array(opaque_type)) {
      assert(!glsl_type_is_unsized_array(opaque_type));
      internal_type = glsl_array_type(internal_type,
                                      glsl_array_size(opaque_type),
                                      0);
      opaque_type = glsl_get_array_element(opaque_type);
   }

   rq->internal_var = nir_local_variable_create(state->impl,
                                                internal_type,
                                                NULL);
}

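/* Compute the global shadow memory address backing the given ray query deref
 * and return the matching internal state deref through out_state_deref.
 * Returns NULL when no spill/fill is needed (single query), in which case
 * the HW stack is used directly.
 */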
static nir_def *
get_ray_query_shadow_addr(nir_builder *b,
                          nir_deref_instr *deref,
                          struct lowering_state *state,
                          nir_deref_instr **out_state_deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);

   nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry);

   struct brw_ray_query *rq = entry->data;

   /* Base address in shadow memory for this ray query variable. */
   nir_def *base_addr =
      nir_iadd_imm(b, state->globals.resume_sbt_addr,
                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);

   bool spill_fill = need_spill_fill(state);
   *out_state_deref = nir_build_deref_var(b, rq->internal_var);

   if (!spill_fill)
      return NULL;

   /* Just emit code and let constant-folding go to town */
   nir_deref_instr **p = &path.path[1];
   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         nir_def *index = (*p)->arr.index.ssa;

         /* Walk the internal state variable down the same array index. */
         *out_state_deref = nir_build_deref_array(b, *out_state_deref, index);

         /* Advance the shadow address by this element's total size. */
         uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
            brw_rt_ray_queries_shadow_stack_size(state->devinfo);

         nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);

         base_addr = nir_iadd(b, base_addr, mul);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   /* Add the lane offset to the shadow memory address */
   nir_def *lane_offset =
      nir_imul_imm(
         b,
         nir_iadd(
            b,
            nir_imul(
               b,
               brw_load_btd_dss_id(b),
               state->globals.num_dss_rt_stacks),
            brw_nir_rt_sync_stack_id(b)),
         BRW_RT_SIZEOF_SHADOW_RAY_QUERY);

   return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
}

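/* Read and/or update the packed per-query state: bits [1:0] hold the BVH
 * level, the remaining bits hold the trace-ray control value.
 */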
static void
update_trace_ctrl_level(nir_builder *b,
                        nir_deref_instr *state_deref,
                        nir_def **out_old_ctrl,
                        nir_def **out_old_level,
                        nir_def *new_ctrl,
                        nir_def *new_level)
{
   nir_def *old_value = nir_load_deref(b, state_deref);
   nir_def *old_ctrl = nir_ishr_imm(b, old_value, 2);
   nir_def *old_level = nir_iand_imm(b, old_value, 0x3);

   if (out_old_ctrl)
      *out_old_ctrl = old_ctrl;
   if (out_old_level)
      *out_old_level = old_level;

   if (new_ctrl)
      new_ctrl = nir_i2i16(b, new_ctrl);
   if (new_level)
      new_level = nir_i2i16(b, new_level);

   if (new_ctrl || new_level) {
      if (!new_ctrl)
         new_ctrl = old_ctrl;
      if (!new_level)
         new_level = old_level;

      nir_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level);
      nir_store_deref(b, state_deref, new_value, 0x1);
   }
}

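/* Copy a query's shadow stack into the HW stack before handing the query
 * over to the HW.
 */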
static void
fill_query(nir_builder *b,
           nir_def *hw_stack_addr,
           nir_def *shadow_stack_addr,
           nir_def *ctrl)
{
   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}

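/* Copy the HW stack back into the query's shadow stack once the HW is done
 * with it.
 */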
static void
spill_query(nir_builder *b,
            nir_def *hw_stack_addr,
            nir_def *shadow_stack_addr)
{
   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}

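/* Lower a single rq_* intrinsic. Every case first resolves the query deref
 * to a stack address (shadow if spill/fill is needed, HW otherwise) and to
 * the internal ctrl/level state deref.
 */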
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_instr *ctrl_level_deref;
   nir_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
   nir_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr,
                                 state->globals.num_dss_rt_stacks);
   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;

   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_def *as_addr = intrin->src[1].ssa;
      nir_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_def *ray_orig = intrin->src[4].ssa;
      nir_def *ray_t_min = intrin->src[5].ssa;
      nir_def *ray_dir = intrin->src[6].ssa;
      nir_def *ray_t_max = intrin->src[7].ssa;

      nir_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };

      nir_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);

      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);

      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }

   case nir_intrinsic_rq_proceed: {
      nir_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_def *not_done_then, *not_done_else;

      nir_push_if(b, not_done);
      {
         nir_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);

         /* Mark the query as done because we are handing it over to the HW
          * for processing. If the HW makes any progress, it will write back
          * some data and, as a side effect, clear the "done" bit. If no
          * progress is made, the HW writes nothing back and we can use this
          * bit to detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);

         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);

         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);

         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);

         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);

         update_trace_ctrl_level(b, ctrl_level_deref,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 hit_in.bvh_level);

         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_def_rewrite_uses(&intrin->def, not_done);
      break;
   }

   case nir_intrinsic_rq_confirm_intersection: {
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_generate_intersection: {
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_terminate: {
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }

   case nir_intrinsic_rq_load: {
      const bool committed = nir_intrinsic_committed(intrin);

      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);

      nir_def *sysval = NULL;
      switch (nir_intrinsic_ray_query_value(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate:
             *
             *    RayQueryCommittedIntersectionNoneEXT = 0U      <= hit_in.valid == false
             *    RayQueryCommittedIntersectionTriangleEXT = 1U  <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             *    RayQueryCommittedIntersectionGeneratedEXT = 2U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq_imm(b, hit_in.leaf_type, 4),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq_imm(b, hit_in.leaf_type,
                                     BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
         }
         break;

      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;

      case nir_ray_query_value_intersection_instance_custom_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }

      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }

      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }

      case nir_ray_query_value_intersection_geometry_index: {
         nir_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }

      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;

      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;

      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_intersection_object_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_intersection_object_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;

      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;

      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_triangle_vertex_positions: {
         struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
         brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
         sysval = pos.positions[nir_intrinsic_column(intrin)];
         break;
      }

      default:
         unreachable("Invalid ray query");
      }

      assert(sysval);
      nir_def_rewrite_uses(&intrin->def, sysval);
      break;
   }

   default:
      unreachable("Invalid intrinsic");
   }
}

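/* Walk the shader and lower every ray query intrinsic, loading the ray query
 * globals once at the top of the function.
 */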
static void
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
{
   nir_builder _b, *b = &_b;
   _b = nir_builder_at(nir_before_impl(impl));

   state->rq_globals = nir_load_ray_query_global_intel(b);

   brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_rq_initialize &&
             intrin->intrinsic != nir_intrinsic_rq_terminate &&
             intrin->intrinsic != nir_intrinsic_rq_proceed &&
             intrin->intrinsic != nir_intrinsic_rq_generate_intersection &&
             intrin->intrinsic != nir_intrinsic_rq_confirm_intersection &&
             intrin->intrinsic != nir_intrinsic_rq_load)
            continue;

         lower_ray_query_intrinsic(b, intrin, state);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}

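/* Entry point: registers every ray query variable, creates the matching
 * internal state variables and lowers all rq_* intrinsics. Returns true if
 * any query was lowered.
 */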
bool
brw_nir_lower_ray_queries(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(exec_list_length(&shader->functions) == 1);

   struct lowering_state state = {
      .devinfo = devinfo,
      .impl = nir_shader_get_entrypoint(shader),
      .queries = _mesa_pointer_hash_table_create(NULL),
   };

   /* Map each ray query variable to an internal state variable */
   nir_foreach_function_temp_variable(var, state.impl) {
      if (!var->data.ray_query)
         continue;
      register_opaque_var(var, &state);
   }
   hash_table_foreach(state.queries, entry)
      create_internal_var(entry->data, &state);

   bool progress = state.n_queries > 0;

   if (progress) {
      lower_ray_query_impl(state.impl, &state);

      nir_remove_dead_derefs(shader);
      nir_remove_dead_variables(shader,
                                nir_var_shader_temp | nir_var_function_temp,
                                NULL);

      nir_metadata_preserve(state.impl, nir_metadata_none);
   }

   ralloc_free(state.queries);

   return progress;
}