1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 #include "util/hash_table.h"
28 #include "util/macros.h"
29 #include "util/set.h"
30 #include "util/u_dynarray.h"
31
32 /** @file nir_opt_ray_queries.c
33 *
34 * 1. Remove ray queries that the shader is not using the result of.
 * 2. Combine ray queries which are not used simultaneously.
36 */
37
38 static void
mark_query_read(struct set * queries,nir_intrinsic_instr * intrin)39 mark_query_read(struct set *queries,
40 nir_intrinsic_instr *intrin)
41 {
42 nir_def *rq_def = intrin->src[0].ssa;
43
44 nir_variable *query;
45 if (rq_def->parent_instr->type == nir_instr_type_intrinsic) {
46 nir_intrinsic_instr *load_deref =
47 nir_instr_as_intrinsic(rq_def->parent_instr);
48 assert(load_deref->intrinsic == nir_intrinsic_load_deref);
49
50 query = nir_intrinsic_get_var(load_deref, 0);
51 } else if (rq_def->parent_instr->type == nir_instr_type_deref) {
52 query = nir_deref_instr_get_variable(
53 nir_instr_as_deref(rq_def->parent_instr));
54 } else {
55 return;
56 }
57 assert(query);
58
59 _mesa_set_add(queries, query);
60 }
61
62 static void
nir_find_ray_queries_read(struct set * queries,nir_shader * shader)63 nir_find_ray_queries_read(struct set *queries,
64 nir_shader *shader)
65 {
66 nir_foreach_function_impl(impl, shader) {
67 nir_foreach_block(block, impl) {
68 nir_foreach_instr(instr, block) {
69 if (instr->type != nir_instr_type_intrinsic)
70 continue;
71
72 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
73 switch (intrin->intrinsic) {
74 case nir_intrinsic_rq_proceed:
75 if (!list_is_empty(&intrin->def.uses))
76 mark_query_read(queries, intrin);
77 break;
78 case nir_intrinsic_rq_load:
79 mark_query_read(queries, intrin);
80 break;
81 default:
82 break;
83 }
84 }
85 }
86 }
87 }
88
89 static bool
nir_replace_unread_queries_instr(nir_builder * b,nir_instr * instr,void * data)90 nir_replace_unread_queries_instr(nir_builder *b, nir_instr *instr, void *data)
91 {
92 struct set *queries = data;
93
94 if (instr->type != nir_instr_type_intrinsic)
95 return false;
96
97 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
98 switch (intrin->intrinsic) {
99 case nir_intrinsic_rq_initialize:
100 case nir_intrinsic_rq_terminate:
101 case nir_intrinsic_rq_generate_intersection:
102 case nir_intrinsic_rq_confirm_intersection:
103 break;
104 case nir_intrinsic_rq_proceed:
105 break;
106 default:
107 return false;
108 }
109
110 nir_variable *query = nir_intrinsic_get_var(intrin, 0);
111 assert(query);
112
113 struct set_entry *entry = _mesa_set_search(queries, query);
114 if (entry)
115 return false;
116
117 if (intrin->intrinsic == nir_intrinsic_rq_load)
118 assert(list_is_empty(&intrin->def.uses));
119
120 nir_instr_remove(instr);
121
122 return true;
123 }
124
125 bool
nir_opt_ray_queries(nir_shader * shader)126 nir_opt_ray_queries(nir_shader *shader)
127 {
128 struct set *read_queries = _mesa_pointer_set_create(NULL);
129 nir_find_ray_queries_read(read_queries, shader);
130
131 bool progress =
132 nir_shader_instructions_pass(shader,
133 nir_replace_unread_queries_instr,
134 nir_metadata_block_index |
135 nir_metadata_dominance,
136 read_queries);
137
138 /* Update the number of queries if some have been removed. */
139 if (progress) {
140 nir_remove_dead_derefs(shader);
141 nir_remove_dead_variables(shader,
142 nir_var_shader_temp | nir_var_function_temp,
143 NULL);
144 }
145
146 _mesa_set_destroy(read_queries, NULL);
147
148 return progress;
149 }
150
151 /**
152 * Merge ray queries that are not used in parallel to reduce scratch memory:
153 *
154 * 1. Store all the ray queries we will consider into an array for
155 * convenient access. Ignore arrays since it would be really complex
 *    to handle and will be rare in practice.
157 *
158 * 2. Count the number of ray query ranges and allocate the required ranges.
159 *
 * 3. Populate the ray query range array. A range is started and terminated
 *    by rq_initialize (the terminating rq_initialize will be the start of
 *    the next range). There are two hazards:
163 *
164 * 1. rq_initialize can be inside some form of controlflow which can result
165 * in incorrect ranges and invalid merging.
166 *
167 * SOLUTION: Discard the entire ray query when encountering an
168 * instruction that is not dominated by the rq_initialize
169 * of the range.
170 *
171 * 2. With loops, we can underestimate the range because the state may
172 * have to be preserved for multiple iterations.
173 *
174 * SOLUTION: Track parent loops.
175 *
176 * 4. Try to rewrite the variables. For that, we iterate over every ray query
177 * and try to move its ranges to the preceding ray queries.
178 */
179
/**
 * One range of a ray query: the rq_initialize that starts it plus the ray
 * query intrinsics on the same variable that are recorded before the next
 * rq_initialize of that variable.
 */
struct rq_range {
   /* Variable the range is assigned to. Starts out as the variable of the
    * rq_initialize and is overwritten when the range is moved to another
    * ray query.
    */
   nir_variable *variable;

   /* instr->index of the rq_initialize that opened the range. */
   uint32_t first;
   /* Largest instr->index of any intrinsic recorded in the range. */
   uint32_t last;

   /* nir_instr * of every recorded intrinsic; element 0 is the
    * rq_initialize.
    */
   struct util_dynarray instrs;
   /* Outermost enclosing loops (nir_cf_node *) of the recorded intrinsics. */
   struct set *loops;
};
189
/* Sentinel index value (all 32 bits set).
 * NOTE(review): nothing in this file as shown references this macro;
 * confirm whether it is still needed.
 */
#define RQ_NEW_INDEX_NONE 0xFFFFFFFF
191
192 static bool
count_ranges(struct nir_builder * b,nir_intrinsic_instr * intrinsic,void * data)193 count_ranges(struct nir_builder *b, nir_intrinsic_instr *intrinsic,
194 void *data)
195 {
196 if (intrinsic->intrinsic == nir_intrinsic_rq_initialize)
197 (*(uint32_t *)data)++;
198
199 return false;
200 }
201
202 static nir_cf_node *
get_parent_loop(nir_cf_node * node)203 get_parent_loop(nir_cf_node *node)
204 {
205 nir_cf_node *result = NULL;
206 while (node) {
207 if (node->type == nir_cf_node_loop)
208 result = node;
209
210 node = node->parent;
211 }
212 return result;
213 }
214
215 bool
nir_opt_ray_query_ranges(nir_shader * shader)216 nir_opt_ray_query_ranges(nir_shader *shader)
217 {
218 assert(exec_list_length(&shader->functions) == 1);
219
220 struct nir_function *func =
221 (struct nir_function *)exec_list_get_head_const(&shader->functions);
222 assert(func->impl);
223
224 uint32_t ray_query_count = 0;
225 nir_foreach_variable_in_shader(var, shader) {
226 if (!var->data.ray_query || glsl_type_is_array(var->type))
227 continue;
228 ray_query_count++;
229 }
230 nir_foreach_function_temp_variable(var, func->impl) {
231 if (!var->data.ray_query || glsl_type_is_array(var->type))
232 continue;
233 ray_query_count++;
234 }
235
236 if (ray_query_count <= 1) {
237 nir_metadata_preserve(func->impl, nir_metadata_all);
238 return false;
239 }
240
241 void *mem_ctx = ralloc_context(NULL);
242
243 nir_metadata_require(func->impl, nir_metadata_instr_index | nir_metadata_dominance);
244
245 nir_variable **ray_queries = ralloc_array(mem_ctx, nir_variable *, ray_query_count);
246 ray_query_count = 0;
247
248 nir_foreach_variable_in_shader(var, shader) {
249 if (!var->data.ray_query || glsl_type_is_array(var->type))
250 continue;
251
252 ray_queries[ray_query_count] = var;
253 ray_query_count++;
254 }
255
256 nir_foreach_function_temp_variable(var, func->impl) {
257 if (!var->data.ray_query || glsl_type_is_array(var->type))
258 continue;
259
260 ray_queries[ray_query_count] = var;
261 ray_query_count++;
262 }
263
264 uint32_t range_count = 0;
265 nir_shader_intrinsics_pass(shader, count_ranges, nir_metadata_all,
266 &range_count);
267
268 struct rq_range *ranges = rzalloc_array(mem_ctx, struct rq_range, range_count);
269
270 struct hash_table *range_indices = _mesa_pointer_hash_table_create(mem_ctx);
271 uint32_t target_index = 0;
272
273 nir_foreach_block(block, func->impl) {
274 nir_cf_node *parent_loop = get_parent_loop(&block->cf_node);
275
276 nir_foreach_instr(instr, block) {
277 if (instr->type != nir_instr_type_intrinsic)
278 continue;
279
280 nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
281 if (!nir_intrinsic_is_ray_query(intrinsic->intrinsic))
282 continue;
283
284 nir_deref_instr *ray_query_deref =
285 nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
286
287 if (ray_query_deref->deref_type != nir_deref_type_var)
288 continue;
289
290 if (intrinsic->intrinsic == nir_intrinsic_rq_initialize) {
291 _mesa_hash_table_insert(range_indices, ray_query_deref->var,
292 (void *)(uintptr_t)target_index);
293
294 ranges[target_index].variable = ray_query_deref->var;
295 ranges[target_index].first = instr->index;
296 ranges[target_index].last = instr->index;
297 util_dynarray_init(&ranges[target_index].instrs, mem_ctx);
298 ranges[target_index].loops = _mesa_pointer_set_create(mem_ctx);
299
300 target_index++;
301 }
302
303 struct hash_entry *index_entry =
304 _mesa_hash_table_search(range_indices, ray_query_deref->var);
305 struct rq_range *range = ranges + (uintptr_t)index_entry->data;
306
307 if (intrinsic->intrinsic != nir_intrinsic_rq_initialize) {
308 /* If the initialize instruction does not dominate every other
309 * instruction in the range, we have to reject the enire query
310 * since we can not be certain about the ranges:
311 *
312 * rayQuery rq;
313 * if (i == 0)
314 * init(rq);
315 * ... <-- Another ray query that would get merged.
316 * if (i == 1)
317 * init(rq); <--+
318 * if (i == 0) |
319 * proceed(rq); <--+ Not dominated by init!
320 * if (i == 1)
321 * proceed(rq);
322 */
323 nir_instr *init = *util_dynarray_element(&range->instrs, nir_instr *, 0);
324 if (!nir_block_dominates(init->block, instr->block)) {
325 for (uint32_t i = 0; i < ray_query_count; i++) {
326 if (ray_queries[i] == ray_query_deref->var) {
327 ray_queries[i] = NULL;
328 break;
329 }
330 }
331
332 continue;
333 }
334
335 range->last = MAX2(range->last, instr->index);
336 }
337
338 util_dynarray_append(&range->instrs, nir_instr *, instr);
339
340 if (parent_loop)
341 _mesa_set_add(range->loops, parent_loop);
342 }
343 }
344
345 range_count = target_index;
346
347 /* Try to push ray query ranges 'down'. */
348 for (uint32_t rq_index = 1; rq_index < ray_query_count; rq_index++) {
349 if (!ray_queries[rq_index])
350 continue;
351
352 for (uint32_t dom_rq_index = 0; dom_rq_index < rq_index; dom_rq_index++) {
353 if (!ray_queries[dom_rq_index])
354 continue;
355
356 bool collides = false;
357
358 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
359 if (ranges[range_index].variable != ray_queries[rq_index])
360 continue;
361
362 for (uint32_t dom_range_index = 0; dom_range_index < range_count; dom_range_index++) {
363 if (ranges[dom_range_index].variable != ray_queries[dom_rq_index])
364 continue;
365
366 if (!(ranges[dom_range_index].first > ranges[range_index].last ||
367 ranges[dom_range_index].last < ranges[range_index].first)) {
368 collides = true;
369 break;
370 }
371
372 if (_mesa_set_intersects(ranges[dom_range_index].loops,
373 ranges[range_index].loops)) {
374 collides = true;
375 break;
376 }
377 }
378
379 if (collides)
380 break;
381 }
382
383 if (collides)
384 continue;
385
386 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
387 if (ranges[range_index].variable != ray_queries[rq_index])
388 continue;
389
390 ranges[range_index].variable = ray_queries[dom_rq_index];
391 }
392 }
393 }
394
395 /* Remap the ray query derefs to the new variables. */
396 bool progress = false;
397 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
398 struct rq_range *range = ranges + range_index;
399 util_dynarray_foreach(&range->instrs, nir_instr *, instr) {
400 nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(*instr);
401 nir_deref_instr *ray_query_deref =
402 nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
403 if (ray_query_deref->var != range->variable) {
404 ray_query_deref->var = range->variable;
405 progress = true;
406 }
407 }
408 }
409
410 nir_metadata_preserve(func->impl, nir_metadata_all);
411
412 /* Remove dead ray queries. */
413 if (progress) {
414 nir_remove_dead_derefs(shader);
415 nir_remove_dead_variables(shader, nir_var_shader_temp | nir_var_function_temp,
416 NULL);
417 }
418
419 ralloc_free(mem_ctx);
420
421 return progress;
422 }
423