• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 
27 #include "util/hash_table.h"
28 #include "util/macros.h"
29 #include "util/set.h"
30 #include "util/u_dynarray.h"
31 
32 /** @file nir_opt_ray_queries.c
33  *
34  * 1. Remove ray queries that the shader is not using the result of.
 35  * 2. Combine ray queries which are not used simultaneously.
36  */
37 
38 static void
mark_query_read(struct set * queries,nir_intrinsic_instr * intrin)39 mark_query_read(struct set *queries,
40                 nir_intrinsic_instr *intrin)
41 {
42    nir_def *rq_def = intrin->src[0].ssa;
43 
44    nir_variable *query;
45    if (rq_def->parent_instr->type == nir_instr_type_intrinsic) {
46       nir_intrinsic_instr *load_deref =
47          nir_instr_as_intrinsic(rq_def->parent_instr);
48       assert(load_deref->intrinsic == nir_intrinsic_load_deref);
49 
50       query = nir_intrinsic_get_var(load_deref, 0);
51    } else if (rq_def->parent_instr->type == nir_instr_type_deref) {
52       query = nir_deref_instr_get_variable(
53          nir_instr_as_deref(rq_def->parent_instr));
54    } else {
55       return;
56    }
57    assert(query);
58 
59    _mesa_set_add(queries, query);
60 }
61 
62 static void
nir_find_ray_queries_read(struct set * queries,nir_shader * shader)63 nir_find_ray_queries_read(struct set *queries,
64                           nir_shader *shader)
65 {
66    nir_foreach_function_impl(impl, shader) {
67       nir_foreach_block(block, impl) {
68          nir_foreach_instr(instr, block) {
69             if (instr->type != nir_instr_type_intrinsic)
70                continue;
71 
72             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
73             switch (intrin->intrinsic) {
74             case nir_intrinsic_rq_proceed:
75                if (!list_is_empty(&intrin->def.uses))
76                   mark_query_read(queries, intrin);
77                break;
78             case nir_intrinsic_rq_load:
79                mark_query_read(queries, intrin);
80                break;
81             default:
82                break;
83             }
84          }
85       }
86    }
87 }
88 
89 static bool
nir_replace_unread_queries_instr(nir_builder * b,nir_instr * instr,void * data)90 nir_replace_unread_queries_instr(nir_builder *b, nir_instr *instr, void *data)
91 {
92    struct set *queries = data;
93 
94    if (instr->type != nir_instr_type_intrinsic)
95       return false;
96 
97    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
98    switch (intrin->intrinsic) {
99    case nir_intrinsic_rq_initialize:
100    case nir_intrinsic_rq_terminate:
101    case nir_intrinsic_rq_generate_intersection:
102    case nir_intrinsic_rq_confirm_intersection:
103       break;
104    case nir_intrinsic_rq_proceed:
105       break;
106    default:
107       return false;
108    }
109 
110    nir_variable *query = nir_intrinsic_get_var(intrin, 0);
111    assert(query);
112 
113    struct set_entry *entry = _mesa_set_search(queries, query);
114    if (entry)
115       return false;
116 
117    if (intrin->intrinsic == nir_intrinsic_rq_load)
118       assert(list_is_empty(&intrin->def.uses));
119 
120    nir_instr_remove(instr);
121 
122    return true;
123 }
124 
125 bool
nir_opt_ray_queries(nir_shader * shader)126 nir_opt_ray_queries(nir_shader *shader)
127 {
128    struct set *read_queries = _mesa_pointer_set_create(NULL);
129    nir_find_ray_queries_read(read_queries, shader);
130 
131    bool progress =
132       nir_shader_instructions_pass(shader,
133                                    nir_replace_unread_queries_instr,
134                                    nir_metadata_block_index |
135                                       nir_metadata_dominance,
136                                    read_queries);
137 
138    /* Update the number of queries if some have been removed. */
139    if (progress) {
140       nir_remove_dead_derefs(shader);
141       nir_remove_dead_variables(shader,
142                                 nir_var_shader_temp | nir_var_function_temp,
143                                 NULL);
144    }
145 
146    _mesa_set_destroy(read_queries, NULL);
147 
148    return progress;
149 }
150 
151 /**
152  * Merge ray queries that are not used in parallel to reduce scratch memory:
153  *
154  * 1. Store all the ray queries we will consider into an array for
155  *    convenient access. Ignore arrays since it would be really complex
 156  *    to handle and will be rare in practice.
157  *
158  * 2. Count the number of ray query ranges and allocate the required ranges.
159  *
 160  * 3. Populate the ray query range array. A range is started and terminated by
161  *    rq_initialize (the terminating rq_initialize will be the start of the
162  *    next range). There are two hazards:
163  *
 164  *    1. rq_initialize can be inside some form of control flow which can result
165  *       in incorrect ranges and invalid merging.
166  *
167  *       SOLUTION: Discard the entire ray query when encountering an
168  *                 instruction that is not dominated by the rq_initialize
169  *                 of the range.
170  *
171  *    2. With loops, we can underestimate the range because the state may
172  *       have to be preserved for multiple iterations.
173  *
174  *       SOLUTION: Track parent loops.
175  *
176  * 4. Try to rewrite the variables. For that, we iterate over every ray query
177  *    and try to move its ranges to the preceding ray queries.
178  */
179 
/* One live range of a ray query variable: the span of instruction indices
 * from an rq_initialize to the last intrinsic operating on that query
 * before it is re-initialized (or the shader ends).
 */
struct rq_range {
   /* Query variable this range currently belongs to; rewritten to another
    * query's variable when ranges are merged. */
   nir_variable *variable;

   /* First/last nir_instr::index covered by this range (inclusive). */
   uint32_t first;
   uint32_t last;

   /* All ray query intrinsics of this range (element type: nir_instr *);
    * element 0 is the rq_initialize that started the range. */
   struct util_dynarray instrs;
   /* Outermost loops containing instructions of this range; ranges that
    * share a loop must not be merged. */
   struct set *loops;
};

/* NOTE(review): appears unused within this file — possibly leftover or
 * referenced by code outside this view. */
#define RQ_NEW_INDEX_NONE 0xFFFFFFFF
191 
192 static bool
count_ranges(struct nir_builder * b,nir_intrinsic_instr * intrinsic,void * data)193 count_ranges(struct nir_builder *b, nir_intrinsic_instr *intrinsic,
194              void *data)
195 {
196    if (intrinsic->intrinsic == nir_intrinsic_rq_initialize)
197       (*(uint32_t *)data)++;
198 
199    return false;
200 }
201 
202 static nir_cf_node *
get_parent_loop(nir_cf_node * node)203 get_parent_loop(nir_cf_node *node)
204 {
205    nir_cf_node *result = NULL;
206    while (node) {
207       if (node->type == nir_cf_node_loop)
208          result = node;
209 
210       node = node->parent;
211    }
212    return result;
213 }
214 
/* Merge ray queries whose live ranges never overlap so they can share
 * storage (see the algorithm description in the comment block above).
 *
 * Returns true if any query variable was remapped.
 */
bool
nir_opt_ray_query_ranges(nir_shader *shader)
{
   /* This pass only supports single-function shaders. */
   assert(exec_list_length(&shader->functions) == 1);

   struct nir_function *func =
      (struct nir_function *)exec_list_get_head_const(&shader->functions);
   assert(func->impl);

   /* Count candidate queries; arrays of ray queries are skipped (too
    * complex to track, see comment above). */
   uint32_t ray_query_count = 0;
   nir_foreach_variable_in_shader(var, shader) {
      if (!var->data.ray_query || glsl_type_is_array(var->type))
         continue;
      ray_query_count++;
   }
   nir_foreach_function_temp_variable(var, func->impl) {
      if (!var->data.ray_query || glsl_type_is_array(var->type))
         continue;
      ray_query_count++;
   }

   /* With fewer than two queries there is nothing to merge. */
   if (ray_query_count <= 1) {
      nir_metadata_preserve(func->impl, nir_metadata_all);
      return false;
   }

   void *mem_ctx = ralloc_context(NULL);

   /* instr->index provides the range endpoints; dominance validates that
    * an initialize covers every use in its range. */
   nir_metadata_require(func->impl, nir_metadata_instr_index | nir_metadata_dominance);

   /* Gather the candidate queries into a flat array for convenient access. */
   nir_variable **ray_queries = ralloc_array(mem_ctx, nir_variable *, ray_query_count);
   ray_query_count = 0;

   nir_foreach_variable_in_shader(var, shader) {
      if (!var->data.ray_query || glsl_type_is_array(var->type))
         continue;

      ray_queries[ray_query_count] = var;
      ray_query_count++;
   }

   nir_foreach_function_temp_variable(var, func->impl) {
      if (!var->data.ray_query || glsl_type_is_array(var->type))
         continue;

      ray_queries[ray_query_count] = var;
      ray_query_count++;
   }

   /* Every rq_initialize starts a new range, so the rq_initialize count is
    * an upper bound on the number of ranges. */
   uint32_t range_count = 0;
   nir_shader_intrinsics_pass(shader, count_ranges, nir_metadata_all,
                              &range_count);

   struct rq_range *ranges = rzalloc_array(mem_ctx, struct rq_range, range_count);

   /* Maps each query variable to the index of its currently open range. */
   struct hash_table *range_indices = _mesa_pointer_hash_table_create(mem_ctx);
   uint32_t target_index = 0;

   nir_foreach_block(block, func->impl) {
      nir_cf_node *parent_loop = get_parent_loop(&block->cf_node);

      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
         if (!nir_intrinsic_is_ray_query(intrinsic->intrinsic))
            continue;

         nir_deref_instr *ray_query_deref =
            nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);

         /* Array-typed queries were excluded above; skip their derefs too. */
         if (ray_query_deref->deref_type != nir_deref_type_var)
            continue;

         if (intrinsic->intrinsic == nir_intrinsic_rq_initialize) {
            /* rq_initialize opens a new range for this variable (replacing
             * any previously open one in the map). */
            _mesa_hash_table_insert(range_indices, ray_query_deref->var,
                                    (void *)(uintptr_t)target_index);

            ranges[target_index].variable = ray_query_deref->var;
            ranges[target_index].first = instr->index;
            ranges[target_index].last = instr->index;
            util_dynarray_init(&ranges[target_index].instrs, mem_ctx);
            ranges[target_index].loops = _mesa_pointer_set_create(mem_ctx);

            target_index++;
         }

         /* NOTE(review): assumes every non-initialize ray query intrinsic
          * is preceded (in block order) by an rq_initialize of the same
          * variable; index_entry would be NULL otherwise — verify against
          * callers/producers of this pass. */
         struct hash_entry *index_entry =
            _mesa_hash_table_search(range_indices, ray_query_deref->var);
         struct rq_range *range = ranges + (uintptr_t)index_entry->data;

         if (intrinsic->intrinsic != nir_intrinsic_rq_initialize) {
            /* If the initialize instruction does not dominate every other
             * instruction in the range, we have to reject the entire query
             * since we can not be certain about the ranges:
             *
             * rayQuery rq;
             * if (i == 0)
             *    init(rq);
             * ...             <-- Another ray query that would get merged.
             * if (i == 1)
             *    init(rq);    <--+
             * if (i == 0)        |
             *    proceed(rq); <--+ Not dominated by init!
             * if (i == 1)
             *    proceed(rq);
             */
            /* Element 0 is always the rq_initialize that opened the range. */
            nir_instr *init = *util_dynarray_element(&range->instrs, nir_instr *, 0);
            if (!nir_block_dominates(init->block, instr->block)) {
               /* Reject the query: NULL it out so the merge loops below
                * ignore it entirely. */
               for (uint32_t i = 0; i < ray_query_count; i++) {
                  if (ray_queries[i] == ray_query_deref->var) {
                     ray_queries[i] = NULL;
                     break;
                  }
               }

               continue;
            }

            range->last = MAX2(range->last, instr->index);
         }

         util_dynarray_append(&range->instrs, nir_instr *, instr);

         /* Track the outermost loop so ranges sharing a loop are never
          * merged (state may persist across iterations). */
         if (parent_loop)
            _mesa_set_add(range->loops, parent_loop);
      }
   }

   range_count = target_index;

   /* Try to push ray query ranges 'down'. */
   for (uint32_t rq_index = 1; rq_index < ray_query_count; rq_index++) {
      if (!ray_queries[rq_index])
         continue;

      for (uint32_t dom_rq_index = 0; dom_rq_index < rq_index; dom_rq_index++) {
         if (!ray_queries[dom_rq_index])
            continue;

         bool collides = false;

         /* A collision exists when any range of rq_index overlaps in
          * instruction indices, or shares a loop, with any range of
          * dom_rq_index. */
         for (uint32_t range_index = 0; range_index < range_count; range_index++) {
            if (ranges[range_index].variable != ray_queries[rq_index])
               continue;

            for (uint32_t dom_range_index = 0; dom_range_index < range_count; dom_range_index++) {
               if (ranges[dom_range_index].variable != ray_queries[dom_rq_index])
                  continue;

               if (!(ranges[dom_range_index].first > ranges[range_index].last ||
                     ranges[dom_range_index].last < ranges[range_index].first)) {
                  collides = true;
                  break;
               }

               if (_mesa_set_intersects(ranges[dom_range_index].loops,
                                        ranges[range_index].loops)) {
                  collides = true;
                  break;
               }
            }

            if (collides)
               break;
         }

         if (collides)
            continue;

         /* No collision: fold all of rq_index's ranges into the earlier
          * query so both share one variable. */
         for (uint32_t range_index = 0; range_index < range_count; range_index++) {
            if (ranges[range_index].variable != ray_queries[rq_index])
               continue;

            ranges[range_index].variable = ray_queries[dom_rq_index];
         }
      }
   }

   /* Remap the ray query derefs to the new variables. */
   bool progress = false;
   for (uint32_t range_index = 0; range_index < range_count; range_index++) {
      struct rq_range *range = ranges + range_index;
      util_dynarray_foreach(&range->instrs, nir_instr *, instr) {
         nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(*instr);
         nir_deref_instr *ray_query_deref =
            nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
         if (ray_query_deref->var != range->variable) {
            ray_query_deref->var = range->variable;
            progress = true;
         }
      }
   }

   /* Only variables were rewritten, no control flow or SSA changes. */
   nir_metadata_preserve(func->impl, nir_metadata_all);

   /* Remove dead ray queries. */
   if (progress) {
      nir_remove_dead_derefs(shader);
      nir_remove_dead_variables(shader, nir_var_shader_temp | nir_var_function_temp,
                                NULL);
   }

   ralloc_free(mem_ctx);

   return progress;
}
423