/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"

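/* Per-shader state used while lowering descriptor set/binding references to
 * flat binding-table indices.
 */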
struct apply_pipeline_layout_state {
   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;

   struct {
      BITSET_WORD *used;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
      uint8_t *image_offsets;
   } set[MAX_SETS];
};

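/* Mark a (set, binding) pair as used by the shader. */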
static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   BITSET_SET(state->set[set].used, binding);
}

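/* Mark the (set, binding) pair referenced by a variable as used. */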
static void
add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
{
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

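/* Walk a block and record every descriptor binding referenced by resource
 * index intrinsics, image intrinsics, and texture instructions.
 */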
static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_min:
         case nir_intrinsic_image_atomic_max:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_samples:
            add_var_binding(state, intrin->variables[0]->var);
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         assert(tex->texture);
         add_var_binding(state, tex->texture->var);
         if (tex->sampler)
            add_var_binding(state, tex->sampler->var);
         break;
      }
      default:
         continue;
      }
   }
}

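/* Replace a vulkan_resource_index intrinsic with a flat binding-table index,
 * clamping indirect array indices when bounds checks are enabled.
 */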
static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   nir_const_value *const_array_index = nir_src_as_const_value(intrin->src[0]);

   nir_ssa_def *block_index;
   if (const_array_index) {
      unsigned array_index = const_array_index->u32[0];
      array_index = MIN2(array_index, array_size - 1);
      block_index = nir_imm_int(b, surface_index + array_index);
   } else {
      block_index = nir_ssa_for_src(b, intrin->src[0], 1);

      if (state->add_bounds_checks)
         block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 1));

      block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
   nir_instr_remove(&intrin->instr);
}

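/* Lower a vulkan_resource_reindex intrinsic to a simple index add. */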
static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *new_index = nir_iadd(b, intrin->src[0].ssa,
                                        intrin->src[1].ssa);

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}

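/* Fold a texture/sampler array deref into the instruction's index: constant
 * offsets are clamped and added to *const_index, indirect offsets become an
 * extra texture/sampler offset source.
 */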
static void
lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
                unsigned *const_index, unsigned array_size,
                nir_tex_src_type src_type, bool allow_indirect,
                struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (deref->deref.child) {
      assert(deref->deref.child->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         /* From VK_KHR_sampler_ycbcr_conversion:
          *
          * If sampler Y’CBCR conversion is enabled, the combined image
          * sampler must be indexed only by constant integral expressions when
          * aggregated into arrays in shader code, irrespective of the
          * shaderSampledImageArrayDynamicIndexing feature.
          */
         assert(allow_indirect);

         nir_ssa_def *index =
            nir_iadd(b, nir_imm_int(b, deref_array->base_offset),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));

         nir_tex_instr_add_src(tex, src_type, nir_src_for_ssa(index));
      } else {
         *const_index += MIN2(deref_array->base_offset, array_size - 1);
      }
   }
}

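/* Drop the indirect source of a lowered texture/sampler deref so the deref
 * can be safely detached from the instruction.
 */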
static void
cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
{
   if (deref->deref.child == NULL)
      return;

   nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);

   if (deref_array->deref_array_type != nir_deref_array_type_indirect)
      return;

   nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
}

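/* Return true if the texture instruction has an explicit plane source. */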
static bool
has_tex_src_plane(nir_tex_instr *tex)
{
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_plane)
         return true;
   }

   return false;
}

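/* Remove the constant plane source from the texture instruction and return
 * its value.
 */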
static uint32_t
extract_tex_src_plane(nir_tex_instr *tex)
{
   unsigned plane = 0;

   int plane_src_idx = -1;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_plane) {
         nir_const_value *const_plane =
            nir_src_as_const_value(tex->src[i].src);

         /* Our color conversion lowering pass should only ever insert
          * constants. */
         assert(const_plane);
         plane = const_plane->u32[0];
         plane_src_idx = i;
      }
   }

   assert(plane_src_idx >= 0);
   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

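/* Rewrite a texture instruction's texture/sampler derefs into flat
 * binding-table indices, accounting for the plane of multi-planar formats.
 */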
static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   /* No one should have come by and lowered it already */
   assert(tex->texture);

   state->builder.cursor = nir_before_instr(&tex->instr);

   unsigned set = tex->texture->var->data.descriptor_set;
   unsigned binding = tex->texture->var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;
   bool has_plane = has_tex_src_plane(tex);
   unsigned plane = has_plane ? extract_tex_src_plane(tex) : 0;

   tex->texture_index = state->set[set].surface_offsets[binding];
   lower_tex_deref(tex, tex->texture, &tex->texture_index, array_size,
                   nir_tex_src_texture_offset, !has_plane, state);
   tex->texture_index += plane;

   if (tex->sampler) {
      unsigned set = tex->sampler->var->data.descriptor_set;
      unsigned binding = tex->sampler->var->data.binding;
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;
      tex->sampler_index = state->set[set].sampler_offsets[binding];
      lower_tex_deref(tex, tex->sampler, &tex->sampler_index, array_size,
                      nir_tex_src_sampler_offset, !has_plane, state);
      tex->sampler_index += plane;
   }

   /* The backend only ever uses this to mark used surfaces.  We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;

   cleanup_tex_deref(tex, tex->texture);
   if (tex->sampler)
      cleanup_tex_deref(tex, tex->sampler);
   tex->texture = NULL;
   tex->sampler = NULL;
}

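/* Lower all resource-index intrinsics and texture instructions in a block. */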
static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}

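/* Fill one vec4 worth of push-constant parameter slots, padding unused
 * components with the built-in zero parameter.
 */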
static void
setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
{
   for (unsigned i = 0; i < n; ++i)
      params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t));

   for (unsigned i = n; i < 4; ++i)
      params[i] = BRW_PARAM_BUILTIN_ZERO;
}

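/* Main entry point: gather the bindings used by the shader, assign them
 * binding-table and sampler slots in the pipeline bind map, then lower all
 * descriptor references in the NIR to those flat indices.  Image uniforms
 * additionally get push-constant parameters for their brw_image_param data.
 */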
void
anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   struct anv_pipeline_layout *layout = pipeline->layout;
   gl_shader_stage stage = shader->info.stage;

   struct apply_pipeline_layout_state state = {
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = pipeline->device->robust_buffer_access,
   };

   void *mem_ctx = ralloc_context(NULL);

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      const unsigned words = BITSET_WORDS(count);
      state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

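   /* Count how many binding table entries, samplers, and images the used
    * bindings require for this stage.
    */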
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;

      BITSET_WORD b, _tmp;
      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
                         set_layout->binding_count) {
         if (set_layout->binding[b].stage[stage].surface_index >= 0) {
            map->surface_count +=
               anv_descriptor_set_binding_layout_get_hw_size(&set_layout->binding[b]);
         }
         if (set_layout->binding[b].stage[stage].sampler_index >= 0) {
            map->sampler_count +=
               anv_descriptor_set_binding_layout_get_hw_size(&set_layout->binding[b]);
         }
         if (set_layout->binding[b].stage[stage].image_index >= 0)
            map->image_count += set_layout->binding[b].array_size;
      }
   }

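   /* Assign per-binding surface/sampler/image offsets and record the reverse
    * (slot -> descriptor) mapping in the bind map.
    */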
   unsigned surface = 0;
   unsigned sampler = 0;
   unsigned image = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;

      BITSET_WORD b, _tmp;
      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
                         set_layout->binding_count) {
         struct anv_descriptor_set_binding_layout *binding =
            &set_layout->binding[b];

         if (binding->stage[stage].surface_index >= 0) {
            state.set[set].surface_offsets[b] = surface;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[surface].set = set;
                  map->surface_to_descriptor[surface].binding = b;
                  map->surface_to_descriptor[surface].index = i;
                  map->surface_to_descriptor[surface].plane = p;
                  surface++;
               }
            }
         }

         if (binding->stage[stage].sampler_index >= 0) {
            state.set[set].sampler_offsets[b] = sampler;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[sampler].set = set;
                  map->sampler_to_descriptor[sampler].binding = b;
                  map->sampler_to_descriptor[sampler].index = i;
                  map->sampler_to_descriptor[sampler].plane = p;
                  sampler++;
               }
            }
         }

         if (binding->stage[stage].image_index >= 0) {
            state.set[set].image_offsets[b] = image;
            image += binding->array_size;
         }
      }
   }

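   /* For image and input-attachment bindings, propagate per-variable
    * information (input attachment index, write-only flag) into the bind map.
    */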
   nir_foreach_variable(var, &shader->uniforms) {
      if (!glsl_type_is_image(var->interface_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(var->interface_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (!BITSET_TEST(state.set[set].used, binding))
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only = var->data.image.write_only;
      }
   }

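   /* Now walk the shader and lower all descriptor references to the flat
    * indices computed above.
    */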
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   if (map->image_count > 0) {
      assert(map->image_count <= MAX_IMAGES);
      nir_foreach_variable(var, &shader->uniforms) {
         if (glsl_type_is_image(var->type) ||
             (glsl_type_is_array(var->type) &&
              glsl_type_is_image(glsl_get_array_element(var->type)))) {
            /* Images are represented as uniform push constants and the actual
             * information required for reading/writing to/from the image is
             * stored in the uniform.
             */
            unsigned set = var->data.descriptor_set;
            unsigned binding = var->data.binding;
            unsigned image_index = state.set[set].image_offsets[binding];

            var->data.driver_location = shader->num_uniforms +
                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
         }
      }

      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
                                                       map->image_count *
                                                       BRW_IMAGE_PARAM_SIZE);
      struct anv_push_constants *null_data = NULL;
      const struct brw_image_param *image_param = null_data->images;
      for (uint32_t i = 0; i < map->image_count; i++) {
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
                                  (uintptr_t)&image_param->surface_idx, 1);
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
                                  (uintptr_t)image_param->offset, 2);
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
                                  (uintptr_t)image_param->size, 3);
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
                                  (uintptr_t)image_param->stride, 4);
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
                                  (uintptr_t)image_param->tiling, 3);
         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
                                  (uintptr_t)image_param->swizzling, 2);

         param += BRW_IMAGE_PARAM_SIZE;
         image_param++;
      }
      assert(param == prog_data->param + prog_data->nr_params);

      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
   }

   ralloc_free(mem_ctx);
}