1 /*
2 * Copyright © 2022 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_nir.h"
25 #include "nir_builder.h"
26
27 /* This pass updates the block index in the resource_intel intrinsics if the
28 * array index is constant.
29 *
30 * This pass must be run before anv_nir_compute_push_layout().
31 */
32 static bool
update_resource_intel_block(nir_builder * b,nir_intrinsic_instr * intrin,UNUSED void * data)33 update_resource_intel_block(nir_builder *b, nir_intrinsic_instr *intrin,
34 UNUSED void *data)
35 {
36 if (intrin->intrinsic != nir_intrinsic_resource_intel)
37 return false;
38
39 /* If the array index in the descriptor binding is not const, we won't be
40 * able to turn this load_ubo into a push constant.
41 *
42 * Also if not pushable, set the block to 0xffffffff.
43 *
44 * Otherwise we need to update the block index by adding the array index so
45 * that when anv_nir_compute_push_layout() uses the block value it uses the
46 * right surface in the array of the binding.
47 */
48 if (!nir_src_is_const(intrin->src[2]) ||
49 !(nir_intrinsic_resource_access_intel(intrin) &
50 nir_resource_intel_pushable)) {
51 nir_intrinsic_set_resource_block_intel(intrin, 0xffffffff);
52 nir_intrinsic_set_resource_access_intel(
53 intrin,
54 nir_intrinsic_resource_access_intel(intrin) &
55 ~nir_resource_intel_pushable);
56 } else {
57 nir_intrinsic_set_resource_block_intel(
58 intrin,
59 nir_intrinsic_resource_block_intel(intrin) +
60 nir_src_as_uint(intrin->src[2]));
61 }
62
63 return true;
64 }
65
66 bool
anv_nir_update_resource_intel_block(nir_shader * shader)67 anv_nir_update_resource_intel_block(nir_shader *shader)
68 {
69 return nir_shader_intrinsics_pass(shader, update_resource_intel_block,
70 nir_metadata_all,
71 NULL);
72 }
73
/* Per-run parameters threaded through lower_resource_intel() via the
 * nir_shader_intrinsics_pass() data pointer.
 */
struct lower_resource_state {
   /* Descriptor set layout flavor; only the DIRECT layout needs the
    * set-offset/binding-offset combination below.
    */
   enum anv_descriptor_set_layout_type desc_type;
   /* Physical device, consulted for the uses_ex_bso capability. */
   const struct anv_physical_device *device;
};
78
/* This pass lowers the resource_intel surface_index source, combining the
 * descriptor set offset with the surface offset in the descriptor set.
 *
 * This pass must be run after anv_nir_compute_push_layout() because we want
 * the push constant selection to tell whether the surface offset is constant.
 * Once combined, the constant detection no longer works.
 */
86 static bool
lower_resource_intel(nir_builder * b,nir_intrinsic_instr * intrin,void * data)87 lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
88 {
89 if (intrin->intrinsic != nir_intrinsic_resource_intel)
90 return false;
91
92 const bool is_bindless =
93 (nir_intrinsic_resource_access_intel(intrin) &
94 nir_resource_intel_bindless) != 0;
95 const bool is_sampler =
96 (nir_intrinsic_resource_access_intel(intrin) &
97 nir_resource_intel_sampler) != 0;
98 const struct lower_resource_state *state = data;
99
100 if (!is_bindless)
101 return true;
102
103 b->cursor = nir_before_instr(&intrin->instr);
104
105 nir_def *set_offset = intrin->src[0].ssa;
106 nir_def *binding_offset = intrin->src[1].ssa;
107
108 /* When using indirect descriptor, the surface handles are loaded from the
109 * descriptor buffer and do not need any offset.
110 */
111 if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
112 if (!state->device->uses_ex_bso) {
113 /* We're trying to reduce the number of instructions in the shaders
114 * to compute surface handles. The assumption is that we're using
115 * more surface handles than sampler handles (UBO, SSBO, images,
116 * etc...) so it's worth optimizing that case.
117 *
118 * Surface handles in the extended descriptor message have to be
119 * shifted left by 6 prior to ex_bso (bits 31:12 in extended
120 * descriptor, match bits 25:6 of the surface handle). We have to
121 * combine 2 parts in the shader to build the final surface handle,
122 * base offset of the descriptor set (in the push constant, located
123 * in resource_intel::src[0]) and the relative descriptor offset
124 * (resource_intel::src[1]).
125 *
126 * For convenience, up to here, resource_intel::src[1] is in bytes.
127 * We now have to shift it left by 6 to match the shifted left by 6
128 * done for the push constant value provided in
129 * resource_intel::src[0]. That way the shader can just do a single
130 * ADD and get the surface handle.
131 *
132 * Samplers have a 4Gb heap and in the message they're in bits 31:6
133 * of the component 3 of the sampler message header. But since we
134 * push only a single offset for the base offset of the descriptor
135 * set, resource_intel::src[0] has to be shifted right by 6 (bringing
136 * it back in bytes).
137 */
138 if (!is_sampler)
139 binding_offset = nir_ishl_imm(b, binding_offset, 6);
140 }
141
142 nir_src_rewrite(&intrin->src[1],
143 nir_iadd(b, set_offset, binding_offset));
144 }
145
146 /* Now unused values : set offset, array index */
147 nir_src_rewrite(&intrin->src[0], nir_imm_int(b, 0xdeaddeed));
148 nir_src_rewrite(&intrin->src[2], nir_imm_int(b, 0xdeaddeed));
149
150 return true;
151 }
152
153 bool
anv_nir_lower_resource_intel(nir_shader * shader,const struct anv_physical_device * device,enum anv_descriptor_set_layout_type desc_type)154 anv_nir_lower_resource_intel(nir_shader *shader,
155 const struct anv_physical_device *device,
156 enum anv_descriptor_set_layout_type desc_type)
157 {
158 struct lower_resource_state state = {
159 .desc_type = desc_type,
160 .device = device,
161 };
162 return nir_shader_intrinsics_pass(shader, lower_resource_intel,
163 nir_metadata_block_index |
164 nir_metadata_dominance,
165 &state);
166 }
167