• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Igalia S.L.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "ir3_nir.h"
25 
/**
 * A pass which detects tex instructions that are candidates to be executed
 * prior to FS shader start, and changes them to nir_texop_tex_prefetch.
 */
30 
31 static int
coord_offset(nir_def * ssa)32 coord_offset(nir_def *ssa)
33 {
34    nir_instr *parent_instr = ssa->parent_instr;
35 
36    /* The coordinate of a texture sampling instruction eligible for
37     * pre-fetch is either going to be a load_interpolated_input/
38     * load_input, or a vec2 assembling non-swizzled components of
39     * a load_interpolated_input/load_input (due to varying packing)
40     */
41 
42    if (parent_instr->type == nir_instr_type_alu) {
43       nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
44 
45       if (alu->op != nir_op_vec2)
46          return -1;
47 
48       int base_src_offset = coord_offset(alu->src[0].src.ssa);
49       if (base_src_offset < 0)
50          return -1;
51 
52       int base_offset = base_src_offset + alu->src[0].swizzle[0];
53 
54       /* NOTE it might be possible to support more than 2D? */
55       for (int i = 1; i < 2; i++) {
56          int nth_src_offset = coord_offset(alu->src[i].src.ssa);
57          if (nth_src_offset < 0)
58             return -1;
59          int nth_offset = nth_src_offset + alu->src[i].swizzle[0];
60 
61          if (nth_offset != (base_offset + i))
62             return -1;
63       }
64 
65       return base_offset;
66    }
67 
68    if (parent_instr->type != nir_instr_type_intrinsic)
69       return -1;
70 
71    nir_intrinsic_instr *input = nir_instr_as_intrinsic(parent_instr);
72 
73    if (input->intrinsic != nir_intrinsic_load_interpolated_input)
74       return -1;
75 
76    /* Happens with lowered load_barycentric_at_offset */
77    if (input->src[0].ssa->parent_instr->type != nir_instr_type_intrinsic)
78       return -1;
79 
80    nir_intrinsic_instr *interp =
81       nir_instr_as_intrinsic(input->src[0].ssa->parent_instr);
82 
83    if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel)
84       return -1;
85 
86    /* interpolation modes such as noperspective aren't covered by the other
87     * test, we need to explicitly check for them here.
88     */
89    unsigned interp_mode = nir_intrinsic_interp_mode(interp);
90    if (interp_mode != INTERP_MODE_NONE && interp_mode != INTERP_MODE_SMOOTH)
91       return -1;
92 
93    /* we also need a const input offset: */
94    if (!nir_src_is_const(input->src[1]))
95       return -1;
96 
97    unsigned base = nir_src_as_uint(input->src[1]) + nir_intrinsic_base(input);
98    unsigned comp = nir_intrinsic_component(input);
99 
100    return (4 * base) + comp;
101 }
102 
103 int
ir3_nir_coord_offset(nir_def * ssa)104 ir3_nir_coord_offset(nir_def *ssa)
105 {
106 
107    assert(ssa->num_components == 2);
108    return coord_offset(ssa);
109 }
110 
111 static bool
has_src(nir_tex_instr * tex,nir_tex_src_type type)112 has_src(nir_tex_instr *tex, nir_tex_src_type type)
113 {
114    return nir_tex_instr_src_index(tex, type) >= 0;
115 }
116 
117 static bool
ok_bindless_src(nir_tex_instr * tex,nir_tex_src_type type)118 ok_bindless_src(nir_tex_instr *tex, nir_tex_src_type type)
119 {
120    int idx = nir_tex_instr_src_index(tex, type);
121    assert(idx >= 0);
122    nir_intrinsic_instr *bindless = ir3_bindless_resource(tex->src[idx].src);
123 
124    /* TODO from SP_FS_BINDLESS_PREFETCH[n] it looks like this limit should
125     * be 1<<8 ?
126     */
127    return nir_src_is_const(bindless->src[0]) &&
128           (nir_src_as_uint(bindless->src[0]) < (1 << 16));
129 }
130 
131 /**
132  * Check that we will be able to encode the tex/samp parameters
133  * successfully.  These limits are based on the layout of
134  * SP_FS_PREFETCH[n] and SP_FS_BINDLESS_PREFETCH[n], so at some
135  * point (if those regs changes) they may become generation
136  * specific.
137  */
138 static bool
ok_tex_samp(nir_tex_instr * tex)139 ok_tex_samp(nir_tex_instr *tex)
140 {
141    if (has_src(tex, nir_tex_src_texture_handle)) {
142       /* bindless case: */
143 
144       assert(has_src(tex, nir_tex_src_sampler_handle));
145 
146       return ok_bindless_src(tex, nir_tex_src_texture_handle) &&
147              ok_bindless_src(tex, nir_tex_src_sampler_handle);
148    } else {
149       assert(!has_src(tex, nir_tex_src_texture_offset));
150       assert(!has_src(tex, nir_tex_src_sampler_offset));
151 
152       return (tex->texture_index <= 0x1f) && (tex->sampler_index <= 0xf);
153    }
154 }
155 
156 static bool
lower_tex_prefetch_block(nir_block * block)157 lower_tex_prefetch_block(nir_block *block)
158 {
159    bool progress = false;
160 
161    nir_foreach_instr_safe (instr, block) {
162       if (instr->type != nir_instr_type_tex)
163          continue;
164 
165       nir_tex_instr *tex = nir_instr_as_tex(instr);
166       if (tex->op != nir_texop_tex)
167          continue;
168 
169       if (has_src(tex, nir_tex_src_bias) || has_src(tex, nir_tex_src_lod) ||
170           has_src(tex, nir_tex_src_comparator) ||
171           has_src(tex, nir_tex_src_projector) ||
172           has_src(tex, nir_tex_src_offset) || has_src(tex, nir_tex_src_ddx) ||
173           has_src(tex, nir_tex_src_ddy) || has_src(tex, nir_tex_src_ms_index) ||
174           has_src(tex, nir_tex_src_texture_offset) ||
175           has_src(tex, nir_tex_src_sampler_offset))
176          continue;
177 
178       /* only prefetch for simple 2d tex fetch case */
179       if (tex->sampler_dim != GLSL_SAMPLER_DIM_2D || tex->is_array)
180          continue;
181 
182       if (!ok_tex_samp(tex))
183          continue;
184 
185       int idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
186       /* First source should be the sampling coordinate. */
187       nir_tex_src *coord = &tex->src[idx];
188 
189       if (ir3_nir_coord_offset(coord->src.ssa) >= 0) {
190          tex->op = nir_texop_tex_prefetch;
191 
192          progress |= true;
193       }
194    }
195 
196    return progress;
197 }
198 
199 static bool
lower_tex_prefetch_func(nir_function_impl * impl)200 lower_tex_prefetch_func(nir_function_impl *impl)
201 {
202    /* Only instructions in the the outer-most block are considered eligible for
203     * pre-dispatch, because they need to be move-able to the beginning of the
204     * shader to avoid locking down the register holding the pre-fetched result
205     * for too long. However if there is a preamble we should skip the preamble
206     * and only look in the first block after the preamble instead, because that
207     * corresponds to the first block in the original program and texture fetches
208     * in the preamble are never pre-dispatchable.
209     */
210    nir_block *block = nir_start_block(impl);
211 
212    nir_if *nif = nir_block_get_following_if(block);
213    if (nif) {
214       nir_instr *cond = nif->condition.ssa->parent_instr;
215       if (cond->type == nir_instr_type_intrinsic &&
216           nir_instr_as_intrinsic(cond)->intrinsic ==
217           nir_intrinsic_preamble_start_ir3) {
218          block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
219       }
220    }
221 
222    bool progress = lower_tex_prefetch_block(block);
223 
224    if (progress) {
225       nir_metadata_preserve(impl,
226                             nir_metadata_block_index | nir_metadata_dominance);
227    }
228 
229    return progress;
230 }
231 
232 bool
ir3_nir_lower_tex_prefetch(nir_shader * shader)233 ir3_nir_lower_tex_prefetch(nir_shader *shader)
234 {
235    bool progress = false;
236 
237    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
238 
239    nir_foreach_function (function, shader) {
240       /* Only texture sampling instructions inside the main function
241        * are eligible for pre-dispatch.
242        */
243       if (!function->impl || !function->is_entrypoint)
244          continue;
245 
246       progress |= lower_tex_prefetch_func(function->impl);
247    }
248 
249    return progress;
250 }
251