1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_nir_lower_tex.h"
28
29 #include "nir.h"
30 #include "nir_builder.h"
31 #include "nir_builtin_builder.h"
32
33 static bool
lower_coord_shift_normalized(nir_builder * b,nir_tex_instr * tex)34 lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
35 {
36 b->cursor = nir_before_instr(&tex->instr);
37
38 nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
39 nir_def *scale = nir_frcp(b, size);
40
41 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
42 nir_def *corr = nullptr;
43 if (unlikely(tex->array_is_lowered_cube)) {
44 auto corr2 = nir_fadd(b,
45 nir_trim_vector(b, tex->src[coord_index].src.ssa, 2),
46 nir_fmul_imm(b, scale, -0.5f));
47 corr = nir_vec3(b,
48 nir_channel(b, corr2, 0),
49 nir_channel(b, corr2, 1),
50 nir_channel(b, tex->src[coord_index].src.ssa, 2));
51 } else {
52 corr = nir_fadd(b,
53 nir_fmul_imm(b, scale, -0.5f),
54 tex->src[coord_index].src.ssa);
55 }
56
57 nir_src_rewrite(&tex->src[coord_index].src, corr);
58 return true;
59 }
60
61 static bool
lower_coord_shift_unnormalized(nir_builder * b,nir_tex_instr * tex)62 lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
63 {
64 b->cursor = nir_before_instr(&tex->instr);
65 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
66 nir_def *corr = nullptr;
67 if (unlikely(tex->array_is_lowered_cube)) {
68 auto corr2 = nir_fadd_imm(b,
69 nir_trim_vector(b, tex->src[coord_index].src.ssa, 2),
70 -0.5f);
71 corr = nir_vec3(b,
72 nir_channel(b, corr2, 0),
73 nir_channel(b, corr2, 1),
74 nir_channel(b, tex->src[coord_index].src.ssa, 2));
75 } else {
76 corr = nir_fadd_imm(b, tex->src[coord_index].src.ssa, -0.5f);
77 }
78 nir_src_rewrite(&tex->src[coord_index].src, corr);
79 return true;
80 }
81
82 static bool
r600_nir_lower_int_tg4_impl(nir_function_impl * impl)83 r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
84 {
85 nir_builder b = nir_builder_create(impl);
86
87 bool progress = false;
88 nir_foreach_block(block, impl)
89 {
90 nir_foreach_instr_safe(instr, block)
91 {
92 if (instr->type == nir_instr_type_tex) {
93 nir_tex_instr *tex = nir_instr_as_tex(instr);
94 if (tex->op == nir_texop_tg4 && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
95 nir_tex_instr_src_index(tex, nir_tex_src_backend1) < 0) {
96 if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
97 if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
98 lower_coord_shift_normalized(&b, tex);
99 else
100 lower_coord_shift_unnormalized(&b, tex);
101 progress = true;
102 }
103 }
104 }
105 }
106 }
107 return progress;
108 }
109
/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integral valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect it has on Gather4, which always returns 4 texels for
 * bilinear filtering, is that the final coordinates are off by 0.5 of
 * the texel size.
 */
120
121 bool
r600_nir_lower_int_tg4(nir_shader * shader)122 r600_nir_lower_int_tg4(nir_shader *shader)
123 {
124 bool progress = false;
125 bool need_lowering = false;
126
127 nir_foreach_uniform_variable(var, shader)
128 {
129 if (glsl_type_is_sampler(var->type)) {
130 if (glsl_base_type_is_integer(var->type->sampled_type)) {
131 need_lowering = true;
132 }
133 }
134 }
135
136 if (need_lowering) {
137 nir_foreach_function_impl(impl, shader)
138 {
139 if (r600_nir_lower_int_tg4_impl(impl))
140 progress = true;
141 }
142 }
143
144 return progress;
145 }
146
/* Rewrite a shadow TXB/TXL on an array or cube sampler as a TXD
 * (explicit-derivative) lookup.  The effective LOD — explicit lod or
 * queried lod, plus any bias, clamped from below by min_lod — is turned
 * into an equivalent gradient: since lod = log2(scaled gradient), a
 * gradient of 2^lod texels (2^lod / size in normalized coordinates)
 * makes the sampler select the same mip level.
 * NOTE(review): presumably needed because the hardware path for shadow
 * lookups on these sampler types cannot take an explicit LOD — confirm
 * against the r600 ISA docs.
 * Returns true (the caller accumulates this as "progress").
 */
static bool
lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   assert(lod_idx >= 0 || bias_idx >= 0);

   nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   /* TXB has no explicit lod source, so query the implicit LOD. */
   nir_def *lod = (lod_idx >= 0) ? tex->src[lod_idx].src.ssa
                                 : nir_get_texture_lod(b, tex);

   if (bias_idx >= 0)
      lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);

   if (min_lod_idx >= 0)
      lod = nir_fmax(b, lod, tex->src[min_lod_idx].src.ssa);

   /* max lod? */

   /* Gradient magnitude in texels: 2^lod. */
   nir_def *lambda_exp = nir_fexp2(b, lod);
   nir_def *scale = NULL;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      /* Cube faces are square: broadcast 1/size.x to all three
       * gradient components. */
      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0, 0, 0, 0};
      scale = nir_frcp(b, nir_channels(b, size, 1));
      scale = nir_swizzle(b, scale, swizzle, 3);
   } else if (tex->is_array) {
      /* Drop the last (layer-count) component of the size vector;
       * the array layer needs no gradient. */
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size, (nir_component_mask_t)cmp_mask));
   }

   /* Caller guarantees is_array or cube, so scale is non-NULL here. */
   nir_def *grad = nir_fmul(b, lambda_exp, scale);

   /* Replace the LOD-style sources with identical x/y derivatives. */
   if (lod_idx >= 0)
      nir_tex_instr_remove_src(tex, lod_idx);
   if (bias_idx >= 0)
      nir_tex_instr_remove_src(tex, bias_idx);
   if (min_lod_idx >= 0)
      nir_tex_instr_remove_src(tex, min_lod_idx);
   nir_tex_instr_add_src(tex, nir_tex_src_ddx, grad);
   nir_tex_instr_add_src(tex, nir_tex_src_ddy, grad);

   tex->op = nir_texop_txd;
   return true;
}
199
200 static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl * impl)201 r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
202 {
203 nir_builder b = nir_builder_create(impl);
204
205 bool progress = false;
206 nir_foreach_block(block, impl)
207 {
208 nir_foreach_instr_safe(instr, block)
209 {
210 if (instr->type == nir_instr_type_tex) {
211 nir_tex_instr *tex = nir_instr_as_tex(instr);
212
213 if (tex->is_shadow &&
214 (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
215 (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
216 progress |= lower_txl_txf_array_or_cube(&b, tex);
217 }
218 }
219 }
220 return progress;
221 }
222
223 bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader * shader)224 r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
225 {
226 bool progress = false;
227 nir_foreach_function_impl(impl, shader)
228 {
229 if (r600_nir_lower_txl_txf_array_or_cube_impl(impl))
230 progress = true;
231 }
232 return progress;
233 }
234
235 static bool
r600_nir_lower_cube_to_2darray_filer(const nir_instr * instr,const void * _options)236 r600_nir_lower_cube_to_2darray_filer(const nir_instr *instr, const void *_options)
237 {
238 if (instr->type != nir_instr_type_tex)
239 return false;
240
241 auto tex = nir_instr_as_tex(instr);
242 if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
243 return false;
244
245 switch (tex->op) {
246 case nir_texop_tex:
247 case nir_texop_txb:
248 case nir_texop_txf:
249 case nir_texop_txl:
250 case nir_texop_lod:
251 case nir_texop_tg4:
252 case nir_texop_txd:
253 return true;
254 default:
255 return false;
256 }
257 }
258
/* Lower a cube-sampler texture instruction to a 2D-array lookup: project
 * the 3D direction vector onto the selected face and use the face index
 * as the array slice.  For cube arrays the slice becomes
 * face + 8 * layer.
 * NOTE(review): the layer multiplier 8 (not 6) and the face/coordinate
 * channel order follow the r600 hardware cube addressing — confirm
 * against the ISA docs if touching this.
 */
static nir_def *
r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   auto tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);

   /* cube_amd folds direction -> (face coords, major axis, face id);
    * channels 0/1 feed the 2D coordinate, 2 the scale, 3 the face. */
   auto cubed = nir_cube_amd(b,
                             nir_trim_vector(b, tex->src[coord_idx].src.ssa, 3));
   /* Normalize the face coordinates by the major-axis magnitude and
    * re-center from [-1,1] to [0.5, 2.5] face space. */
   auto xy = nir_fmad(b,
                      nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
                      nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
                      nir_imm_float(b, 1.5));

   nir_def *z = nir_channel(b, cubed, 3);
   if (tex->is_array && tex->op != nir_texop_lod) {
      /* Clamp the layer to >= 0, round to nearest, and pack it with the
       * face index: slice = face + 8 * layer. */
      auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
      z =
         nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0), z);
   }

   if (tex->op == nir_texop_txd) {
      /* Halve explicit derivatives to match the re-scaled face space.
       * NOTE(review): factor 0.5 presumably compensates for the [-1,1]
       * -> one-face coordinate mapping — confirm. */
      int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
      nir_src_rewrite(&tex->src[ddx_idx].src,
                      nir_fmul_imm(b, tex->src[ddx_idx].src.ssa, 0.5));

      int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
      nir_src_rewrite(&tex->src[ddy_idx].src,
                      nir_fmul_imm(b, tex->src[ddy_idx].src.ssa, 0.5));
   }

   /* Retarget the instruction at a 2D array with (x, y, slice). */
   auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
   nir_src_rewrite(&tex->src[coord_idx].src, new_coord);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   /* Mark so later passes (e.g. the coord-shift lowering above) know the
    * third component is a slice, not a coordinate. */
   tex->array_is_lowered_cube = true;

   tex->coord_components = 3;

   return NIR_LOWER_INSTR_PROGRESS;
}
302
/* Shader-level entry point: lower all matching cube-sampler texture
 * instructions to 2D-array lookups.  Returns true on progress. */
bool
r600_nir_lower_cube_to_2darray(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_cube_to_2darray_filer,
                                        r600_nir_lower_cube_to_2darray_impl,
                                        nullptr);
}
311