1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2019 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instruction_tex.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30
31 namespace r600 {
32
TexInstruction(Opcode op,const GPRVector & dest,const GPRVector & src,unsigned sid,unsigned rid,PValue sampler_offset)33 TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
34 unsigned sid, unsigned rid, PValue sampler_offset):
35 Instruction(tex),
36 m_opcode(op),
37 m_dst(dest),
38 m_src(src),
39 m_sampler_id(sid),
40 m_resource_id(rid),
41 m_flags(0),
42 m_inst_mode(0),
43 m_dest_swizzle{0,1,2,3},
44 m_sampler_offset(sampler_offset)
45
46 {
47 memset(m_offset, 0, sizeof (m_offset));
48
49 add_remappable_src_value(&m_src);
50 add_remappable_src_value(&m_sampler_offset);
51 add_remappable_dst_value(&m_dst);
52 }
53
set_gather_comp(int cmp)54 void TexInstruction::set_gather_comp(int cmp)
55 {
56 m_inst_mode = cmp;
57 }
58
replace_values(const ValueSet & candidates,PValue new_value)59 void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
60 {
61 // I wonder whether we can actually end up here ...
62 for (auto c: candidates) {
63 if (*c == *m_src.reg_i(c->chan()))
64 m_src.set_reg_i(c->chan(), new_value);
65 if (*c == *m_dst.reg_i(c->chan()))
66 m_dst.set_reg_i(c->chan(), new_value);
67 }
68 }
69
set_offset(unsigned index,int32_t val)70 void TexInstruction::set_offset(unsigned index, int32_t val)
71 {
72 assert(index < 3);
73 m_offset[index] = val;
74 }
75
get_offset(unsigned index) const76 int TexInstruction::get_offset(unsigned index) const
77 {
78 assert(index < 3);
79 return (m_offset[index] << 1 & 0x1f);
80 }
81
is_equal_to(const Instruction & rhs) const82 bool TexInstruction::is_equal_to(const Instruction& rhs) const
83 {
84 assert(rhs.type() == tex);
85 const auto& r = static_cast<const TexInstruction&>(rhs);
86 return (m_opcode == r.m_opcode &&
87 m_dst == r.m_dst &&
88 m_src == r.m_src &&
89 m_sampler_id == r.m_sampler_id &&
90 m_resource_id == r.m_resource_id);
91 }
92
do_print(std::ostream & os) const93 void TexInstruction::do_print(std::ostream& os) const
94 {
95 const char *map_swz = "xyzw01?_";
96 os << opname(m_opcode) << " R" << m_dst.sel() << ".";
97 for (int i = 0; i < 4; ++i)
98 os << map_swz[m_dest_swizzle[i]];
99
100 os << " " << m_src
101 << " RESID:" << m_resource_id << " SAMPLER:"
102 << m_sampler_id;
103 }
104
opname(Opcode op)105 const char *TexInstruction::opname(Opcode op)
106 {
107 switch (op) {
108 case ld: return "LD";
109 case get_resinfo: return "GET_TEXTURE_RESINFO";
110 case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
111 case get_tex_lod: return "GET_LOD";
112 case get_gradient_h: return "GET_GRADIENTS_H";
113 case get_gradient_v: return "GET_GRADIENTS_V";
114 case set_offsets: return "SET_TEXTURE_OFFSETS";
115 case keep_gradients: return "KEEP_GRADIENTS";
116 case set_gradient_h: return "SET_GRADIENTS_H";
117 case set_gradient_v: return "SET_GRADIENTS_V";
118 case sample: return "SAMPLE";
119 case sample_l: return "SAMPLE_L";
120 case sample_lb: return "SAMPLE_LB";
121 case sample_lz: return "SAMPLE_LZ";
122 case sample_g: return "SAMPLE_G";
123 case sample_g_lb: return "SAMPLE_G_L";
124 case gather4: return "GATHER4";
125 case gather4_o: return "GATHER4_O";
126 case sample_c: return "SAMPLE_C";
127 case sample_c_l: return "SAMPLE_C_L";
128 case sample_c_lb: return "SAMPLE_C_LB";
129 case sample_c_lz: return "SAMPLE_C_LZ";
130 case sample_c_g: return "SAMPLE_C_G";
131 case sample_c_g_lb: return "SAMPLE_C_G_L";
132 case gather4_c: return "GATHER4_C";
133 case gather4_c_o: return "OP_GATHER4_C_O";
134 }
135 return "ERROR";
136 }
137
138
139
lower_coord_shift_normalized(nir_builder * b,nir_tex_instr * tex)140 static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
141 {
142 b->cursor = nir_before_instr(&tex->instr);
143
144 nir_ssa_def * size = nir_i2f32(b, nir_get_texture_size(b, tex));
145 nir_ssa_def *scale = nir_frcp(b, size);
146
147 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
148 nir_ssa_def *corr = nullptr;
149 if (unlikely(tex->array_is_lowered_cube)) {
150 auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
151 nir_fmul(b, nir_imm_float(b, -0.5f), scale));
152 corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
153 nir_channel(
154 b, tex->src[coord_index].src.ssa, 2));
155 } else {
156 corr = nir_fadd(b,
157 nir_fmul(b, nir_imm_float(b, -0.5f), scale),
158 tex->src[coord_index].src.ssa);
159 }
160
161 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
162 nir_src_for_ssa(corr));
163 return true;
164 }
165
lower_coord_shift_unnormalized(nir_builder * b,nir_tex_instr * tex)166 static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
167 {
168 b->cursor = nir_before_instr(&tex->instr);
169 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
170 nir_ssa_def *corr = nullptr;
171 if (unlikely(tex->array_is_lowered_cube)) {
172 auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
173 nir_imm_float(b, -0.5f));
174 corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
175 nir_channel(b, tex->src[coord_index].src.ssa, 2));
176 } else {
177 corr = nir_fadd(b, tex->src[coord_index].src.ssa,
178 nir_imm_float(b, -0.5f));
179 }
180 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
181 nir_src_for_ssa(corr));
182 return true;
183 }
184
185 static bool
r600_nir_lower_int_tg4_impl(nir_function_impl * impl)186 r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
187 {
188 nir_builder b;
189 nir_builder_init(&b, impl);
190
191 bool progress = false;
192 nir_foreach_block(block, impl) {
193 nir_foreach_instr_safe(instr, block) {
194 if (instr->type == nir_instr_type_tex) {
195 nir_tex_instr *tex = nir_instr_as_tex(instr);
196 if (tex->op == nir_texop_tg4 &&
197 tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
198 if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
199 if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
200 lower_coord_shift_normalized(&b, tex);
201 else
202 lower_coord_shift_unnormalized(&b, tex);
203 progress = true;
204 }
205 }
206 }
207 }
208 }
209 return progress;
210 }
211
/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integral valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect it has on Gather4, which always returns 4 texels for
 * bilinear filtering, is that the final coordinates are off by 0.5 of
 * the texel size.
 */
222
r600_nir_lower_int_tg4(nir_shader * shader)223 bool r600_nir_lower_int_tg4(nir_shader *shader)
224 {
225 bool progress = false;
226 bool need_lowering = false;
227
228 nir_foreach_uniform_variable(var, shader) {
229 if (var->type->is_sampler()) {
230 if (glsl_base_type_is_integer(var->type->sampled_type)) {
231 need_lowering = true;
232 }
233 }
234 }
235
236 if (need_lowering) {
237 nir_foreach_function(function, shader) {
238 if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
239 progress = true;
240 }
241 }
242
243 return progress;
244 }
245
/* Rewrite a shadow TXB/TXL on an array or cube sampler into a TXD.
 *
 * The effective LOD (explicit lod or queried lod plus bias, clamped to
 * min_lod) is converted into an isotropic gradient of 2^lod texels, and
 * the lookup is re-emitted as an explicit-gradient instruction.
 * NOTE(review): presumably the hardware cannot combine explicit/biased
 * LOD with a shadow compare on these sampler types — confirm against
 * the ISA documentation. */
static
bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   assert (lod_idx >= 0 || bias_idx >= 0);

   /* Start from the explicit LOD if there is one, otherwise query the
    * LOD the hardware would have computed implicitly. */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *lod = (lod_idx >= 0) ?
                         nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
                         nir_get_texture_lod(b, tex);

   if (bias_idx >= 0)
      lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1));

   if (min_lod_idx >= 0)
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));

   /* max lod? */

   /* 2^lod times the reciprocal texture size is the footprint of one
    * texel at that LOD; used below as an isotropic gradient. */
   nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
   nir_ssa_def *scale = NULL;

   if (tex->is_array) {
      /* Mask off the layer-count component of the size vector. */
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size,
                                       (nir_component_mask_t)cmp_mask));
   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      /* Cube faces are square: replicate 1/size.x to three components. */
      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
      scale = nir_frcp(b, nir_channels(b, size, 1));
      scale = nir_swizzle(b, scale, swizzle, 3);
   }

   /* NOTE(review): callers guarantee is_array || CUBE; otherwise scale
    * would still be NULL here. */
   nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);

   /* Drop the LOD-related sources and attach the synthesized gradients.
    * NOTE(review): nir_tex_instr_remove_src shifts the indices of later
    * sources — this removal order assumes the cached indices stay valid
    * (e.g. lod and bias never coexist); confirm. */
   if (lod_idx >= 0)
      nir_tex_instr_remove_src(tex, lod_idx);
   if (bias_idx >= 0)
      nir_tex_instr_remove_src(tex, bias_idx);
   if (min_lod_idx >= 0)
      nir_tex_instr_remove_src(tex, min_lod_idx);
   nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
   nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));

   tex->op = nir_texop_txd;
   return true;
}
300
301
302 static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl * impl)303 r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
304 {
305 nir_builder b;
306 nir_builder_init(&b, impl);
307
308 bool progress = false;
309 nir_foreach_block(block, impl) {
310 nir_foreach_instr_safe(instr, block) {
311 if (instr->type == nir_instr_type_tex) {
312 nir_tex_instr *tex = nir_instr_as_tex(instr);
313
314 if (tex->is_shadow &&
315 (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
316 (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
317 progress |= lower_txl_txf_array_or_cube(&b, tex);
318 }
319 }
320 }
321 return progress;
322 }
323
324 bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader * shader)325 r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
326 {
327 bool progress = false;
328 nir_foreach_function(function, shader) {
329 if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
330 progress = true;
331 }
332 return progress;
333 }
334
335 static bool
r600_nir_lower_cube_to_2darray_filer(const nir_instr * instr,const void * _options)336 r600_nir_lower_cube_to_2darray_filer(const nir_instr *instr, const void *_options)
337 {
338 if (instr->type != nir_instr_type_tex)
339 return false;
340
341 auto tex = nir_instr_as_tex(instr);
342 if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
343 return false;
344
345 switch (tex->op) {
346 case nir_texop_tex:
347 case nir_texop_txb:
348 case nir_texop_txf:
349 case nir_texop_txl:
350 case nir_texop_lod:
351 case nir_texop_tg4:
352 case nir_texop_txd:
353 return true;
354 default:
355 return false;
356 }
357 }
358
/* Lowering callback: rewrite a cube (array) lookup into a 2D-array
 * lookup using the hardware CUBE coordinate transform.
 *
 * nir_cube_r600 yields a vec4 — NOTE(review): presumably
 * {t, s, 2*major_axis, face_id} as produced by the hw CUBE instruction;
 * confirm against the ISA documentation. The instruction is modified in
 * place, so NIR_LOWER_INSTR_PROGRESS is returned instead of a new def. */
static nir_ssa_def *
r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   auto tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);

   /* Run the cube transform on the 3D direction (x,y,z). */
   auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7));
   /* Face-local 2D coordinate: (t,s) / |major axis| + 1.5. */
   auto xy = nir_fmad(b,
                      nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
                      nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
                      nir_imm_float(b, 1.5));

   /* Layer starts out as the face index ... */
   nir_ssa_def *z = nir_channel(b, cubed, 3);
   if (tex->is_array) {
      /* ... and for cube arrays, max(round(slice), 0) * 8 is added.
       * NOTE(review): factor 8.0 rather than 6 faces — presumably the
       * hw layer layout pads cube slices to 8; confirm. */
      auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
      z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0),
                   z);
   }

   if (tex->op == nir_texop_txd) {
      /* Halve the explicit derivatives — NOTE(review): presumably to
       * account for the face-local coordinate scaling; confirm. */
      int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
      auto zero_dot_5 = nir_imm_float(b, 0.5);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5)));

      int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5)));
   }

   /* Replace the coordinate and retag the instruction as a 2D array
    * lookup that originated from a lowered cube. */
   auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src,
                         nir_src_for_ssa(new_coord));
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   tex->array_is_lowered_cube = true;

   tex->coord_components = 3;

   return NIR_LOWER_INSTR_PROGRESS;
}
403
404 bool
r600_nir_lower_cube_to_2darray(nir_shader * shader)405 r600_nir_lower_cube_to_2darray(nir_shader *shader)
406 {
407 return nir_shader_lower_instructions(shader,
408 r600_nir_lower_cube_to_2darray_filer,
409 r600_nir_lower_cube_to_2darray_impl, nullptr);
410 }
411
412
413
414 }
415