/*
 * Copyright © 2023 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "ac_nir.h"
#include "nir_builder.h"

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 derivative \p deriv, for the face selected by \p id.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * \p ma; i.e., a positive *out_ma means that \p deriv points towards the
 * selected major axis.
 */
static void
build_cube_select(nir_builder *b, nir_def *ma, nir_def *id, nir_def *deriv,
                  nir_def **out_ma, nir_def **out_sc, nir_def **out_tc)
{
   nir_def *deriv_x = nir_channel(b, deriv, 0);
   nir_def *deriv_y = nir_channel(b, deriv, 1);
   nir_def *deriv_z = nir_channel(b, deriv, 2);

   nir_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
   nir_def *sgn_ma =
      nir_bcsel(b, is_ma_positive, nir_imm_float(b, 1.0), nir_imm_float(b, -1.0));
   nir_def *neg_sgn_ma = nir_fneg(b, sgn_ma);

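   /* The face id is 0..5 in the order +X, -X, +Y, -Y, +Z, -Z, so ids 4..5
    * select the Z major axis and ids 2..3 the Y major axis.
    */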
   nir_def *is_ma_z = nir_fge_imm(b, id, 4.0);
   nir_def *is_ma_y = nir_fge_imm(b, id, 2.0);
   is_ma_y = nir_iand(b, is_ma_y, nir_inot(b, is_ma_z));
   nir_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);

   /* Select sc */
   nir_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
   nir_def *sgn =
      nir_bcsel(b, is_ma_y, nir_imm_float(b, 1.0), nir_bcsel(b, is_ma_z, sgn_ma, neg_sgn_ma));
   *out_sc = nir_fmul(b, tmp, sgn);

   /* Select tc */
   tmp = nir_bcsel(b, is_ma_y, deriv_z, deriv_y);
   sgn = nir_bcsel(b, is_ma_y, sgn_ma, nir_imm_float(b, -1.0));
   *out_tc = nir_fmul(b, tmp, sgn);

   /* Select ma */
   tmp = nir_bcsel(b, is_ma_z, deriv_z, nir_bcsel(b, is_ma_y, deriv_y, deriv_x));
   *out_ma = nir_fmul_imm(b, nir_fabs(b, tmp), 2.0);
}

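/* Lower cube texture coordinates to the layered-2D form the hardware samples:
 * face coordinates biased into [1.0, 2.0] plus a combined (8 * layer + face)
 * slice index. Explicit derivatives, when present, are rewritten to 2D
 * derivatives on the selected face.
 */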
static void
prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src *ddx,
                    nir_src *ddy, const ac_nir_lower_tex_options *options)
{
   nir_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
   for (unsigned i = 0; i < (*coord)->num_components; i++)
      coords[i] = nir_channel(b, *coord, i);

   /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
    *
    *    "For Array forms, the array layer used will be
    *
    *       max(0, min(d−1, floor(layer+0.5)))
    *
    *     where d is the depth of the texture array and layer
    *     comes from the component indicated in the tables below."
    *
    * Clamping the layer also works around an issue where the layer is
    * taken from a helper invocation which happens to fall on a different
    * layer due to extrapolation.
    *
    * GFX8 and earlier attempt to implement this in hardware by
    * clamping the value of coords[2] = (8 * layer) + face.
    * Unfortunately, this means that we end up with the wrong
    * face when clamping occurs.
    *
    * Clamp the layer earlier to work around the issue.
    */
   if (tex->is_array && options->gfx_level <= GFX8 && coords[3])
      coords[3] = nir_fmax(b, coords[3], nir_imm_float(b, 0.0));

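   /* nir_cube_amd yields tc, sc, twice the major axis, and the face id
    * (0..5) in channels 0-3.
    */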
   nir_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
   nir_def *sc = nir_channel(b, cube_coords, 1);
   nir_def *tc = nir_channel(b, cube_coords, 0);
   nir_def *ma = nir_channel(b, cube_coords, 2);
   nir_def *invma = nir_frcp(b, nir_fabs(b, ma));
   nir_def *id = nir_channel(b, cube_coords, 3);

   if (ddx || ddy) {
      sc = nir_fmul(b, sc, invma);
      tc = nir_fmul(b, tc, invma);

      /* Convert cube derivatives to 2D derivatives. */
      for (unsigned i = 0; i < 2; i++) {
         /* Transform the derivative alongside the texture
          * coordinate. Mathematically, the correct formula is
          * as follows. Assume we're projecting onto the +Z face
          * and denote by dx/dh the derivative of the (original)
          * X texture coordinate with respect to horizontal
          * window coordinates. The projection onto the +Z face
          * plane is:
          *
          *   f(x,z) = x/z
          *
          * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
          *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
          *
          * This motivates the implementation below.
          *
          * Whether this actually gives the expected results for
          * apps that might feed in derivatives obtained via
          * finite differences is anyone's guess. The OpenGL spec
          * seems awfully quiet about how textureGrad for cube
          * maps should be handled.
          */
         nir_def *deriv_ma, *deriv_sc, *deriv_tc;
         build_cube_select(b, ma, id, i ? ddy->ssa : ddx->ssa, &deriv_ma, &deriv_sc, &deriv_tc);

         deriv_ma = nir_fmul(b, deriv_ma, invma);

         nir_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
         nir_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));

         nir_src_rewrite(i ? ddy : ddx, nir_vec2(b, x, y));
      }

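      /* sc and tc are scaled by invma; since ma is twice the major axis they
       * now lie in [-0.5, 0.5], and the +1.5 bias maps the face coordinates
       * into the [1.0, 2.0] range (the else path below fuses both steps).
       */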
      sc = nir_fadd_imm(b, sc, 1.5);
      tc = nir_fadd_imm(b, tc, 1.5);
   } else {
      sc = nir_ffma_imm2(b, sc, invma, 1.5);
      tc = nir_ffma_imm2(b, tc, invma, 1.5);
   }

   if (tex->is_array && coords[3])
      id = nir_ffma_imm1(b, coords[3], 8.0, id);

   *coord = nir_vec3(b, sc, tc, id);

   tex->is_array = true;
}

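/* Round the float array layer to the nearest integer, ties to even. Vulkan
 * specifies round-to-nearest-even layer selection, which (it appears) not all
 * hardware applies on its own; cube arrays also need it before the layer is
 * folded into the combined layer/face index.
 */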
static bool
lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_def **coords)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
      return false;

   unsigned layer = tex->coord_components - 1;
   nir_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
   *coords = nir_vector_insert_imm(b, *coords, rounded_layer, layer);
   return true;
}

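/* Apply the coordinate lowerings needed for one tex instruction; returns true
 * and updates *coords if the coordinate vector was rewritten.
 */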
static bool
lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coords,
                 const ac_nir_lower_tex_options *options)
{
   bool progress = false;
   if ((options->lower_array_layer_round_even || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) &&
       tex->is_array && tex->op != nir_texop_lod)
      progress |= lower_array_layer_round_even(b, tex, coords);

   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
      return progress;

   int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
   int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
   nir_src *ddx = ddx_idx >= 0 ? &tex->src[ddx_idx].src : NULL;
   nir_src *ddy = ddy_idx >= 0 ? &tex->src[ddy_idx].src : NULL;

   prepare_cube_coords(b, tex, coords, ddx, ddy, options);

   return true;
}

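/* nir_shader_instructions_pass callback: rewrites the coordinate source of a
 * tex instruction unless a backend-prepared coordinate is already present.
 */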
static bool
lower_tex(nir_builder *b, nir_instr *instr, void *options_)
{
   const ac_nir_lower_tex_options *options = options_;
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   if (coord_idx < 0 || nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
      return false;

   b->cursor = nir_before_instr(instr);
   nir_def *coords = tex->src[coord_idx].src.ssa;
   if (lower_tex_coords(b, tex, &coords, options)) {
      tex->coord_components = coords->num_components;
      nir_src_rewrite(&tex->src[coord_idx].src, coords);
      return true;
   }

   return false;
}

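/* Where a texture coordinate component comes from: the input load that
 * produces it and, for interpolated inputs, the barycentric intrinsic feeding
 * that load (bary is NULL for flat load_input).
 */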
typedef struct {
   nir_intrinsic_instr *bary;
   nir_intrinsic_instr *load;
} coord_info;

static bool
can_move_coord(nir_scalar scalar, coord_info *info)
{
   if (scalar.def->bit_size != 32)
      return false;

   if (nir_scalar_is_const(scalar))
      return true;

   if (!nir_scalar_is_intrinsic(scalar))
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
   if (intrin->intrinsic == nir_intrinsic_load_input) {
      info->bary = NULL;
      info->load = intrin;
      return true;
   }

   if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

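   /* Both barycentric components must come from the same kind of barycentric
    * intrinsic with matching interp modes, so that the interpolated load can
    * be rebuilt at top level from a single rematerialized barycentric.
    */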
   nir_scalar coord_x = nir_scalar_resolved(intrin->src[0].ssa, 0);
   nir_scalar coord_y = nir_scalar_resolved(intrin->src[0].ssa, 1);
   if (!nir_scalar_is_intrinsic(coord_x) || coord_x.comp != 0 ||
       !nir_scalar_is_intrinsic(coord_y) || coord_y.comp != 1)
      return false;

   nir_intrinsic_instr *intrin_x = nir_instr_as_intrinsic(coord_x.def->parent_instr);
   nir_intrinsic_instr *intrin_y = nir_instr_as_intrinsic(coord_y.def->parent_instr);
   if (intrin_x->intrinsic != intrin_y->intrinsic ||
       (intrin_x->intrinsic != nir_intrinsic_load_barycentric_sample &&
        intrin_x->intrinsic != nir_intrinsic_load_barycentric_pixel &&
        intrin_x->intrinsic != nir_intrinsic_load_barycentric_centroid) ||
       nir_intrinsic_interp_mode(intrin_x) != nir_intrinsic_interp_mode(intrin_y))
      return false;

   info->bary = intrin_x;
   info->load = intrin;

   return true;
}

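/* State for moving coordinate computations out of divergent control flow:
 * toplevel_b stays positioned at the last point of top-level (uniform)
 * control flow, and num_wqm_vgprs tracks the linear VGPRs consumed so far
 * against options->max_wqm_vgprs.
 */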
struct move_tex_coords_state {
   const ac_nir_lower_tex_options *options;
   unsigned num_wqm_vgprs;
   nir_builder toplevel_b;
};

static nir_def *
build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_info info)
{
   nir_builder *b = &state->toplevel_b;

   if (nir_scalar_is_const(scalar))
      return nir_imm_intN_t(b, nir_scalar_as_uint(scalar), scalar.def->bit_size);

   ASSERTED nir_src offset = *nir_get_io_offset_src(info.load);
   assert(nir_src_is_const(offset) && !nir_src_as_uint(offset));

   nir_def *zero = nir_imm_int(b, 0);
   nir_def *res;
   if (info.bary) {
      enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(info.bary);
      nir_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
      res = nir_load_interpolated_input(b, 1, 32, bary, zero);
   } else {
      res = nir_load_input(b, 1, 32, zero);
   }
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(res->parent_instr);
   nir_intrinsic_set_base(intrin, nir_intrinsic_base(info.load));
   nir_intrinsic_set_component(intrin, nir_intrinsic_component(info.load) + scalar.comp);
   nir_intrinsic_set_dest_type(intrin, nir_intrinsic_dest_type(info.load));
   nir_intrinsic_set_io_semantics(intrin, nir_intrinsic_io_semantics(info.load));
   return res;
}

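/* Rebuild the coordinates of an implicit-derivative tex instruction in
 * top-level control flow and hand them to the instruction as a strict WQM
 * linear VGPR, so the derivatives remain well defined even when the
 * instruction itself sits in divergent control flow or after a discard.
 */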
static bool
move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, nir_instr *instr)
{
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->op != nir_texop_tex && tex->op != nir_texop_txb && tex->op != nir_texop_lod)
      return false;

   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_BUF:
   case GLSL_SAMPLER_DIM_MS:
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      return false; /* No LOD or can't be sampled. */
   }

   if (nir_tex_instr_src_index(tex, nir_tex_src_min_lod) != -1)
      return false;

   nir_tex_src *src = &tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)];
   nir_scalar components[NIR_MAX_VEC_COMPONENTS];
   coord_info infos[NIR_MAX_VEC_COMPONENTS];
   bool can_move_all = true;
   for (unsigned i = 0; i < tex->coord_components; i++) {
      components[i] = nir_scalar_resolved(src->src.ssa, i);
      can_move_all &= can_move_coord(components[i], &infos[i]);
   }
   if (!can_move_all)
      return false;

   int coord_base = 0;
   unsigned linear_vgpr_size = tex->coord_components;
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array)
      linear_vgpr_size--; /* cube array layer and face are combined */
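   /* Offset, bias, and comparator sources are packed into the linear VGPR
    * ahead of the coordinates; account for them in both the total size and
    * the dword offset at which the coordinates start.
    */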
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_offset:
      case nir_tex_src_bias:
      case nir_tex_src_comparator:
         coord_base++;
         linear_vgpr_size++;
         break;
      default:
         break;
      }
   }

   if (state->num_wqm_vgprs + linear_vgpr_size > state->options->max_wqm_vgprs)
      return false;

   for (unsigned i = 0; i < tex->coord_components; i++)
      components[i] = nir_get_scalar(build_coordinate(state, components[i], infos[i]), 0);

   nir_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
   lower_tex_coords(&state->toplevel_b, tex, &linear_vgpr, state->options);

   linear_vgpr = nir_strict_wqm_coord_amd(&state->toplevel_b, linear_vgpr, coord_base * 4);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_coord));
   tex->coord_components = 0;

   nir_tex_instr_add_src(tex, nir_tex_src_backend1, linear_vgpr);

   int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_src >= 0) /* Workaround requirement in nir_tex_instr_src_size(). */
      tex->src[offset_src].src_type = nir_tex_src_backend2;

   state->num_wqm_vgprs += linear_vgpr_size;

   return true;
}

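/* Like move_tex_coords, but for explicit derivative ALU ops: rematerialize
 * their sources at top level and recompute the derivative there.
 */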
static bool
move_fddxy(struct move_tex_coords_state *state, nir_function_impl *impl, nir_alu_instr *instr)
{
   switch (instr->op) {
   case nir_op_fddx:
   case nir_op_fddy:
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      break;
   default:
      return false;
   }

   unsigned num_components = instr->def.num_components;
   nir_scalar components[NIR_MAX_VEC_COMPONENTS];
   coord_info infos[NIR_MAX_VEC_COMPONENTS];
   bool can_move_all = true;
   for (unsigned i = 0; i < num_components; i++) {
      components[i] = nir_scalar_chase_alu_src(nir_get_scalar(&instr->def, i), 0);
      components[i] = nir_scalar_chase_movs(components[i]);
      can_move_all &= can_move_coord(components[i], &infos[i]);
   }
   if (!can_move_all || state->num_wqm_vgprs + num_components > state->options->max_wqm_vgprs)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_def *def = build_coordinate(state, components[i], infos[i]);
      components[i] = nir_get_scalar(def, 0);
   }

   nir_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
   def = nir_build_alu1(&state->toplevel_b, instr->op, def);
   nir_def_rewrite_uses(&instr->def, def);

   state->num_wqm_vgprs += num_components;

   return true;
}

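/* Walk the control-flow tree, tracking whether the current position is in
 * divergent control flow and whether a (possibly divergent) discard has been
 * seen. Texture and derivative instructions in either situation get their
 * coordinate computations moved to the top level, where all invocations are
 * still active.
 */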
static bool
move_coords_from_divergent_cf(struct move_tex_coords_state *state, nir_function_impl *impl,
                              struct exec_list *cf_list, bool *divergent_discard, bool divergent_cf)
{
   bool progress = false;
   foreach_list_typed (nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(cf_node);

         bool top_level = cf_list == &impl->body;

         nir_foreach_instr (instr, block) {
            if (top_level && !*divergent_discard)
               state->toplevel_b.cursor = nir_before_instr(instr);

            if (instr->type == nir_instr_type_tex && (divergent_cf || *divergent_discard)) {
               progress |= move_tex_coords(state, impl, instr);
            } else if (instr->type == nir_instr_type_alu && (divergent_cf || *divergent_discard)) {
               progress |= move_fddxy(state, impl, nir_instr_as_alu(instr));
            } else if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               switch (intrin->intrinsic) {
               case nir_intrinsic_discard:
               case nir_intrinsic_terminate:
                  if (divergent_cf)
                     *divergent_discard = true;
                  break;
               case nir_intrinsic_discard_if:
               case nir_intrinsic_terminate_if:
                  if (divergent_cf || nir_src_is_divergent(intrin->src[0]))
                     *divergent_discard = true;
                  break;
               default:
                  break;
               }
            }
         }

         if (top_level && !*divergent_discard)
            state->toplevel_b.cursor = nir_after_block_before_jump(block);
         break;
      }
      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         bool divergent_discard_then = *divergent_discard;
         bool divergent_discard_else = *divergent_discard;
         bool then_else_divergent = divergent_cf || nir_src_is_divergent(nif->condition);
         progress |= move_coords_from_divergent_cf(state, impl, &nif->then_list,
                                                   &divergent_discard_then, then_else_divergent);
         progress |= move_coords_from_divergent_cf(state, impl, &nif->else_list,
                                                   &divergent_discard_else, then_else_divergent);
         *divergent_discard |= divergent_discard_then || divergent_discard_else;
         break;
      }
      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |=
            move_coords_from_divergent_cf(state, impl, &loop->body, divergent_discard, true);
         break;
      }
      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

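/* Entry point: optionally hoist derivative sources out of divergent control
 * flow, then lower array/cube coordinates on every tex instruction.
 *
 * A minimal usage sketch (the caller-side values are illustrative):
 *
 *    ac_nir_lower_tex_options options = {
 *       .gfx_level = gfx_level,
 *       .lower_array_layer_round_even = true,
 *       .fix_derivs_in_divergent_cf = true,
 *       .max_wqm_vgprs = 64,
 *    };
 *    NIR_PASS(progress, nir, ac_nir_lower_tex, &options);
 */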
bool
ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options)
{
   bool progress = false;
   if (options->fix_derivs_in_divergent_cf) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);

      struct move_tex_coords_state state;
      state.toplevel_b = nir_builder_create(impl);
      state.options = options;
      state.num_wqm_vgprs = 0;

      bool divergent_discard = false;
      if (move_coords_from_divergent_cf(&state, impl, &impl->body, &divergent_discard, false)) {
         nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   progress |= nir_shader_instructions_pass(
      nir, lower_tex, nir_metadata_block_index | nir_metadata_dominance, (void *)options);

   return progress;
}