/*
 * Copyright 2024 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6 
/* This is a pre-link lowering and optimization pass that modifies the shader for the purpose
 * of gathering accurate shader_info and determining hw registers. It should be run before
 * linking passes and it doesn't produce AMD intrinsics that would break linking passes.
 * Some of the options come from dynamic state.
 *
 * It should be run after nir_lower_io, but before nir_opt_varyings.
 */
14 
15 #include "ac_nir.h"
16 #include "sid.h"
17 #include "nir_builder.h"
18 #include "nir_builtin_builder.h"
19 
20 typedef struct {
21    const ac_nir_lower_ps_early_options *options;
22 
23    nir_variable *persp_center;
24    nir_variable *persp_centroid;
25    nir_variable *persp_sample;
26    nir_variable *linear_center;
27    nir_variable *linear_centroid;
28    nir_variable *linear_sample;
29 
30    bool frag_color_is_frag_data0;
31    bool seen_color0_alpha;
32    bool uses_fragcoord_xy_as_float;
33    bool use_fragcoord;
34 
35    nir_def *load_helper_invoc_at_top;
36 } lower_ps_early_state;
37 
38 static nir_variable *
get_baryc_var_common(nir_builder * b,bool will_replace,nir_variable ** var,const char * var_name)39 get_baryc_var_common(nir_builder *b, bool will_replace, nir_variable **var, const char *var_name)
40 {
41    if (will_replace) {
42       if (!*var) {
43          *var = nir_local_variable_create(b->impl, glsl_vec_type(2), var_name);
44       }
45       return *var;
46    }
47    return NULL;
48 }
49 
50 static nir_variable *
get_baryc_var(nir_builder * b,nir_intrinsic_op baryc_op,enum glsl_interp_mode mode,lower_ps_early_state * s)51 get_baryc_var(nir_builder *b, nir_intrinsic_op baryc_op, enum glsl_interp_mode mode,
52               lower_ps_early_state *s)
53 {
54    switch (baryc_op) {
55    case nir_intrinsic_load_barycentric_pixel:
56       if (mode == INTERP_MODE_NOPERSPECTIVE) {
57          return get_baryc_var_common(b, s->options->ps_iter_samples > 1, &s->linear_center,
58                                      "linear_center");
59       } else {
60          return get_baryc_var_common(b, s->options->ps_iter_samples > 1, &s->persp_center,
61                                      "persp_center");
62       }
63    case nir_intrinsic_load_barycentric_centroid:
64       if (mode == INTERP_MODE_NOPERSPECTIVE) {
65          return get_baryc_var_common(b, s->options->ps_iter_samples > 1 ||
66                                      s->options->force_center_interp_no_msaa, &s->linear_centroid,
67                                      "linear_centroid");
68       } else {
69          return get_baryc_var_common(b, s->options->ps_iter_samples > 1 ||
70                                      s->options->force_center_interp_no_msaa, &s->persp_centroid,
71                                      "persp_centroid");
72       }
73    case nir_intrinsic_load_barycentric_sample:
74       if (mode == INTERP_MODE_NOPERSPECTIVE) {
75          return get_baryc_var_common(b, s->options->force_center_interp_no_msaa, &s->linear_sample,
76                                      "linear_sample");
77       } else {
78          return get_baryc_var_common(b, s->options->force_center_interp_no_msaa, &s->persp_sample,
79                                      "persp_sample");
80       }
81    default:
82       return NULL;
83    }
84 }
85 
86 static void
set_interp_vars(nir_builder * b,nir_def * new_baryc,nir_variable * baryc1,nir_variable * baryc2)87 set_interp_vars(nir_builder *b, nir_def *new_baryc, nir_variable *baryc1, nir_variable *baryc2)
88 {
89    if (baryc1)
90       nir_store_var(b, baryc1, new_baryc, 0x3);
91    if (baryc2)
92       nir_store_var(b, baryc2, new_baryc, 0x3);
93 }
94 
95 static void
init_interp_param(nir_builder * b,lower_ps_early_state * s)96 init_interp_param(nir_builder *b, lower_ps_early_state *s)
97 {
98    b->cursor = nir_before_cf_list(&b->impl->body);
99 
100    if (s->options->ps_iter_samples > 1) {
101       set_interp_vars(b, nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_SMOOTH),
102                       s->persp_center, s->persp_centroid);
103       set_interp_vars(b, nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE),
104                       s->linear_center, s->linear_centroid);
105    }
106 
107    if (s->options->force_center_interp_no_msaa) {
108       set_interp_vars(b, nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH),
109                       s->persp_sample, s->persp_centroid);
110       set_interp_vars(b, nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE),
111                       s->linear_sample, s->linear_centroid);
112    }
113 }
114 
115 static bool
rewrite_ps_load_barycentric(nir_builder * b,nir_intrinsic_instr * intrin,lower_ps_early_state * s)116 rewrite_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
117 {
118    nir_variable *baryc_var = get_baryc_var(b, intrin->intrinsic,
119                                            nir_intrinsic_interp_mode(intrin), s);
120    if (!baryc_var)
121       return false;
122 
123    nir_def_replace(&intrin->def, nir_load_var(b, baryc_var));
124    return true;
125 }
126 
127 static bool
optimize_lower_ps_outputs(nir_builder * b,nir_intrinsic_instr * intrin,lower_ps_early_state * s)128 optimize_lower_ps_outputs(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
129 {
130    unsigned slot = nir_intrinsic_io_semantics(intrin).location;
131 
132    switch (slot) {
133    case FRAG_RESULT_DEPTH:
134       if (!s->options->kill_z)
135          return false;
136       nir_instr_remove(&intrin->instr);
137       return true;
138 
139    case FRAG_RESULT_STENCIL:
140       if (!s->options->kill_stencil)
141          return false;
142       nir_instr_remove(&intrin->instr);
143       return true;
144 
145    case FRAG_RESULT_SAMPLE_MASK:
146       if (!s->options->kill_samplemask)
147          return false;
148       nir_instr_remove(&intrin->instr);
149       return true;
150    }
151 
152    unsigned writemask = nir_intrinsic_write_mask(intrin);
153    unsigned component = nir_intrinsic_component(intrin);
154    unsigned color_index = (slot >= FRAG_RESULT_DATA0 ? slot - FRAG_RESULT_DATA0 : 0) +
155                           nir_intrinsic_io_semantics(intrin).dual_source_blend_index;
156    nir_def *value = intrin->src[0].ssa;
157    bool progress = false;
158 
159    /* Clamp color. */
160    if (s->options->clamp_color) {
161       value = nir_fsat(b, value);
162       progress = true;
163    }
164 
165    /* Alpha test. */
166    if (color_index == 0 && s->options->alpha_func != COMPARE_FUNC_ALWAYS &&
167        (writemask << component) & BITFIELD_BIT(3)) {
168       assert(!s->seen_color0_alpha);
169       s->seen_color0_alpha = true;
170 
171       if (s->options->alpha_func == COMPARE_FUNC_NEVER) {
172          nir_discard(b);
173       } else {
174          nir_def *ref = nir_load_alpha_reference_amd(b);
175          ref = nir_convert_to_bit_size(b, ref, nir_type_float, value->bit_size);
176          nir_def *alpha = s->options->alpha_test_alpha_to_one ?
177                              nir_imm_floatN_t(b, 1, value->bit_size) :
178                              nir_channel(b, value, 3 - component);
179          nir_def *cond = nir_compare_func(b, s->options->alpha_func, alpha, ref);
180          nir_discard_if(b, nir_inot(b, cond));
181       }
182       progress = true;
183    }
184 
185    /* Trim the src according to the format and writemask. */
186    unsigned cb_shader_mask = ac_get_cb_shader_mask(s->options->spi_shader_col_format_hint);
187    unsigned format_mask;
188 
189    if (slot == FRAG_RESULT_COLOR && !s->frag_color_is_frag_data0) {
190       /* cb_shader_mask is 0 for disabled color buffers, so combine all of them. */
191       format_mask = 0;
192       for (unsigned i = 0; i < 8; i++)
193          format_mask |= (cb_shader_mask >> (i * 4)) & 0xf;
194    } else {
195       format_mask = (cb_shader_mask >> (color_index * 4)) & 0xf;
196    }
197 
198    if (s->options->keep_alpha_for_mrtz && color_index == 0)
199       format_mask |= BITFIELD_BIT(3);
200 
201    writemask = (format_mask >> component) & writemask;
202    nir_intrinsic_set_write_mask(intrin, writemask);
203 
204    /* Empty writemask. */
205    if (!writemask) {
206       nir_instr_remove(&intrin->instr);
207       return true;
208    }
209 
210    /* Trim the src to the last bit of writemask. */
211    unsigned num_components = util_last_bit(writemask);
212 
213    if (num_components != value->num_components) {
214       assert(num_components < value->num_components);
215       value = nir_trim_vector(b, value, num_components);
216       progress = true;
217    }
218 
219    /* Replace disabled channels in a non-contiguous writemask with undef. */
220    if (!util_is_power_of_two_nonzero(writemask + 1)) {
221       u_foreach_bit(i, BITFIELD_MASK(num_components) & ~writemask) {
222          value = nir_vector_insert_imm(b, value, nir_undef(b, 1, value->bit_size), i);
223          progress = true;
224       }
225    }
226 
227    if (intrin->src[0].ssa != value) {
228       assert(progress);
229       nir_src_rewrite(&intrin->src[0], value);
230       intrin->num_components = value->num_components;
231    } else {
232       assert(intrin->src[0].ssa == value);
233    }
234 
235    return progress;
236 }
237 
238 static nir_def *
get_load_helper_invocation(nir_function_impl * impl,lower_ps_early_state * s)239 get_load_helper_invocation(nir_function_impl *impl, lower_ps_early_state *s)
240 {
241    /* Insert this only once. */
242    if (!s->load_helper_invoc_at_top) {
243       nir_builder b = nir_builder_at(nir_before_impl(impl));
244       s->load_helper_invoc_at_top = nir_load_helper_invocation(&b, 1);
245    }
246 
247    return s->load_helper_invoc_at_top;
248 }
249 
250 static bool
lower_ps_load_sample_mask_in(nir_builder * b,nir_intrinsic_instr * intrin,lower_ps_early_state * s)251 lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
252 {
253    /* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
254     * says:
255     *
256     *    "When per-sample shading is active due to the use of a fragment
257     *     input qualified by sample or due to the use of the gl_SampleID
258     *     or gl_SamplePosition variables, only the bit for the current
259     *     sample is set in gl_SampleMaskIn. When state specifies multiple
260     *     fragment shader invocations for a given fragment, the sample
261     *     mask for any single fragment shader invocation may specify a
262     *     subset of the covered samples for the fragment. In this case,
263     *     the bit corresponding to each covered sample will be set in
264     *     exactly one fragment shader invocation."
265     *
266     * The samplemask loaded by hardware is always the coverage of the
267     * entire pixel/fragment, so mask bits out based on the sample ID.
268     */
269    nir_def *replacement = NULL;
270 
271    /* Set ps_iter_samples=8 if full sample shading is enabled even for 2x and 4x MSAA
272     * to get this fast path that fully replaces sample_mask_in with sample_id.
273     */
274    if (s->options->force_center_interp_no_msaa && !s->options->uses_vrs_coarse_shading) {
275       replacement = nir_b2i32(b, nir_inot(b, get_load_helper_invocation(b->impl, s)));
276    } else if (s->options->ps_iter_samples == 8) {
277       replacement = nir_bcsel(b, get_load_helper_invocation(b->impl, s), nir_imm_int(b, 0),
278                               nir_ishl(b, nir_imm_int(b, 1), nir_load_sample_id(b)));
279    } else if (s->options->ps_iter_samples > 1) {
280       uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
281       nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), nir_load_sample_id(b));
282       replacement = nir_iand(b, nir_load_sample_mask_in(b), submask);
283    } else {
284       return false;
285    }
286 
287    nir_def_replace(&intrin->def, replacement);
288    return true;
289 }
290 
291 static nir_def *
lower_load_barycentric_at_offset(nir_builder * b,nir_def * offset,enum glsl_interp_mode mode)292 lower_load_barycentric_at_offset(nir_builder *b, nir_def *offset, enum glsl_interp_mode mode)
293 {
294    /* ddx/ddy must execute before terminate (discard). */
295    nir_builder sb = nir_builder_at(nir_before_impl(b->impl));
296    nir_def *baryc = nir_load_barycentric_pixel(&sb, 32, .interp_mode = mode);
297    nir_def *i = nir_channel(&sb, baryc, 0);
298    nir_def *j = nir_channel(&sb, baryc, 1);
299    nir_def *ddx_i = nir_ddx(&sb, i);
300    nir_def *ddx_j = nir_ddx(&sb, j);
301    nir_def *ddy_i = nir_ddy(&sb, i);
302    nir_def *ddy_j = nir_ddy(&sb, j);
303 
304    nir_def *offset_x = nir_channel(b, offset, 0);
305    nir_def *offset_y = nir_channel(b, offset, 1);
306 
307    /* Interpolate standard barycentrics by offset. */
308    nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
309    nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
310    return nir_vec2(b, offset_i, offset_j);
311 }
312 
313 static nir_def *
fbfetch_color_buffer0(nir_builder * b,lower_ps_early_state * s)314 fbfetch_color_buffer0(nir_builder *b, lower_ps_early_state *s)
315 {
316    nir_def *zero = nir_imm_zero(b, 1, 32);
317    nir_def *undef = nir_undef(b, 1, 32);
318 
319    unsigned chan = 0;
320    nir_def *coord_vec[4] = {undef, undef, undef, undef};
321    nir_def *pixel_coord = nir_u2u32(b, nir_load_pixel_coord(b));
322 
323    coord_vec[chan++] = nir_channel(b, pixel_coord, 0);
324 
325    if (!s->options->fbfetch_is_1D)
326       coord_vec[chan++] = nir_channel(b, pixel_coord, 1);
327 
328    /* Get the current render target layer index. */
329    if (s->options->fbfetch_layered)
330       coord_vec[chan++] = nir_load_layer_id(b);
331 
332    nir_def *coords = nir_vec(b, coord_vec, 4);
333 
334    enum glsl_sampler_dim dim;
335    if (s->options->fbfetch_msaa)
336       dim = GLSL_SAMPLER_DIM_MS;
337    else if (s->options->fbfetch_is_1D)
338       dim = GLSL_SAMPLER_DIM_1D;
339    else
340       dim = GLSL_SAMPLER_DIM_2D;
341 
342    nir_def *sample_id;
343    if (s->options->fbfetch_msaa) {
344       sample_id = nir_load_sample_id(b);
345 
346       if (s->options->fbfetch_apply_fmask) {
347          nir_def *fmask =
348             nir_bindless_image_fragment_mask_load_amd(
349                b, nir_load_fbfetch_image_fmask_desc_amd(b), coords,
350                .image_dim = dim,
351                .image_array = s->options->fbfetch_layered,
352                .access = ACCESS_CAN_REORDER);
353          sample_id = nir_ubfe(b, fmask, nir_ishl_imm(b, sample_id, 2), nir_imm_int(b, 3));
354       }
355    } else {
356       sample_id = zero;
357    }
358 
359    return nir_bindless_image_load(b, 4, 32, nir_load_fbfetch_image_desc_amd(b), coords, sample_id,
360                                   zero,
361                                   .image_dim = dim,
362                                   .image_array = s->options->fbfetch_layered,
363                                   .access = ACCESS_CAN_REORDER);
364 }
365 
366 static bool
lower_ps_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin,void * state)367 lower_ps_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
368 {
369    lower_ps_early_state *s = (lower_ps_early_state *)state;
370 
371    b->cursor = nir_before_instr(&intrin->instr);
372 
373    switch (intrin->intrinsic) {
374    case nir_intrinsic_store_output:
375       return optimize_lower_ps_outputs(b, intrin, s);
376    case nir_intrinsic_load_barycentric_pixel:
377    case nir_intrinsic_load_barycentric_centroid:
378    case nir_intrinsic_load_barycentric_sample:
379       return rewrite_ps_load_barycentric(b, intrin, s);
380    case nir_intrinsic_load_sample_mask_in:
381       return lower_ps_load_sample_mask_in(b, intrin, s);
382    case nir_intrinsic_load_front_face:
383       if (s->options->force_front_face) {
384          nir_def_replace(&intrin->def, nir_imm_bool(b, s->options->force_front_face == 1));
385          return true;
386       }
387       break;
388    case nir_intrinsic_load_front_face_fsign:
389       if (s->options->force_front_face) {
390          nir_def_replace(&intrin->def, nir_imm_float(b, s->options->force_front_face == 1 ? 1 : -1));
391          return true;
392       }
393       break;
394    case nir_intrinsic_load_sample_pos:
395       if (s->options->frag_coord_is_center) {
396          /* We have to use the alternative way to get sample_pos. */
397          nir_def *num_samples = s->options->load_sample_positions_always_loads_current_ones ?
398                                    nir_undef(b, 1, 32) : nir_load_rasterization_samples_amd(b);
399          nir_def_replace(&intrin->def, nir_load_sample_positions_amd(b, 32, nir_load_sample_id(b),
400                                                                      num_samples));
401       } else {
402          /* sample_pos = ffract(frag_coord.xy); */
403          nir_def_replace(&intrin->def, nir_ffract(b, nir_channels(b, nir_load_frag_coord(b), 0x3)));
404       }
405       return true;
406    case nir_intrinsic_load_barycentric_at_offset:
407       nir_def_replace(&intrin->def,
408                       lower_load_barycentric_at_offset(b, intrin->src[0].ssa,
409                                                        nir_intrinsic_interp_mode(intrin)));
410       return true;
411    case nir_intrinsic_load_barycentric_at_sample: {
412       unsigned mode = nir_intrinsic_interp_mode(intrin);
413       nir_def *sample_id = intrin->src[0].ssa;
414 
415       if (s->options->force_center_interp_no_msaa) {
416          nir_def_replace(&intrin->def, nir_load_barycentric_pixel(b, 32, .interp_mode = mode));
417          return true;
418       }
419 
420       if (s->options->ps_iter_samples >= 2 &&
421           sample_id->parent_instr->type == nir_instr_type_intrinsic &&
422           nir_instr_as_intrinsic(sample_id->parent_instr)->intrinsic == nir_intrinsic_load_sample_id) {
423          nir_def_replace(&intrin->def, nir_load_barycentric_sample(b, 32, .interp_mode = mode));
424          return true;
425       }
426 
427       /* If load_sample_positions_always_loads_current_ones is true, load_sample_positions_amd
428        * always loads the sample positions that are currently set in the rasterizer state
429        * even if MSAA is disabled.
430        */
431       nir_def *num_samples = s->options->load_sample_positions_always_loads_current_ones ?
432                                 nir_undef(b, 1, 32) : nir_load_rasterization_samples_amd(b);
433       nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, sample_id, num_samples);
434       sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
435 
436       if (s->options->dynamic_rasterization_samples) {
437          assert(!s->options->load_sample_positions_always_loads_current_ones);
438          nir_def *pixel, *at_sample;
439 
440          nir_push_if(b, nir_ieq_imm(b, num_samples, 1));
441          {
442             pixel = nir_load_barycentric_pixel(b, 32, .interp_mode = mode);
443          }
444          nir_push_else(b, NULL);
445          {
446             at_sample = lower_load_barycentric_at_offset(b, sample_pos, mode);
447          }
448          nir_pop_if(b, NULL);
449          nir_def_replace(&intrin->def, nir_if_phi(b, pixel, at_sample));
450       } else {
451          nir_def_replace(&intrin->def,
452                          lower_load_barycentric_at_offset(b, sample_pos, mode));
453       }
454       return true;
455    }
456    case nir_intrinsic_load_output:
457       if (nir_intrinsic_io_semantics(intrin).fb_fetch_output) {
458          nir_def_replace(&intrin->def, fbfetch_color_buffer0(b, s));
459          return true;
460       }
461       break;
462    case nir_intrinsic_load_frag_coord:
463       if (!s->options->optimize_frag_coord)
464          break;
465       /* Compute frag_coord.xy from pixel_coord. */
466       if (!s->use_fragcoord && nir_def_components_read(&intrin->def) & 0x3) {
467          nir_def *new_fragcoord_xy = nir_u2f32(b, nir_load_pixel_coord(b));
468          if (!b->shader->info.fs.pixel_center_integer)
469             new_fragcoord_xy = nir_fadd_imm(b, new_fragcoord_xy, 0.5);
470          nir_def *fragcoord = nir_load_frag_coord(b);
471          nir_def_replace(&intrin->def,
472                          nir_vec4(b, nir_channel(b, new_fragcoord_xy, 0),
473                                   nir_channel(b, new_fragcoord_xy, 1),
474                                   nir_channel(b, fragcoord, 2),
475                                   nir_channel(b, fragcoord, 3)));
476          return true;
477       }
478       break;
479    case nir_intrinsic_load_pixel_coord:
480       if (!s->options->optimize_frag_coord)
481          break;
482       /* There is already a floating-point frag_coord.xy use in the shader. Don't add pixel_coord.
483        * Instead, compute pixel_coord from frag_coord.
484        */
485       if (s->use_fragcoord) {
486          nir_def *new_pixel_coord = nir_f2u16(b, nir_channels(b, nir_load_frag_coord(b), 0x3));
487          nir_def_replace(&intrin->def, new_pixel_coord);
488          return true;
489       }
490       break;
491    default:
492       break;
493    }
494 
495    return false;
496 }
497 
498 static bool
gather_info(nir_builder * b,nir_intrinsic_instr * intr,void * state)499 gather_info(nir_builder *b, nir_intrinsic_instr *intr, void *state)
500 {
501    lower_ps_early_state *s = (lower_ps_early_state *)state;
502 
503    switch (intr->intrinsic) {
504    case nir_intrinsic_store_output:
505       /* FRAG_RESULT_COLOR can't broadcast results to all color buffers if another
506        * FRAG_RESULT_COLOR output exists with dual_src_blend_index=1. This happens
507        * with gl_SecondaryFragColorEXT in GLES.
508        */
509       if (nir_intrinsic_io_semantics(intr).location == FRAG_RESULT_COLOR &&
510           nir_intrinsic_io_semantics(intr).dual_source_blend_index)
511          s->frag_color_is_frag_data0 = true;
512       break;
513    case nir_intrinsic_load_frag_coord:
514       assert(intr->def.bit_size == 32);
515       nir_foreach_use(use, &intr->def) {
516          if (nir_src_parent_instr(use)->type == nir_instr_type_alu &&
517              nir_src_components_read(use) & 0x3) {
518             switch (nir_instr_as_alu(nir_src_parent_instr(use))->op) {
519             case nir_op_f2i8:
520             case nir_op_f2i16:
521             case nir_op_f2i32:
522             case nir_op_f2i64:
523             case nir_op_f2u8:
524             case nir_op_f2u16:
525             case nir_op_f2u32:
526             case nir_op_f2u64:
527             case nir_op_ftrunc:
528             case nir_op_ffloor:
529                continue;
530             default:
531                break;
532             }
533          }
534          s->uses_fragcoord_xy_as_float = true;
535          break;
536       }
537       break;
538    case nir_intrinsic_load_sample_pos:
539       if (!s->options->frag_coord_is_center)
540          s->uses_fragcoord_xy_as_float = true;
541       break;
542    default:
543       break;
544    }
545 
546    return false;
547 }
548 
549 bool
ac_nir_lower_ps_early(nir_shader * nir,const ac_nir_lower_ps_early_options * options)550 ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options)
551 {
552    assert(nir->info.stage == MESA_SHADER_FRAGMENT);
553    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
554 
555    nir_builder builder = nir_builder_create(impl);
556    nir_builder *b = &builder;
557 
558    lower_ps_early_state state = {
559       .options = options,
560    };
561 
562    /* Don't gather shader_info. Just gather the single thing we want to know. */
563    nir_shader_intrinsics_pass(nir, gather_info, nir_metadata_all, &state);
564 
565    /* The preferred option is replacing frag_coord by pixel_coord.xy + 0.5. The goal is to reduce
566     * input VGPRs to increase PS wave launch rate. pixel_coord uses 1 input VGPR, while
567     * frag_coord.xy uses 2 input VGPRs. It only helps performance if the number of input VGPRs
568     * decreases to an even number. If it only decreases to an odd number, it has no effect.
569     *
570     * TODO: estimate input VGPRs and don't lower to pixel_coord if their number doesn't decrease to
571     * an even number?
572     */
573    state.use_fragcoord = !options->frag_coord_is_center && state.options->ps_iter_samples != 1 &&
574                          !state.options->force_center_interp_no_msaa &&
575                          state.uses_fragcoord_xy_as_float;
576 
577    bool progress = nir_shader_intrinsics_pass(nir, lower_ps_intrinsic,
578                                               nir_metadata_control_flow, &state);
579 
580    if (state.persp_center || state.persp_centroid || state.persp_sample ||
581        state.linear_center || state.linear_centroid || state.linear_sample) {
582       assert(progress);
583 
584       /* This must be after lower_ps_intrinsic. */
585       init_interp_param(b, &state);
586 
587       /* Cleanup local variables, as RADV won't do this. */
588       NIR_PASS(_, nir, nir_lower_vars_to_ssa);
589    }
590 
591    return progress;
592 }
593