/*
 * Copyright 2024 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/* This is a pre-link lowering and optimization pass that modifies the shader for the purpose
 * of gathering accurate shader_info and determining hw registers. It should be run before
 * linking passes and it doesn't produce AMD intrinsics that would break linking passes.
 * Some of the options come from dynamic state.
 *
 * It should be run after nir_lower_io, but before nir_opt_varyings.
 */

#include "ac_nir.h"
#include "sid.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

typedef struct {
   const ac_nir_lower_ps_early_options *options;

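   /* Local variables that replace barycentric intrinsics when the interpolation mode must be
    * overridden (sample shading or forced center interpolation). Created on demand.
    */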
   nir_variable *persp_center;
   nir_variable *persp_centroid;
   nir_variable *persp_sample;
   nir_variable *linear_center;
   nir_variable *linear_centroid;
   nir_variable *linear_sample;

   bool frag_color_is_frag_data0;
   bool seen_color0_alpha;
   bool uses_fragcoord_xy_as_float;
   bool use_fragcoord;

   nir_def *load_helper_invoc_at_top;
} lower_ps_early_state;

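/* If will_replace is set, return the local variable that replaces a barycentric intrinsic,
 * creating it on first use. Otherwise return NULL.
 */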
static nir_variable *
get_baryc_var_common(nir_builder *b, bool will_replace, nir_variable **var, const char *var_name)
{
   if (will_replace) {
      if (!*var) {
         *var = nir_local_variable_create(b->impl, glsl_vec_type(2), var_name);
      }
      return *var;
   }
   return NULL;
}

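/* Select the replacement variable for a barycentric intrinsic based on the interpolation mode
 * and the dynamic state: center/centroid barycentrics are replaced when sample shading is
 * enabled, and sample/centroid barycentrics are replaced when MSAA is forced off.
 */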
static nir_variable *
get_baryc_var(nir_builder *b, nir_intrinsic_op baryc_op, enum glsl_interp_mode mode,
              lower_ps_early_state *s)
{
   switch (baryc_op) {
   case nir_intrinsic_load_barycentric_pixel:
      if (mode == INTERP_MODE_NOPERSPECTIVE) {
         return get_baryc_var_common(b, s->options->ps_iter_samples > 1, &s->linear_center,
                                     "linear_center");
      } else {
         return get_baryc_var_common(b, s->options->ps_iter_samples > 1, &s->persp_center,
                                     "persp_center");
      }
   case nir_intrinsic_load_barycentric_centroid:
      if (mode == INTERP_MODE_NOPERSPECTIVE) {
         return get_baryc_var_common(b, s->options->ps_iter_samples > 1 ||
                                     s->options->force_center_interp_no_msaa, &s->linear_centroid,
                                     "linear_centroid");
      } else {
         return get_baryc_var_common(b, s->options->ps_iter_samples > 1 ||
                                     s->options->force_center_interp_no_msaa, &s->persp_centroid,
                                     "persp_centroid");
      }
   case nir_intrinsic_load_barycentric_sample:
      if (mode == INTERP_MODE_NOPERSPECTIVE) {
         return get_baryc_var_common(b, s->options->force_center_interp_no_msaa, &s->linear_sample,
                                     "linear_sample");
      } else {
         return get_baryc_var_common(b, s->options->force_center_interp_no_msaa, &s->persp_sample,
                                     "persp_sample");
      }
   default:
      return NULL;
   }
}

static void
set_interp_vars(nir_builder *b, nir_def *new_baryc, nir_variable *baryc1, nir_variable *baryc2)
{
   if (baryc1)
      nir_store_var(b, baryc1, new_baryc, 0x3);
   if (baryc2)
      nir_store_var(b, baryc2, new_baryc, 0x3);
}

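/* Initialize the replacement variables at the top of the shader with the overriding
 * barycentrics: sample barycentrics when sample shading is enabled, pixel (center)
 * barycentrics when MSAA is forced off.
 */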
static void
init_interp_param(nir_builder *b, lower_ps_early_state *s)
{
   b->cursor = nir_before_cf_list(&b->impl->body);

   if (s->options->ps_iter_samples > 1) {
      set_interp_vars(b, nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_SMOOTH),
                      s->persp_center, s->persp_centroid);
      set_interp_vars(b, nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE),
                      s->linear_center, s->linear_centroid);
   }

   if (s->options->force_center_interp_no_msaa) {
      set_interp_vars(b, nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH),
                      s->persp_sample, s->persp_centroid);
      set_interp_vars(b, nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE),
                      s->linear_sample, s->linear_centroid);
   }
}

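/* Replace a barycentric intrinsic with a load of the corresponding replacement variable, if one
 * is needed for the current state.
 */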
static bool
rewrite_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
{
   nir_variable *baryc_var = get_baryc_var(b, intrin->intrinsic,
                                           nir_intrinsic_interp_mode(intrin), s);
   if (!baryc_var)
      return false;

   nir_def_replace(&intrin->def, nir_load_var(b, baryc_var));
   return true;
}

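/* Optimize and lower PS output stores: remove outputs killed by dynamic state, clamp colors,
 * apply the alpha test, and trim the stored value according to the color buffer format and
 * writemask.
 */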
static bool
optimize_lower_ps_outputs(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
{
   unsigned slot = nir_intrinsic_io_semantics(intrin).location;

   switch (slot) {
   case FRAG_RESULT_DEPTH:
      if (!s->options->kill_z)
         return false;
      nir_instr_remove(&intrin->instr);
      return true;

   case FRAG_RESULT_STENCIL:
      if (!s->options->kill_stencil)
         return false;
      nir_instr_remove(&intrin->instr);
      return true;

   case FRAG_RESULT_SAMPLE_MASK:
      if (!s->options->kill_samplemask)
         return false;
      nir_instr_remove(&intrin->instr);
      return true;
   }

   unsigned writemask = nir_intrinsic_write_mask(intrin);
   unsigned component = nir_intrinsic_component(intrin);
   unsigned color_index = (slot >= FRAG_RESULT_DATA0 ? slot - FRAG_RESULT_DATA0 : 0) +
                          nir_intrinsic_io_semantics(intrin).dual_source_blend_index;
   nir_def *value = intrin->src[0].ssa;
   bool progress = false;

   /* Clamp color. */
   if (s->options->clamp_color) {
      value = nir_fsat(b, value);
      progress = true;
   }

   /* Alpha test. */
   if (color_index == 0 && s->options->alpha_func != COMPARE_FUNC_ALWAYS &&
       (writemask << component) & BITFIELD_BIT(3)) {
      assert(!s->seen_color0_alpha);
      s->seen_color0_alpha = true;

      if (s->options->alpha_func == COMPARE_FUNC_NEVER) {
         nir_discard(b);
      } else {
         nir_def *ref = nir_load_alpha_reference_amd(b);
         ref = nir_convert_to_bit_size(b, ref, nir_type_float, value->bit_size);
         nir_def *alpha = s->options->alpha_test_alpha_to_one ?
                             nir_imm_floatN_t(b, 1, value->bit_size) :
                             nir_channel(b, value, 3 - component);
         nir_def *cond = nir_compare_func(b, s->options->alpha_func, alpha, ref);
         nir_discard_if(b, nir_inot(b, cond));
      }
      progress = true;
   }

   /* Trim the src according to the format and writemask. */
   unsigned cb_shader_mask = ac_get_cb_shader_mask(s->options->spi_shader_col_format_hint);
   unsigned format_mask;

   if (slot == FRAG_RESULT_COLOR && !s->frag_color_is_frag_data0) {
      /* cb_shader_mask is 0 for disabled color buffers, so combine all of them. */
      format_mask = 0;
      for (unsigned i = 0; i < 8; i++)
         format_mask |= (cb_shader_mask >> (i * 4)) & 0xf;
   } else {
      format_mask = (cb_shader_mask >> (color_index * 4)) & 0xf;
   }

   if (s->options->keep_alpha_for_mrtz && color_index == 0)
      format_mask |= BITFIELD_BIT(3);

   writemask = (format_mask >> component) & writemask;
   nir_intrinsic_set_write_mask(intrin, writemask);

   /* Empty writemask. */
   if (!writemask) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   /* Trim the src to the last bit of writemask. */
   unsigned num_components = util_last_bit(writemask);

   if (num_components != value->num_components) {
      assert(num_components < value->num_components);
      value = nir_trim_vector(b, value, num_components);
      progress = true;
   }

   /* Replace disabled channels in a non-contiguous writemask with undef. */
   if (!util_is_power_of_two_nonzero(writemask + 1)) {
      u_foreach_bit(i, BITFIELD_MASK(num_components) & ~writemask) {
         value = nir_vector_insert_imm(b, value, nir_undef(b, 1, value->bit_size), i);
         progress = true;
      }
   }

   if (intrin->src[0].ssa != value) {
      assert(progress);
      nir_src_rewrite(&intrin->src[0], value);
      intrin->num_components = value->num_components;
   } else {
      assert(intrin->src[0].ssa == value);
   }

   return progress;
}

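/* Return a helper invocation load that is inserted once at the beginning of the shader and
 * shared by all users.
 */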
static nir_def *
get_load_helper_invocation(nir_function_impl *impl, lower_ps_early_state *s)
{
   /* Insert this only once. */
   if (!s->load_helper_invoc_at_top) {
      nir_builder b = nir_builder_at(nir_before_impl(impl));
      s->load_helper_invoc_at_top = nir_load_helper_invocation(&b, 1);
   }

   return s->load_helper_invoc_at_top;
}

static bool
lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_early_state *s)
{
   /* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
    * says:
    *
    * "When per-sample shading is active due to the use of a fragment
    *  input qualified by sample or due to the use of the gl_SampleID
    *  or gl_SamplePosition variables, only the bit for the current
    *  sample is set in gl_SampleMaskIn. When state specifies multiple
    *  fragment shader invocations for a given fragment, the sample
    *  mask for any single fragment shader invocation may specify a
    *  subset of the covered samples for the fragment. In this case,
    *  the bit corresponding to each covered sample will be set in
    *  exactly one fragment shader invocation."
    *
    * The samplemask loaded by hardware is always the coverage of the
    * entire pixel/fragment, so mask bits out based on the sample ID.
    */
   nir_def *replacement = NULL;

   /* Set ps_iter_samples=8 if full sample shading is enabled even for 2x and 4x MSAA
    * to get this fast path that fully replaces sample_mask_in with sample_id.
    */
   if (s->options->force_center_interp_no_msaa && !s->options->uses_vrs_coarse_shading) {
      replacement = nir_b2i32(b, nir_inot(b, get_load_helper_invocation(b->impl, s)));
   } else if (s->options->ps_iter_samples == 8) {
      replacement = nir_bcsel(b, get_load_helper_invocation(b->impl, s), nir_imm_int(b, 0),
                              nir_ishl(b, nir_imm_int(b, 1), nir_load_sample_id(b)));
   } else if (s->options->ps_iter_samples > 1) {
      uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
      nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), nir_load_sample_id(b));
      replacement = nir_iand(b, nir_load_sample_mask_in(b), submask);
   } else {
      return false;
   }

   nir_def_replace(&intrin->def, replacement);
   return true;
}

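/* Compute barycentrics at a pixel offset by extrapolating the pixel barycentrics with their
 * screen-space derivatives.
 */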
static nir_def *
lower_load_barycentric_at_offset(nir_builder *b, nir_def *offset, enum glsl_interp_mode mode)
{
   /* ddx/ddy must execute before terminate (discard). */
   nir_builder sb = nir_builder_at(nir_before_impl(b->impl));
   nir_def *baryc = nir_load_barycentric_pixel(&sb, 32, .interp_mode = mode);
   nir_def *i = nir_channel(&sb, baryc, 0);
   nir_def *j = nir_channel(&sb, baryc, 1);
   nir_def *ddx_i = nir_ddx(&sb, i);
   nir_def *ddx_j = nir_ddx(&sb, j);
   nir_def *ddy_i = nir_ddy(&sb, i);
   nir_def *ddy_j = nir_ddy(&sb, j);

   nir_def *offset_x = nir_channel(b, offset, 0);
   nir_def *offset_y = nir_channel(b, offset, 1);

   /* Interpolate standard barycentrics by offset. */
   nir_def *offset_i = nir_ffma(b, ddy_i, offset_y, nir_ffma(b, ddx_i, offset_x, i));
   nir_def *offset_j = nir_ffma(b, ddy_j, offset_y, nir_ffma(b, ddx_j, offset_x, j));
   return nir_vec2(b, offset_i, offset_j);
}

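/* Implement framebuffer fetch (load_output with fb_fetch_output set) as a bindless image load
 * from color buffer 0, resolving the sample index through FMASK when requested.
 */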
static nir_def *
fbfetch_color_buffer0(nir_builder *b, lower_ps_early_state *s)
{
   nir_def *zero = nir_imm_zero(b, 1, 32);
   nir_def *undef = nir_undef(b, 1, 32);

   unsigned chan = 0;
   nir_def *coord_vec[4] = {undef, undef, undef, undef};
   nir_def *pixel_coord = nir_u2u32(b, nir_load_pixel_coord(b));

   coord_vec[chan++] = nir_channel(b, pixel_coord, 0);

   if (!s->options->fbfetch_is_1D)
      coord_vec[chan++] = nir_channel(b, pixel_coord, 1);

   /* Get the current render target layer index. */
   if (s->options->fbfetch_layered)
      coord_vec[chan++] = nir_load_layer_id(b);

   nir_def *coords = nir_vec(b, coord_vec, 4);

   enum glsl_sampler_dim dim;
   if (s->options->fbfetch_msaa)
      dim = GLSL_SAMPLER_DIM_MS;
   else if (s->options->fbfetch_is_1D)
      dim = GLSL_SAMPLER_DIM_1D;
   else
      dim = GLSL_SAMPLER_DIM_2D;

   nir_def *sample_id;
   if (s->options->fbfetch_msaa) {
      sample_id = nir_load_sample_id(b);

      if (s->options->fbfetch_apply_fmask) {
         nir_def *fmask =
            nir_bindless_image_fragment_mask_load_amd(
               b, nir_load_fbfetch_image_fmask_desc_amd(b), coords,
               .image_dim = dim,
               .image_array = s->options->fbfetch_layered,
               .access = ACCESS_CAN_REORDER);
         sample_id = nir_ubfe(b, fmask, nir_ishl_imm(b, sample_id, 2), nir_imm_int(b, 3));
      }
   } else {
      sample_id = zero;
   }

   return nir_bindless_image_load(b, 4, 32, nir_load_fbfetch_image_desc_amd(b), coords, sample_id,
                                  zero,
                                  .image_dim = dim,
                                  .image_array = s->options->fbfetch_layered,
                                  .access = ACCESS_CAN_REORDER);
}

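/* Per-intrinsic lowering callback. Returns true if the intrinsic was lowered or removed. */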
static bool
lower_ps_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   lower_ps_early_state *s = (lower_ps_early_state *)state;

   b->cursor = nir_before_instr(&intrin->instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_output:
      return optimize_lower_ps_outputs(b, intrin, s);
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
      return rewrite_ps_load_barycentric(b, intrin, s);
   case nir_intrinsic_load_sample_mask_in:
      return lower_ps_load_sample_mask_in(b, intrin, s);
   case nir_intrinsic_load_front_face:
      if (s->options->force_front_face) {
         nir_def_replace(&intrin->def, nir_imm_bool(b, s->options->force_front_face == 1));
         return true;
      }
      break;
   case nir_intrinsic_load_front_face_fsign:
      if (s->options->force_front_face) {
         nir_def_replace(&intrin->def, nir_imm_float(b, s->options->force_front_face == 1 ? 1 : -1));
         return true;
      }
      break;
   case nir_intrinsic_load_sample_pos:
      if (s->options->frag_coord_is_center) {
         /* We have to use the alternative way to get sample_pos. */
         nir_def *num_samples = s->options->load_sample_positions_always_loads_current_ones ?
                                   nir_undef(b, 1, 32) : nir_load_rasterization_samples_amd(b);
         nir_def_replace(&intrin->def, nir_load_sample_positions_amd(b, 32, nir_load_sample_id(b),
                                                                     num_samples));
      } else {
         /* sample_pos = ffract(frag_coord.xy); */
         nir_def_replace(&intrin->def, nir_ffract(b, nir_channels(b, nir_load_frag_coord(b), 0x3)));
      }
      return true;
   case nir_intrinsic_load_barycentric_at_offset:
      nir_def_replace(&intrin->def,
                      lower_load_barycentric_at_offset(b, intrin->src[0].ssa,
                                                       nir_intrinsic_interp_mode(intrin)));
      return true;
   case nir_intrinsic_load_barycentric_at_sample: {
      unsigned mode = nir_intrinsic_interp_mode(intrin);
      nir_def *sample_id = intrin->src[0].ssa;

      if (s->options->force_center_interp_no_msaa) {
         nir_def_replace(&intrin->def, nir_load_barycentric_pixel(b, 32, .interp_mode = mode));
         return true;
      }

      if (s->options->ps_iter_samples >= 2 &&
          sample_id->parent_instr->type == nir_instr_type_intrinsic &&
          nir_instr_as_intrinsic(sample_id->parent_instr)->intrinsic == nir_intrinsic_load_sample_id) {
         nir_def_replace(&intrin->def, nir_load_barycentric_sample(b, 32, .interp_mode = mode));
         return true;
      }

      /* If load_sample_positions_always_loads_current_ones is true, load_sample_positions_amd
       * always loads the sample positions that are currently set in the rasterizer state
       * even if MSAA is disabled.
       */
      nir_def *num_samples = s->options->load_sample_positions_always_loads_current_ones ?
                                nir_undef(b, 1, 32) : nir_load_rasterization_samples_amd(b);
      nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, sample_id, num_samples);
      sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);

      if (s->options->dynamic_rasterization_samples) {
         assert(!s->options->load_sample_positions_always_loads_current_ones);
         nir_def *pixel, *at_sample;

         nir_push_if(b, nir_ieq_imm(b, num_samples, 1));
         {
            pixel = nir_load_barycentric_pixel(b, 32, .interp_mode = mode);
         }
         nir_push_else(b, NULL);
         {
            at_sample = lower_load_barycentric_at_offset(b, sample_pos, mode);
         }
         nir_pop_if(b, NULL);
         nir_def_replace(&intrin->def, nir_if_phi(b, pixel, at_sample));
      } else {
         nir_def_replace(&intrin->def,
                         lower_load_barycentric_at_offset(b, sample_pos, mode));
      }
      return true;
   }
   case nir_intrinsic_load_output:
      if (nir_intrinsic_io_semantics(intrin).fb_fetch_output) {
         nir_def_replace(&intrin->def, fbfetch_color_buffer0(b, s));
         return true;
      }
      break;
   case nir_intrinsic_load_frag_coord:
      if (!s->options->optimize_frag_coord)
         break;
      /* Compute frag_coord.xy from pixel_coord. */
      if (!s->use_fragcoord && nir_def_components_read(&intrin->def) & 0x3) {
         nir_def *new_fragcoord_xy = nir_u2f32(b, nir_load_pixel_coord(b));
         if (!b->shader->info.fs.pixel_center_integer)
            new_fragcoord_xy = nir_fadd_imm(b, new_fragcoord_xy, 0.5);
         nir_def *fragcoord = nir_load_frag_coord(b);
         nir_def_replace(&intrin->def,
                         nir_vec4(b, nir_channel(b, new_fragcoord_xy, 0),
                                  nir_channel(b, new_fragcoord_xy, 1),
                                  nir_channel(b, fragcoord, 2),
                                  nir_channel(b, fragcoord, 3)));
         return true;
      }
      break;
   case nir_intrinsic_load_pixel_coord:
      if (!s->options->optimize_frag_coord)
         break;
      /* There is already a floating-point frag_coord.xy use in the shader. Don't add pixel_coord.
       * Instead, compute pixel_coord from frag_coord.
       */
      if (s->use_fragcoord) {
         nir_def *new_pixel_coord = nir_f2u16(b, nir_channels(b, nir_load_frag_coord(b), 0x3));
         nir_def_replace(&intrin->def, new_pixel_coord);
         return true;
      }
      break;
   default:
      break;
   }

   return false;
}

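/* Pre-pass that gathers only the information this pass needs: whether FRAG_RESULT_COLOR can
 * broadcast to all color buffers, and whether frag_coord.xy is used as a float (and thus can't
 * be replaced by the integer pixel_coord).
 */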
static bool
gather_info(nir_builder *b, nir_intrinsic_instr *intr, void *state)
{
   lower_ps_early_state *s = (lower_ps_early_state *)state;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
      /* FRAG_RESULT_COLOR can't broadcast results to all color buffers if another
       * FRAG_RESULT_COLOR output exists with dual_src_blend_index=1. This happens
       * with gl_SecondaryFragColorEXT in GLES.
       */
      if (nir_intrinsic_io_semantics(intr).location == FRAG_RESULT_COLOR &&
          nir_intrinsic_io_semantics(intr).dual_source_blend_index)
         s->frag_color_is_frag_data0 = true;
      break;
   case nir_intrinsic_load_frag_coord:
      assert(intr->def.bit_size == 32);
      nir_foreach_use(use, &intr->def) {
         if (nir_src_parent_instr(use)->type == nir_instr_type_alu &&
             nir_src_components_read(use) & 0x3) {
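            /* These opcodes only use the integer part of frag_coord.xy, which pixel_coord can
             * provide exactly even when sample shading shifts frag_coord within the pixel.
             */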
            switch (nir_instr_as_alu(nir_src_parent_instr(use))->op) {
            case nir_op_f2i8:
            case nir_op_f2i16:
            case nir_op_f2i32:
            case nir_op_f2i64:
            case nir_op_f2u8:
            case nir_op_f2u16:
            case nir_op_f2u32:
            case nir_op_f2u64:
            case nir_op_ftrunc:
            case nir_op_ffloor:
               continue;
            default:
               break;
            }
         }
         s->uses_fragcoord_xy_as_float = true;
         break;
      }
      break;
   case nir_intrinsic_load_sample_pos:
      if (!s->options->frag_coord_is_center)
         s->uses_fragcoord_xy_as_float = true;
      break;
   default:
      break;
   }

   return false;
}

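/* Entry point: gather the info this pass needs, lower the intrinsics, and initialize the
 * interpolation replacement variables if any were created.
 */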
bool
ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options)
{
   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder builder = nir_builder_create(impl);
   nir_builder *b = &builder;

   lower_ps_early_state state = {
      .options = options,
   };

   /* Don't gather shader_info. Just gather the single thing we want to know. */
   nir_shader_intrinsics_pass(nir, gather_info, nir_metadata_all, &state);

   /* The preferred option is replacing frag_coord by pixel_coord.xy + 0.5. The goal is to reduce
    * input VGPRs to increase PS wave launch rate. pixel_coord uses 1 input VGPR, while
    * frag_coord.xy uses 2 input VGPRs. It only helps performance if the number of input VGPRs
    * decreases to an even number. If it only decreases to an odd number, it has no effect.
    *
    * TODO: estimate input VGPRs and don't lower to pixel_coord if their number doesn't decrease
    * to an even number?
    */
   state.use_fragcoord = !options->frag_coord_is_center && state.options->ps_iter_samples != 1 &&
                         !state.options->force_center_interp_no_msaa &&
                         state.uses_fragcoord_xy_as_float;

   bool progress = nir_shader_intrinsics_pass(nir, lower_ps_intrinsic,
                                              nir_metadata_control_flow, &state);

   if (state.persp_center || state.persp_centroid || state.persp_sample ||
       state.linear_center || state.linear_centroid || state.linear_sample) {
      assert(progress);

      /* This must be after lower_ps_intrinsic. */
      init_interp_param(b, &state);

      /* Cleanup local variables, as RADV won't do this. */
      NIR_PASS(_, nir, nir_lower_vars_to_ssa);
   }

   return progress;
}