/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "meta/radv_meta.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_serialize.h"
#include "nir/nir_xfb_info.h"
#include "nir/radv_nir.h"
#include "spirv/nir_spirv.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_entrypoints.h"
#include "radv_formats.h"
#include "radv_physical_device.h"
#include "radv_pipeline_binary.h"
#include "radv_pipeline_cache.h"
#include "radv_rmv.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"

#include "util/u_debug.h"
#include "ac_binary.h"
#include "ac_formats.h"
#include "ac_nir.h"
#include "ac_shader_util.h"
#include "aco_interface.h"
#include "sid.h"

static bool
radv_is_static_vrs_enabled(const struct vk_graphics_pipeline_state *state)
{
   if (!state->fsr)
      return false;

   return state->fsr->fragment_size.width != 1 || state->fsr->fragment_size.height != 1 ||
          state->fsr->combiner_ops[0] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR ||
          state->fsr->combiner_ops[1] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
}

static bool
radv_is_vrs_enabled(const struct vk_graphics_pipeline_state *state)
{
   return radv_is_static_vrs_enabled(state) || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_FSR);
}

static bool
radv_pipeline_has_ds_attachments(const struct vk_render_pass_state *rp)
{
   return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
}

static bool
radv_pipeline_has_color_attachments(const struct vk_render_pass_state *rp)
{
   for (uint32_t i = 0; i < rp->color_attachment_count; ++i) {
      if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}

/**
 * Get rid of DST in the blend factors by commuting the operands:
 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
 */
void
radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
                      VkBlendFactor replacement_src)
{
   if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
      *src_factor = VK_BLEND_FACTOR_ZERO;
      *dst_factor = replacement_src;

      /* Commuting the operands requires reversing subtractions. */
      if (*func == VK_BLEND_OP_SUBTRACT)
         *func = VK_BLEND_OP_REVERSE_SUBTRACT;
      else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
         *func = VK_BLEND_OP_SUBTRACT;
   }
}
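
/* Worked example of the transform above (illustrative, not part of the original source):
 * with func = VK_BLEND_OP_SUBTRACT, src_factor = VK_BLEND_FACTOR_DST_COLOR and
 * dst_factor = VK_BLEND_FACTOR_ZERO, the blend computes src * dst - dst * 0 = src * dst.
 * After the rewrite, func = VK_BLEND_OP_REVERSE_SUBTRACT, src_factor = VK_BLEND_FACTOR_ZERO
 * and dst_factor = VK_BLEND_FACTOR_SRC_COLOR, which computes dst * src - src * 0 = dst * src,
 * i.e. the same value without referencing DST in the source blend factor.
 */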

static unsigned
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
                             bool blend_need_alpha)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct util_format_description *desc = vk_format_description(vk_format);
   bool use_rbplus = pdev->info.rbplus_allowed;
   struct ac_spi_color_formats formats = {0};
   unsigned format, ntype, swap;

   format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
   ntype = ac_get_cb_number_type(desc->format);
   swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false);

   ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);

   if (blend_enable && blend_need_alpha)
      return formats.blend_alpha;
   else if (blend_need_alpha)
      return formats.alpha;
   else if (blend_enable)
      return formats.blend;
   else
      return formats.normal;
}

static bool
format_is_int8(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);
   int channel = vk_format_get_first_non_void_channel(format);

   return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
}

static bool
format_is_int10(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (desc->nr_channels != 4)
      return false;
   for (unsigned i = 0; i < 4; i++) {
      if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
         return true;
   }
   return false;
}

static bool
format_is_float32(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);
   int channel = vk_format_get_first_non_void_channel(format);

   return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
}

/*
 * Ordered so that for each i,
 * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
 */
const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
   VK_FORMAT_R32_SFLOAT,         VK_FORMAT_R32G32_SFLOAT,           VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_SNORM,      VK_FORMAT_R16G16B16A16_UINT,
   VK_FORMAT_R16G16B16A16_SINT,  VK_FORMAT_R32G32B32A32_SFLOAT,     VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,      VK_FORMAT_A2R10G10B10_UINT_PACK32, VK_FORMAT_A2R10G10B10_SINT_PACK32,
};

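/* Illustrative note (added, not in the original source): the key returned by the function
 * below indexes radv_fs_key_format_exemplars. Keys 0..7 are derived from the SPI color
 * format with V_028714_SPI_SHADER_ZERO and V_028714_SPI_SHADER_32_AR skipped (no VkFormat
 * maps to them), while keys 8..11 are the int8/int10 variants of the 16-bit integer
 * formats, e.g. VK_FORMAT_R8G8B8A8_UINT maps to key 8.
 */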
unsigned
radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
{
   unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
   assert(col_format != V_028714_SPI_SHADER_32_AR);

   bool is_int8 = format_is_int8(format);
   bool is_int10 = format_is_int10(format);

   if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
      return 8;
   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
      return 9;
   else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
      return 10;
   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
      return 11;
   else {
      if (col_format >= V_028714_SPI_SHADER_32_AR)
         --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */

      --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
      return col_format;
   }
}

static bool
radv_pipeline_needs_ps_epilog(const struct vk_graphics_pipeline_state *state,
                              VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
   /* Use a PS epilog when the fragment shader is compiled without the fragment output interface. */
   if ((state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) &&
       (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
       !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT))
      return true;

   /* These dynamic states need to compile PS epilogs on-demand. */
   if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
      return true;

   return false;
}

static bool
radv_pipeline_uses_vrs_attachment(const struct radv_graphics_pipeline *pipeline,
                                  const struct vk_graphics_pipeline_state *state)
{
   VkPipelineCreateFlags2 create_flags = pipeline->base.create_flags;
   if (state->rp)
      create_flags |= state->pipeline_flags;

   return (create_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0;
}

static void
radv_pipeline_init_multisample_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                     const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                     const struct vk_graphics_pipeline_state *state)
{
   struct radv_multisample_state *ms = &pipeline->ms;

   /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
    *
    * "Sample shading is enabled for a graphics pipeline:
    *
    * - If the interface of the fragment shader entry point of the
    *   graphics pipeline includes an input variable decorated
    *   with SampleId or SamplePosition. In this case
    *   minSampleShadingFactor takes the value 1.0.
    * - Else if the sampleShadingEnable member of the
    *   VkPipelineMultisampleStateCreateInfo structure specified
    *   when creating the graphics pipeline is set to VK_TRUE. In
    *   this case minSampleShadingFactor takes the value of
    *   VkPipelineMultisampleStateCreateInfo::minSampleShading.
    *
    * Otherwise, sample shading is considered disabled."
    */
   if (state->ms && state->ms->sample_shading_enable) {
      ms->sample_shading_enable = true;
      ms->min_sample_shading = state->ms->min_sample_shading;
   }
}

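/* Note (added for clarity): the fixed-function tessellator only emits triangles or lines,
 * so both the triangle and quad domains map to a triangle strip output topology below,
 * and only the isoline domain maps to a line strip.
 */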
static uint32_t
radv_conv_tess_prim_to_gs_out(enum tess_primitive_mode prim)
{
   switch (prim) {
   case TESS_PRIMITIVE_TRIANGLES:
   case TESS_PRIMITIVE_QUADS:
      return V_028A6C_TRISTRIP;
   case TESS_PRIMITIVE_ISOLINES:
      return V_028A6C_LINESTRIP;
   default:
      assert(0);
      return 0;
   }
}

static uint64_t
radv_dynamic_state_mask(VkDynamicState state)
{
   switch (state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
   case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT:
      return RADV_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
   case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT:
      return RADV_DYNAMIC_SCISSOR;
   case VK_DYNAMIC_STATE_LINE_WIDTH:
      return RADV_DYNAMIC_LINE_WIDTH;
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
      return RADV_DYNAMIC_DEPTH_BIAS;
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
      return RADV_DYNAMIC_BLEND_CONSTANTS;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
      return RADV_DYNAMIC_DEPTH_BOUNDS;
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
      return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
      return RADV_DYNAMIC_STENCIL_WRITE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
      return RADV_DYNAMIC_STENCIL_REFERENCE;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
      return RADV_DYNAMIC_SAMPLE_LOCATIONS;
   case VK_DYNAMIC_STATE_LINE_STIPPLE:
      return RADV_DYNAMIC_LINE_STIPPLE;
   case VK_DYNAMIC_STATE_CULL_MODE:
      return RADV_DYNAMIC_CULL_MODE;
   case VK_DYNAMIC_STATE_FRONT_FACE:
      return RADV_DYNAMIC_FRONT_FACE;
   case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY:
      return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
   case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE:
      return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE:
      return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP:
      return RADV_DYNAMIC_DEPTH_COMPARE_OP;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE:
      return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE:
      return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_OP:
      return RADV_DYNAMIC_STENCIL_OP;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE:
      return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
      return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
   case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT:
      return RADV_DYNAMIC_PATCH_CONTROL_POINTS;
   case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE:
      return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE:
      return RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
   case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
      return RADV_DYNAMIC_LOGIC_OP;
   case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE:
      return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
      return RADV_DYNAMIC_COLOR_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT:
      return RADV_DYNAMIC_VERTEX_INPUT;
   case VK_DYNAMIC_STATE_POLYGON_MODE_EXT:
      return RADV_DYNAMIC_POLYGON_MODE;
   case VK_DYNAMIC_STATE_TESSELLATION_DOMAIN_ORIGIN_EXT:
      return RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;
   case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT:
      return RADV_DYNAMIC_LOGIC_OP_ENABLE;
   case VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT:
      return RADV_DYNAMIC_LINE_STIPPLE_ENABLE;
   case VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT:
      return RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE;
   case VK_DYNAMIC_STATE_SAMPLE_MASK_EXT:
      return RADV_DYNAMIC_SAMPLE_MASK;
   case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT:
      return RADV_DYNAMIC_DEPTH_CLIP_ENABLE;
   case VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT:
      return RADV_DYNAMIC_CONSERVATIVE_RAST_MODE;
   case VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT:
      return RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE;
   case VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT:
      return RADV_DYNAMIC_PROVOKING_VERTEX_MODE;
   case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT:
      return RADV_DYNAMIC_DEPTH_CLAMP_ENABLE;
   case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT:
      return RADV_DYNAMIC_COLOR_WRITE_MASK;
   case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT:
      return RADV_DYNAMIC_COLOR_BLEND_ENABLE;
   case VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT:
      return RADV_DYNAMIC_RASTERIZATION_SAMPLES;
   case VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT:
      return RADV_DYNAMIC_LINE_RASTERIZATION_MODE;
   case VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT:
      return RADV_DYNAMIC_COLOR_BLEND_EQUATION;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE_MODE;
   case VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
      return RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT:
      return RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE;
   case VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT:
      return RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_CLAMP_RANGE_EXT:
      return RADV_DYNAMIC_DEPTH_CLAMP_RANGE;
   default:
      unreachable("Unhandled dynamic state");
   }
}

#define RADV_DYNAMIC_CB_STATES                                                                         \
   (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE |           \
    RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION | \
    RADV_DYNAMIC_BLEND_CONSTANTS)

static bool
radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb)
{
   /* If we don't know then we have to assume that blend may be enabled. cb may also be NULL in this
    * case.
    */
   if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK))
      return true;

   /* If we have the blend enable state, then cb being NULL indicates no attachments are written. */
   if (cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         if (cb->attachments[i].write_mask && cb->attachments[i].blend_enable)
            return true;
      }
   }

   return false;
}

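/* Added summary comment: compute the set of dynamic states this pipeline can actually use.
 * Starting from RADV_DYNAMIC_ALL, states are pruned when the corresponding feature is
 * statically disabled and not declared dynamic (e.g. depth bias, stencil, blend constants),
 * or when the relevant shader stages are missing (e.g. tessellation states without a TCS).
 */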
static uint64_t
radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
                                   const struct vk_graphics_pipeline_state *state)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
   bool raster_enabled =
      !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
   uint64_t states = RADV_DYNAMIC_ALL;

   if (pdev->info.gfx_level < GFX10_3)
      states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;

   /* Disable dynamic states that are useless to mesh shading. */
   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
      if (!raster_enabled)
         return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;

      states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
                  RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
   }

   /* Disable dynamic states that are useless when rasterization is disabled. */
   if (!raster_enabled) {
      states = RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
               RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
               RADV_DYNAMIC_VERTEX_INPUT;

      if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
         states |= RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;

      return states;
   }

   if (!state->rs->depth_bias.enable && !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
      states &= ~RADV_DYNAMIC_DEPTH_BIAS;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) &&
       (!state->ds || !state->ds->depth.bounds_test.enable))
      states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) &&
       (!state->ds || !state->ds->stencil.test_enable))
      states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE |
                  RADV_DYNAMIC_STENCIL_OP);

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && !state->dr->rectangle_count)
      states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) &&
       (!state->ms || !state->ms->sample_locations_enable))
      states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;

   if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, state->cb))
      states &= ~RADV_DYNAMIC_BLEND_CONSTANTS;

   if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT))
      states &= ~(RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN);

   return states;
}

struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};

   ia_multi_vgt_param.ia_switch_on_eoi = false;
   if (shaders[MESA_SHADER_FRAGMENT] && shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
      ia_multi_vgt_param.ia_switch_on_eoi = true;
   if (shaders[MESA_SHADER_GEOMETRY] && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
      ia_multi_vgt_param.ia_switch_on_eoi = true;
   if (shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tes = radv_get_shader(shaders, MESA_SHADER_TESS_EVAL);

      /* SWITCH_ON_EOI must be set if PrimID is used. */
      if (shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || tes->info.uses_prim_id ||
          (tes->info.merged_shader_compiled_separately && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id))
         ia_multi_vgt_param.ia_switch_on_eoi = true;
   }

   ia_multi_vgt_param.partial_vs_wave = false;
   if (shaders[MESA_SHADER_TESS_CTRL]) {
      /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
      if ((pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_PITCAIRN ||
           pdev->info.family == CHIP_BONAIRE) &&
          shaders[MESA_SHADER_GEOMETRY])
         ia_multi_vgt_param.partial_vs_wave = true;
      /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
      if (pdev->info.has_distributed_tess) {
         if (shaders[MESA_SHADER_GEOMETRY]) {
            if (pdev->info.gfx_level <= GFX8)
               ia_multi_vgt_param.partial_es_wave = true;
         } else {
            ia_multi_vgt_param.partial_vs_wave = true;
         }
      }
   }

   if (shaders[MESA_SHADER_GEOMETRY]) {
      /* On these chips there is the possibility of a hang if the
       * pipeline uses a GS and partial_vs_wave is not set.
       *
       * This mostly does not hit 4-SE chips, as those typically set
       * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
       * with GS due to another workaround.
       *
       * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
       */
      if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_FIJI || pdev->info.family == CHIP_POLARIS10 ||
          pdev->info.family == CHIP_POLARIS11 || pdev->info.family == CHIP_POLARIS12 ||
          pdev->info.family == CHIP_VEGAM) {
         ia_multi_vgt_param.partial_vs_wave = true;
      }
   }

   ia_multi_vgt_param.base =
      /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
      S_028AA8_MAX_PRIMGRP_IN_WAVE(pdev->info.gfx_level == GFX8 ? 2 : 0) |
      S_030960_EN_INST_OPT_BASIC(pdev->info.gfx_level >= GFX9) | S_030960_EN_INST_OPT_ADV(pdev->info.gfx_level >= GFX9);

   return ia_multi_vgt_param;
}

static uint32_t
radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t attrib_binding)
{
   for (uint32_t i = 0; i < vi->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *input_binding = &vi->pVertexBindingDescriptions[i];

      if (input_binding->binding == attrib_binding)
         return input_binding->stride;
   }

   return 0;
}

#define ALL_GRAPHICS_LIB_FLAGS                                        \
   (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |     \
    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |  \
    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |            \
    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)

static VkGraphicsPipelineLibraryFlagBitsEXT
shader_stage_to_pipeline_library_flags(VkShaderStageFlagBits stage)
{
   assert(util_bitcount(stage) == 1);
   switch (stage) {
   case VK_SHADER_STAGE_VERTEX_BIT:
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
   case VK_SHADER_STAGE_GEOMETRY_BIT:
   case VK_SHADER_STAGE_TASK_BIT_EXT:
   case VK_SHADER_STAGE_MESH_BIT_EXT:
      return VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT;
   case VK_SHADER_STAGE_FRAGMENT_BIT:
      return VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   default:
      unreachable("Invalid shader stage");
   }
}

static void
radv_graphics_pipeline_import_layout(struct radv_pipeline_layout *dst, const struct radv_pipeline_layout *src)
{
   for (uint32_t s = 0; s < src->num_sets; s++) {
      if (!src->set[s].layout)
         continue;

      radv_pipeline_layout_add_set(dst, s, src->set[s].layout);
   }

   dst->independent_sets |= src->independent_sets;
   dst->push_constant_size = MAX2(dst->push_constant_size, src->push_constant_size);
}

static void
radv_pipeline_import_graphics_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                   const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Mark all states declared dynamic at pipeline creation. */
   if (pCreateInfo->pDynamicState) {
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         pipeline->dynamic_states |= radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }

   /* Mark all active stages at pipeline creation. */
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];

      pipeline->active_stages |= sinfo->stage;
   }

   if (pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_EXT) {
      pipeline->last_vgt_api_stage = MESA_SHADER_MESH;
   } else {
      pipeline->last_vgt_api_stage = util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
   }
}

static bool
radv_should_import_lib_binaries(const VkPipelineCreateFlags2 create_flags)
{
   return !(create_flags & (VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT |
                            VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT));
}

static void
radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                  struct radv_graphics_lib_pipeline *lib)
{
   bool import_binaries = false;

   /* There should be no common blocks between a lib we import and the current
    * pipeline we're building.
    */
   assert((pipeline->active_stages & lib->base.active_stages) == 0);

   pipeline->dynamic_states |= lib->base.dynamic_states;
   pipeline->active_stages |= lib->base.active_stages;

   /* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */
   if (lib->base.has_pipeline_binaries || radv_should_import_lib_binaries(pipeline->base.create_flags)) {
      import_binaries = true;
   }

   if (import_binaries) {
      /* Import the compiled shaders. */
      for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
         if (!lib->base.base.shaders[s])
            continue;

         pipeline->base.shaders[s] = radv_shader_ref(lib->base.base.shaders[s]);
      }

      /* Import the GS copy shader if present. */
      if (lib->base.base.gs_copy_shader) {
         assert(!pipeline->base.gs_copy_shader);
         pipeline->base.gs_copy_shader = radv_shader_ref(lib->base.base.gs_copy_shader);
      }
   }
}

static void
radv_pipeline_init_input_assembly_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param(device, pipeline->base.shaders);
}

static bool
radv_pipeline_uses_ds_feedback_loop(const struct radv_graphics_pipeline *pipeline,
                                    const struct vk_graphics_pipeline_state *state)
{
   VkPipelineCreateFlags2 create_flags = pipeline->base.create_flags;
   if (state->rp)
      create_flags |= state->pipeline_flags;

   return (create_flags & VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
}

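/* Added explanatory comment: the values computed below describe the hardware viewport
 * transform as screen = ndc * scale + translate, i.e.
 *    x' = x * width/2  + (x_min + width/2)
 *    y' = y * height/2 + (y_min + height/2)
 *    z' = z * (maxDepth - minDepth) + minDepth
 * which maps the [-1, 1] XY range onto the viewport rectangle and [0, 1] Z onto
 * [minDepth, maxDepth].
 */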
void
radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3])
{
   float x = viewport->x;
   float y = viewport->y;
   float half_width = 0.5f * viewport->width;
   float half_height = 0.5f * viewport->height;
   double n = viewport->minDepth;
   double f = viewport->maxDepth;

   scale[0] = half_width;
   translate[0] = half_width + x;
   scale[1] = half_height;
   translate[1] = half_height + y;

   scale[2] = (f - n);
   translate[2] = n;
}

static void
radv_pipeline_init_dynamic_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                 const struct vk_graphics_pipeline_state *state,
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   uint64_t needed_states = radv_pipeline_needed_dynamic_state(device, pipeline, state);
   struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
   uint64_t states = needed_states;

   /* Initialize non-zero values for default dynamic state. */
   dynamic->vk.rs.line.width = 1.0f;
   dynamic->vk.fsr.fragment_size.width = 1u;
   dynamic->vk.fsr.fragment_size.height = 1u;
   dynamic->vk.ds.depth.bounds_test.max = 1.0f;
   dynamic->vk.ds.stencil.front.compare_mask = ~0;
   dynamic->vk.ds.stencil.front.write_mask = ~0;
   dynamic->vk.ds.stencil.back.compare_mask = ~0;
   dynamic->vk.ds.stencil.back.write_mask = ~0;
   dynamic->vk.ms.rasterization_samples = VK_SAMPLE_COUNT_1_BIT;

   pipeline->needed_dynamic_state = needed_states;

   states &= ~pipeline->dynamic_states;

   /* Input assembly. */
   if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      dynamic->vk.ia.primitive_topology = radv_translate_prim(state->ia->primitive_topology);
   }

   if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
      dynamic->vk.ia.primitive_restart_enable = state->ia->primitive_restart_enable;
   }

   /* Tessellation. */
   if (states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
      dynamic->vk.ts.patch_control_points = state->ts->patch_control_points;
   }

   if (states & RADV_DYNAMIC_TESS_DOMAIN_ORIGIN) {
      dynamic->vk.ts.domain_origin = state->ts->domain_origin;
   }

   /* Viewport. */
   if (needed_states & RADV_DYNAMIC_VIEWPORT) {
      dynamic->vk.vp.viewport_count = state->vp->viewport_count;
      if (states & RADV_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->vk.vp.viewports, state->vp->viewports, state->vp->viewport_count);
         for (unsigned i = 0; i < dynamic->vk.vp.viewport_count; i++)
            radv_get_viewport_xform(&dynamic->vk.vp.viewports[i], dynamic->hw_vp.xform[i].scale,
                                    dynamic->hw_vp.xform[i].translate);
      }
   }

   if (needed_states & RADV_DYNAMIC_SCISSOR) {
      dynamic->vk.vp.scissor_count = state->vp->scissor_count;
      if (states & RADV_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->vk.vp.scissors, state->vp->scissors, state->vp->scissor_count);
      }
   }

   if (states & RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) {
      dynamic->vk.vp.depth_clip_negative_one_to_one = state->vp->depth_clip_negative_one_to_one;
   }

   if (states & RADV_DYNAMIC_DEPTH_CLAMP_RANGE) {
      dynamic->vk.vp.depth_clamp_mode = state->vp->depth_clamp_mode;
      dynamic->vk.vp.depth_clamp_range = state->vp->depth_clamp_range;
   }

   /* Discard rectangles. */
   if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
      dynamic->vk.dr.rectangle_count = state->dr->rectangle_count;
      if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
         typed_memcpy(dynamic->vk.dr.rectangles, state->dr->rectangles, state->dr->rectangle_count);
      }
   }

   /* Rasterization. */
   if (states & RADV_DYNAMIC_LINE_WIDTH) {
      dynamic->vk.rs.line.width = state->rs->line.width;
   }

   if (states & RADV_DYNAMIC_DEPTH_BIAS) {
      dynamic->vk.rs.depth_bias.constant_factor = state->rs->depth_bias.constant_factor;
      dynamic->vk.rs.depth_bias.clamp = state->rs->depth_bias.clamp;
      dynamic->vk.rs.depth_bias.slope_factor = state->rs->depth_bias.slope_factor;
      dynamic->vk.rs.depth_bias.representation = state->rs->depth_bias.representation;
   }

   if (states & RADV_DYNAMIC_CULL_MODE) {
      dynamic->vk.rs.cull_mode = state->rs->cull_mode;
   }

   if (states & RADV_DYNAMIC_FRONT_FACE) {
      dynamic->vk.rs.front_face = state->rs->front_face;
   }

   if (states & RADV_DYNAMIC_LINE_STIPPLE) {
      dynamic->vk.rs.line.stipple.factor = state->rs->line.stipple.factor;
      dynamic->vk.rs.line.stipple.pattern = state->rs->line.stipple.pattern;
   }

   if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
      dynamic->vk.rs.depth_bias.enable = state->rs->depth_bias.enable;
   }

   if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      dynamic->vk.rs.rasterizer_discard_enable = state->rs->rasterizer_discard_enable;
   }

   if (states & RADV_DYNAMIC_POLYGON_MODE) {
      dynamic->vk.rs.polygon_mode = radv_translate_fill(state->rs->polygon_mode);
   }

   if (states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) {
      dynamic->vk.rs.line.stipple.enable = state->rs->line.stipple.enable;
   }

   if (states & RADV_DYNAMIC_DEPTH_CLIP_ENABLE) {
      dynamic->vk.rs.depth_clip_enable = state->rs->depth_clip_enable;
   }

   if (states & RADV_DYNAMIC_CONSERVATIVE_RAST_MODE) {
      dynamic->vk.rs.conservative_mode = state->rs->conservative_mode;
   }

   if (states & RADV_DYNAMIC_PROVOKING_VERTEX_MODE) {
      dynamic->vk.rs.provoking_vertex = state->rs->provoking_vertex;
   }

   if (states & RADV_DYNAMIC_DEPTH_CLAMP_ENABLE) {
      dynamic->vk.rs.depth_clamp_enable = state->rs->depth_clamp_enable;
   }

   if (states & RADV_DYNAMIC_LINE_RASTERIZATION_MODE) {
      dynamic->vk.rs.line.mode = state->rs->line.mode;
   }

   /* Fragment shading rate. */
   if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
      dynamic->vk.fsr = *state->fsr;
   }

   /* Multisample. */
   if (states & RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE) {
      dynamic->vk.ms.alpha_to_coverage_enable = state->ms->alpha_to_coverage_enable;
   }

   if (states & RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE) {
      dynamic->vk.ms.alpha_to_one_enable = state->ms->alpha_to_one_enable;
   }

   if (states & RADV_DYNAMIC_SAMPLE_MASK) {
      dynamic->vk.ms.sample_mask = state->ms->sample_mask & 0xffff;
   }

   if (states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) {
      dynamic->vk.ms.rasterization_samples = state->ms->rasterization_samples;
   }

   if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) {
      dynamic->vk.ms.sample_locations_enable = state->ms->sample_locations_enable;
   }

   if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
      unsigned count = state->ms->sample_locations->per_pixel * state->ms->sample_locations->grid_size.width *
                       state->ms->sample_locations->grid_size.height;

      dynamic->sample_location.per_pixel = state->ms->sample_locations->per_pixel;
      dynamic->sample_location.grid_size = state->ms->sample_locations->grid_size;
      dynamic->sample_location.count = count;
      typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, count);
   }

   /* Depth stencil. */
   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * radv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is created
    *    against does not use a depth/stencil attachment.
    */
   if (needed_states && radv_pipeline_has_ds_attachments(state->rp)) {
      if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
         dynamic->vk.ds.depth.bounds_test.min = state->ds->depth.bounds_test.min;
         dynamic->vk.ds.depth.bounds_test.max = state->ds->depth.bounds_test.max;
      }

      if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
         dynamic->vk.ds.stencil.front.compare_mask = state->ds->stencil.front.compare_mask;
         dynamic->vk.ds.stencil.back.compare_mask = state->ds->stencil.back.compare_mask;
      }

      if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
         dynamic->vk.ds.stencil.front.write_mask = state->ds->stencil.front.write_mask;
         dynamic->vk.ds.stencil.back.write_mask = state->ds->stencil.back.write_mask;
      }

      if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
         dynamic->vk.ds.stencil.front.reference = state->ds->stencil.front.reference;
         dynamic->vk.ds.stencil.back.reference = state->ds->stencil.back.reference;
      }

      if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
         dynamic->vk.ds.depth.test_enable = state->ds->depth.test_enable;
      }

      if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
         dynamic->vk.ds.depth.write_enable = state->ds->depth.write_enable;
      }

      if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
         dynamic->vk.ds.depth.compare_op = state->ds->depth.compare_op;
      }

      if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
         dynamic->vk.ds.depth.bounds_test.enable = state->ds->depth.bounds_test.enable;
      }

      if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
         dynamic->vk.ds.stencil.test_enable = state->ds->stencil.test_enable;
      }

      if (states & RADV_DYNAMIC_STENCIL_OP) {
         dynamic->vk.ds.stencil.front.op.compare = state->ds->stencil.front.op.compare;
         dynamic->vk.ds.stencil.front.op.fail = state->ds->stencil.front.op.fail;
         dynamic->vk.ds.stencil.front.op.pass = state->ds->stencil.front.op.pass;
         dynamic->vk.ds.stencil.front.op.depth_fail = state->ds->stencil.front.op.depth_fail;

         dynamic->vk.ds.stencil.back.op.compare = state->ds->stencil.back.op.compare;
         dynamic->vk.ds.stencil.back.op.fail = state->ds->stencil.back.op.fail;
         dynamic->vk.ds.stencil.back.op.pass = state->ds->stencil.back.op.pass;
         dynamic->vk.ds.stencil.back.op.depth_fail = state->ds->stencil.back.op.depth_fail;
      }
   }

   /* Color blend. */
   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   if (states & RADV_DYNAMIC_BLEND_CONSTANTS) {
      typed_memcpy(dynamic->vk.cb.blend_constants, state->cb->blend_constants, 4);
   }

   if (radv_pipeline_has_color_attachments(state->rp)) {
      if (states & RADV_DYNAMIC_LOGIC_OP) {
         if ((pipeline->dynamic_states & RADV_DYNAMIC_LOGIC_OP_ENABLE) || state->cb->logic_op_enable) {
            dynamic->vk.cb.logic_op = radv_translate_blend_logic_op(state->cb->logic_op);
         }
      }

      if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
         dynamic->vk.cb.color_write_enables = state->cb->color_write_enables;
      }

      if (states & RADV_DYNAMIC_LOGIC_OP_ENABLE) {
         dynamic->vk.cb.logic_op_enable = state->cb->logic_op_enable;
      }

      if (states & RADV_DYNAMIC_COLOR_WRITE_MASK) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            dynamic->vk.cb.attachments[i].write_mask = state->cb->attachments[i].write_mask;
         }
      }

      if (states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            dynamic->vk.cb.attachments[i].blend_enable = state->cb->attachments[i].blend_enable;
         }
      }

      if (states & RADV_DYNAMIC_COLOR_BLEND_EQUATION) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            const struct vk_color_blend_attachment_state *att = &state->cb->attachments[i];

            dynamic->vk.cb.attachments[i].src_color_blend_factor = att->src_color_blend_factor;
            dynamic->vk.cb.attachments[i].dst_color_blend_factor = att->dst_color_blend_factor;
            dynamic->vk.cb.attachments[i].color_blend_op = att->color_blend_op;
            dynamic->vk.cb.attachments[i].src_alpha_blend_factor = att->src_alpha_blend_factor;
            dynamic->vk.cb.attachments[i].dst_alpha_blend_factor = att->dst_alpha_blend_factor;
            dynamic->vk.cb.attachments[i].alpha_blend_op = att->alpha_blend_op;
         }
      }
   }

   if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) {
      dynamic->vk.dr.enable = state->dr->rectangle_count > 0;
   }

   if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_MODE) {
      dynamic->vk.dr.mode = state->dr->mode;
   }

   if (states & RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE) {
      bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pipeline, state);

      dynamic->feedback_loop_aspects =
         uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_NONE;
   }

   for (uint32_t i = 0; i < MAX_RTS; i++) {
      dynamic->vk.cal.color_map[i] = state->cal ? state->cal->color_map[i] : i;
      dynamic->vk.ial.color_map[i] = state->ial ? state->ial->color_map[i] : i;
   }

   dynamic->vk.ial.depth_att = state->ial ? state->ial->depth_att : MESA_VK_ATTACHMENT_UNUSED;
   dynamic->vk.ial.stencil_att = state->ial ? state->ial->stencil_att : MESA_VK_ATTACHMENT_UNUSED;

   pipeline->dynamic_state.mask = states;
}

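/* Added note: on hardware with merged shader stages, a stage may be compiled into the next
 * stage's binary (e.g. VS+TCS or VS/TES+GS). The helper below returns the shader that
 * actually contains the requested API stage, falling back to the merged TCS/GS binaries
 * when the stage has no standalone shader.
 */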
struct radv_shader *
radv_get_shader(struct radv_shader *const *shaders, gl_shader_stage stage)
{
   if (stage == MESA_SHADER_VERTEX) {
      if (shaders[MESA_SHADER_VERTEX])
         return shaders[MESA_SHADER_VERTEX];
      if (shaders[MESA_SHADER_TESS_CTRL])
         return shaders[MESA_SHADER_TESS_CTRL];
      if (shaders[MESA_SHADER_GEOMETRY])
         return shaders[MESA_SHADER_GEOMETRY];
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      if (!shaders[MESA_SHADER_TESS_CTRL])
         return NULL;
      if (shaders[MESA_SHADER_TESS_EVAL])
         return shaders[MESA_SHADER_TESS_EVAL];
      if (shaders[MESA_SHADER_GEOMETRY])
         return shaders[MESA_SHADER_GEOMETRY];
   }
   return shaders[stage];
}

static bool
radv_should_export_multiview(const struct radv_shader_stage *stage, const struct radv_graphics_state_key *gfx_state)
{
   /* Export the layer in the last VGT stage if multiview is used.
    * Also checks for NONE stage, which happens when we have depth-only rendering.
    * When the next stage is unknown (with GPL or ESO), the layer is exported unconditionally.
    */
   return gfx_state->has_multiview_view_index && radv_is_last_vgt_stage(stage) &&
          !(stage->nir->info.outputs_written & VARYING_BIT_LAYER);
}

static void
radv_remove_point_size(const struct radv_graphics_state_key *gfx_state, nir_shader *producer, nir_shader *consumer)
{
   if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || !(producer->info.outputs_written & VARYING_BIT_PSIZ))
      return;

   /* Do not remove PSIZ if the shader uses XFB because it might be stored. */
   if (producer->xfb_info)
      return;

   /* Do not remove PSIZ if the rasterization primitive uses points. */
   if (consumer->info.stage == MESA_SHADER_FRAGMENT &&
       ((producer->info.stage == MESA_SHADER_TESS_EVAL && producer->info.tess.point_mode) ||
        (producer->info.stage == MESA_SHADER_GEOMETRY && producer->info.gs.output_primitive == MESA_PRIM_POINTS) ||
        (producer->info.stage == MESA_SHADER_MESH && producer->info.mesh.primitive_type == MESA_PRIM_POINTS)))
      return;

   nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
   assert(var);

   /* Change PSIZ to a global variable which allows it to be DCE'd. */
   var->data.location = 0;
   var->data.mode = nir_var_shader_temp;

   producer->info.outputs_written &= ~VARYING_BIT_PSIZ;
   NIR_PASS_V(producer, nir_fixup_deref_modes);
   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   NIR_PASS(_, producer, nir_opt_dce);
}

static void
radv_remove_color_exports(const struct radv_graphics_state_key *gfx_state, nir_shader *nir)
{
   uint8_t color_remap[MAX_RTS];
   bool fixup_derefs = false;

   /* Do not remove color exports when a PS epilog is used because the format isn't known and the color write mask can
    * be dynamic. */
   if (gfx_state->ps.has_epilog)
      return;

   /* Shader output locations to color attachment mappings. */
   memset(color_remap, MESA_VK_ATTACHMENT_UNUSED, sizeof(color_remap));
   for (uint32_t i = 0; i < MAX_RTS; i++) {
      if (gfx_state->ps.epilog.color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
         color_remap[gfx_state->ps.epilog.color_map[i]] = i;
   }

   nir_foreach_shader_out_variable (var, nir) {
      int idx = var->data.location;
      idx -= FRAG_RESULT_DATA0;

      if (idx < 0)
         continue;

      const uint8_t cb_idx = color_remap[idx];
      unsigned col_format = (gfx_state->ps.epilog.spi_shader_col_format >> (4 * cb_idx)) & 0xf;

      if (col_format == V_028714_SPI_SHADER_ZERO) {
         /* Remove the color export if it's unused or in the presence of holes. */
         nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;
         fixup_derefs = true;
      }
   }

   if (fixup_derefs) {
      NIR_PASS_V(nir, nir_fixup_deref_modes);
      NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      NIR_PASS(_, nir, nir_opt_dce);
   }
}

static void
merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both. Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
          tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;

   /* Copy the merged info back to the TCS */
   tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
   tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
}
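
/* Illustrative example (added): if the TCS declares OutputVertices = 3 and the TES declares
 * Triangles and SpacingEqual, the merged TES info ends up with tcs_vertices_out = 3,
 * _primitive_mode = TESS_PRIMITIVE_TRIANGLES and spacing = TESS_SPACING_EQUAL, and the
 * vertex count and primitive mode are copied back into the TCS info, matching the spec
 * rule quoted above.
 */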
1156
1157 static void
radv_link_shaders(const struct radv_device * device,struct radv_shader_stage * producer_stage,struct radv_shader_stage * consumer_stage,const struct radv_graphics_state_key * gfx_state)1158 radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
1159 struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
1160 {
1161 const struct radv_physical_device *pdev = radv_device_physical(device);
1162 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
1163 nir_shader *producer = producer_stage->nir;
1164 nir_shader *consumer = consumer_stage->nir;
1165
1166 if (consumer->info.stage == MESA_SHADER_FRAGMENT) {
1167 /* Lower the viewport index to zero when the last vertex stage doesn't export it. */
1168 if ((consumer->info.inputs_read & VARYING_BIT_VIEWPORT) &&
1169 !(producer->info.outputs_written & VARYING_BIT_VIEWPORT)) {
1170 NIR_PASS(_, consumer, radv_nir_lower_viewport_to_zero);
1171 }
1172 }
1173
1174 if (producer_stage->key.optimisations_disabled || consumer_stage->key.optimisations_disabled)
1175 return;
1176
1177 if (consumer->info.stage == MESA_SHADER_FRAGMENT && producer->info.has_transform_feedback_varyings) {
1178 nir_link_xfb_varyings(producer, consumer);
1179 }
1180
1181 unsigned array_deref_of_vec_options =
1182 nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
1183 nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store;
1184
1185 NIR_PASS(_, producer, nir_lower_array_deref_of_vec, nir_var_shader_out, NULL, array_deref_of_vec_options);
1186 NIR_PASS(_, consumer, nir_lower_array_deref_of_vec, nir_var_shader_in, NULL, array_deref_of_vec_options);
1187
1188 nir_lower_io_arrays_to_elements(producer, consumer);
1189 nir_validate_shader(producer, "after nir_lower_io_arrays_to_elements");
1190 nir_validate_shader(consumer, "after nir_lower_io_arrays_to_elements");
1191
1192 radv_nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
1193 radv_nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);
1194
1195 /* Remove PSIZ from shaders when it's not needed.
1196 * This is typically produced by translation layers like Zink or D9VK.
1197 */
1198 if (gfx_state->enable_remove_point_size)
1199 radv_remove_point_size(gfx_state, producer, consumer);
1200
1201 if (nir_link_opt_varyings(producer, consumer)) {
1202 nir_validate_shader(producer, "after nir_link_opt_varyings");
1203 nir_validate_shader(consumer, "after nir_link_opt_varyings");
1204
1205 NIR_PASS(_, consumer, nir_opt_constant_folding);
1206 NIR_PASS(_, consumer, nir_opt_algebraic);
1207 NIR_PASS(_, consumer, nir_opt_dce);
1208 }
1209
1210 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1211 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1212
1213 nir_remove_unused_varyings(producer, consumer);
1214
1215 nir_compact_varyings(producer, consumer, true);
1216
1217 nir_validate_shader(producer, "after nir_compact_varyings");
1218 nir_validate_shader(consumer, "after nir_compact_varyings");
1219
1220 if (producer->info.stage == MESA_SHADER_MESH) {
1221 /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */
1222 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1223 }
1224
1225 const bool has_geom_or_tess =
1226 consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL;
1227 const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9;
1228
1229 if (producer->info.stage == MESA_SHADER_TESS_CTRL || producer->info.stage == MESA_SHADER_MESH ||
1230 (producer->info.stage == MESA_SHADER_VERTEX && has_geom_or_tess) ||
1231 (producer->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
1232 NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
1233
1234 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
1235 NIR_PASS(_, producer, nir_vectorize_tess_levels);
1236
1237 NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
1238 }
1239
1240 if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL ||
1241 consumer->info.stage == MESA_SHADER_TESS_EVAL) {
1242 NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
1243 }
1244 }
1245
1246 static const gl_shader_stage graphics_shader_order[] = {
1247 MESA_SHADER_VERTEX, MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY,
1248
1249 MESA_SHADER_TASK, MESA_SHADER_MESH,
1250
1251 MESA_SHADER_FRAGMENT,
1252 };
1253
1254 static void
radv_link_vs(const struct radv_device * device,struct radv_shader_stage * vs_stage,struct radv_shader_stage * next_stage,const struct radv_graphics_state_key * gfx_state)1255 radv_link_vs(const struct radv_device *device, struct radv_shader_stage *vs_stage, struct radv_shader_stage *next_stage,
1256 const struct radv_graphics_state_key *gfx_state)
1257 {
1258 assert(vs_stage->nir->info.stage == MESA_SHADER_VERTEX);
1259
1260 if (radv_should_export_multiview(vs_stage, gfx_state)) {
1261 NIR_PASS(_, vs_stage->nir, radv_nir_export_multiview);
1262 }
1263
1264 if (next_stage) {
1265 assert(next_stage->nir->info.stage == MESA_SHADER_TESS_CTRL ||
1266 next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1267 next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1268
1269 radv_link_shaders(device, vs_stage, next_stage, gfx_state);
1270 }
1271 }
1272
1273 static void
radv_link_tcs(const struct radv_device * device,struct radv_shader_stage * tcs_stage,struct radv_shader_stage * tes_stage,const struct radv_graphics_state_key * gfx_state)1274 radv_link_tcs(const struct radv_device *device, struct radv_shader_stage *tcs_stage,
1275 struct radv_shader_stage *tes_stage, const struct radv_graphics_state_key *gfx_state)
1276 {
1277 if (!tes_stage)
1278 return;
1279
1280 assert(tcs_stage->nir->info.stage == MESA_SHADER_TESS_CTRL);
1281 assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1282
1283 radv_link_shaders(device, tcs_stage, tes_stage, gfx_state);
1284
1285 /* Copy TCS info into the TES info */
1286 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
1287 }
1288
1289 static void
radv_link_tes(const struct radv_device * device,struct radv_shader_stage * tes_stage,struct radv_shader_stage * next_stage,const struct radv_graphics_state_key * gfx_state)1290 radv_link_tes(const struct radv_device *device, struct radv_shader_stage *tes_stage,
1291 struct radv_shader_stage *next_stage, const struct radv_graphics_state_key *gfx_state)
1292 {
1293 assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1294
1295 if (radv_should_export_multiview(tes_stage, gfx_state)) {
1296 NIR_PASS(_, tes_stage->nir, radv_nir_export_multiview);
1297 }
1298
1299 if (next_stage) {
1300 assert(next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1301 next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1302
1303 radv_link_shaders(device, tes_stage, next_stage, gfx_state);
1304 }
1305 }
1306
1307 static void
radv_link_gs(const struct radv_device * device,struct radv_shader_stage * gs_stage,struct radv_shader_stage * fs_stage,const struct radv_graphics_state_key * gfx_state)1308 radv_link_gs(const struct radv_device *device, struct radv_shader_stage *gs_stage, struct radv_shader_stage *fs_stage,
1309 const struct radv_graphics_state_key *gfx_state)
1310 {
1311 assert(gs_stage->nir->info.stage == MESA_SHADER_GEOMETRY);
1312
1313 if (radv_should_export_multiview(gs_stage, gfx_state)) {
1314 NIR_PASS(_, gs_stage->nir, radv_nir_export_multiview);
1315 }
1316
1317 if (fs_stage) {
1318 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1319
1320 radv_link_shaders(device, gs_stage, fs_stage, gfx_state);
1321 }
1322 }
1323
1324 static void
radv_link_task(const struct radv_device * device,struct radv_shader_stage * task_stage,struct radv_shader_stage * mesh_stage,const struct radv_graphics_state_key * gfx_state)1325 radv_link_task(const struct radv_device *device, struct radv_shader_stage *task_stage,
1326 struct radv_shader_stage *mesh_stage, const struct radv_graphics_state_key *gfx_state)
1327 {
1328 assert(task_stage->nir->info.stage == MESA_SHADER_TASK);
1329
1330 if (mesh_stage) {
1331 assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1332
1333 /* Linking task and mesh shaders shouldn't do anything for now, but keep it for consistency. */
1334 radv_link_shaders(device, task_stage, mesh_stage, gfx_state);
1335 }
1336 }
1337
1338 static void
1339 radv_link_mesh(const struct radv_device *device, struct radv_shader_stage *mesh_stage,
1340 struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1341 {
1342 assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1343
1344 if (fs_stage) {
1345 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1346
1347 nir_foreach_shader_in_variable (var, fs_stage->nir) {
1348 /* These variables are per-primitive when used with a mesh shader. */
1349 if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || var->data.location == VARYING_SLOT_VIEWPORT ||
1350 var->data.location == VARYING_SLOT_LAYER) {
1351 var->data.per_primitive = true;
1352 }
1353 }
1354
1355 radv_link_shaders(device, mesh_stage, fs_stage, gfx_state);
1356 }
1357
1358 /* Lower mesh shader draw ID to zero to prevent app bugs from triggering undefined behaviour. */
1359 if (mesh_stage->info.ms.has_task && BITSET_TEST(mesh_stage->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
1360 radv_nir_lower_draw_id_to_zero(mesh_stage->nir);
1361 }
1362
1363 static void
1364 radv_link_fs(struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1365 {
1366 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1367
1368 /* Lower the view index to map on the layer. */
1369 NIR_PASS(_, fs_stage->nir, radv_nir_lower_view_index);
1370
1371 radv_remove_color_exports(gfx_state, fs_stage->nir);
1372 }
1373
1374 static bool
1375 radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, const struct radv_graphics_state_key *gfx_state)
1376 {
1377 if (pipeline->base.type == RADV_PIPELINE_GRAPHICS &&
1378 !(radv_pipeline_to_graphics(&pipeline->base)->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1379 return true;
1380
1381 if (pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB &&
1382 (gfx_state->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1383 !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1384 return true;
1385
1386 return false;
1387 }
1388
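/**
 * Demote user varyings (VARYING_SLOT_VAR0 and above) that aren't marked always-active to
 * nir_var_shader_temp so that dead-code elimination can remove them.
 */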
1389 static void
1390 radv_remove_varyings(nir_shader *nir)
1391 {
1392 /* We can't demote mesh outputs to nir_var_shader_temp yet, because
1393 * they don't support array derefs of vectors.
1394 */
1395 if (nir->info.stage == MESA_SHADER_MESH)
1396 return;
1397
1398 bool fixup_derefs = false;
1399
1400 nir_foreach_shader_out_variable (var, nir) {
1401 if (var->data.always_active_io)
1402 continue;
1403
1404 if (var->data.location < VARYING_SLOT_VAR0)
1405 continue;
1406
1407 nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
1408 var->data.location = 0;
1409 var->data.mode = nir_var_shader_temp;
1410 fixup_derefs = true;
1411 }
1412
1413 if (fixup_derefs) {
1414 NIR_PASS_V(nir, nir_fixup_deref_modes);
1415 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1416 NIR_PASS(_, nir, nir_opt_dce);
1417 }
1418 }
1419
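/**
 * Link all active graphics shader stages, walking from the last stage to the first so that
 * each producer is linked against its consumer stage.
 */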
1420 static void
1421 radv_graphics_shaders_link(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
1422 struct radv_shader_stage *stages)
1423 {
1424 /* Walk backwards to link */
1425 struct radv_shader_stage *next_stage = NULL;
1426 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1427 gl_shader_stage s = graphics_shader_order[i];
1428 if (!stages[s].nir)
1429 continue;
1430
1431 switch (s) {
1432 case MESA_SHADER_VERTEX:
1433 radv_link_vs(device, &stages[s], next_stage, gfx_state);
1434 break;
1435 case MESA_SHADER_TESS_CTRL:
1436 radv_link_tcs(device, &stages[s], next_stage, gfx_state);
1437 break;
1438 case MESA_SHADER_TESS_EVAL:
1439 radv_link_tes(device, &stages[s], next_stage, gfx_state);
1440 break;
1441 case MESA_SHADER_GEOMETRY:
1442 radv_link_gs(device, &stages[s], next_stage, gfx_state);
1443 break;
1444 case MESA_SHADER_TASK:
1445 radv_link_task(device, &stages[s], next_stage, gfx_state);
1446 break;
1447 case MESA_SHADER_MESH:
1448 radv_link_mesh(device, &stages[s], next_stage, gfx_state);
1449 break;
1450 case MESA_SHADER_FRAGMENT:
1451 radv_link_fs(&stages[s], gfx_state);
1452 break;
1453 default:
1454 unreachable("Invalid graphics shader stage");
1455 }
1456
1457 next_stage = &stages[s];
1458 }
1459 }
1460
1461 /**
1462 * First pass of varying optimization.
1463 * This function is called for each shader pair from first to last.
1464 *
1465 * 1. Run some NIR passes in preparation.
1466 * 2. Optimize varyings.
1467 * 3. If either shader changed, run algebraic optimizations.
1468 */
1469 static void
1470 radv_graphics_shaders_link_varyings_first(struct radv_shader_stage *producer_stage,
1471 struct radv_shader_stage *consumer_stage)
1472 {
1473 nir_shader *producer = producer_stage->nir;
1474 nir_shader *consumer = consumer_stage->nir;
1475
1476 /* It is expected by nir_opt_varyings that no undefined stores are present in the shader. */
1477 NIR_PASS(_, producer, nir_opt_undef);
1478
1479 /* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */
1480 NIR_PASS(_, consumer, nir_opt_load_store_update_alignments);
1481
1482 /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
1483 NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
1484 NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
1485
1486 /* Eliminate useless vec->mov copies resulting from scalarization. */
1487 NIR_PASS(_, producer, nir_copy_prop);
1488
1489 const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1490
1491 /* Run algebraic optimizations on shaders that changed. */
1492 if (p & nir_progress_producer) {
1493 radv_optimize_nir_algebraic(producer, false, false);
1494 }
1495 if (p & nir_progress_consumer) {
1496 radv_optimize_nir_algebraic(consumer, false, false);
1497 }
1498 }
1499
1500 /**
1501 * Second pass of varying optimization.
1502 * This function is called for each shader pair from last to first,
1503 * after the first pass has already been called for each pair.
1504 * This is done because the previous pass might have enabled additional
1505 * opportunities for optimization.
1506 *
1507 * 1. Optimize varyings again.
1508 * 2. If either shader changed, run algebraic optimizations.
1509 * 3. Run some NIR passes to clean up the shaders.
1510 */
1511 static void
1512 radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_stage,
1513 struct radv_shader_stage *consumer_stage)
1514 {
1515 nir_shader *producer = producer_stage->nir;
1516 nir_shader *consumer = consumer_stage->nir;
1517
1518 const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1519
1520 /* Run algebraic optimizations on shaders that changed. */
1521 if (p & nir_progress_producer) {
1522 radv_optimize_nir_algebraic(producer, true, false);
1523 }
1524 if (p & nir_progress_consumer) {
1525 radv_optimize_nir_algebraic(consumer, true, false);
1526 }
1527
1528 /* Re-vectorize I/O for stages that output to memory (LDS or VRAM).
1529 * Don't vectorize FS inputs, doing so just regresses shader stats without any benefit.
1530 * There is also no benefit from re-vectorizing the outputs of the last pre-rasterization
1531 * stage here, because ac_nir_lower_ngg/legacy already takes care of that.
1532 */
1533 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1534 NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out);
1535 NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
1536 }
1537
1538 /* Gather shader info; at least the I/O info likely changed
1539 * and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
1540 */
1541 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1542 nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
1543
1544 /* Recompute intrinsic bases of PS inputs in order to remove gaps. */
1545 if (consumer->info.stage == MESA_SHADER_FRAGMENT)
1546 radv_recompute_fs_input_bases(consumer);
1547
1548 /* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
1549 if (producer->xfb_info) {
1550 nir_gather_xfb_info_from_intrinsics(producer);
1551 }
1552 }
1553
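/**
 * Record how many I/O slots are linked between the VS and its consumer (TCS or GS), based on
 * the consumer's inputs_read mask.
 */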
1554 static void
1555 radv_graphics_shaders_fill_linked_vs_io_info(struct radv_shader_stage *vs_stage,
1556 struct radv_shader_stage *consumer_stage)
1557 {
1558 const unsigned num_reserved_slots = util_bitcount64(consumer_stage->nir->info.inputs_read);
1559 vs_stage->info.vs.num_linked_outputs = num_reserved_slots;
1560 vs_stage->info.outputs_linked = true;
1561
1562 switch (consumer_stage->stage) {
1563 case MESA_SHADER_TESS_CTRL: {
1564 consumer_stage->info.tcs.num_linked_inputs = num_reserved_slots;
1565 consumer_stage->info.inputs_linked = true;
1566 break;
1567 }
1568 case MESA_SHADER_GEOMETRY: {
1569 consumer_stage->info.gs.num_linked_inputs = num_reserved_slots;
1570 consumer_stage->info.inputs_linked = true;
1571 break;
1572 }
1573 default:
1574 unreachable("invalid next stage for VS");
1575 }
1576 }
1577
1578 static void
1579 radv_graphics_shaders_fill_linked_tcs_tes_io_info(struct radv_shader_stage *tcs_stage,
1580 struct radv_shader_stage *tes_stage)
1581 {
1582 assume(tes_stage->stage == MESA_SHADER_TESS_EVAL);
1583
1584 /* Count the number of per-vertex output slots we need to reserve for the TCS and TES. */
1585 const uint64_t per_vertex_mask =
1586 tes_stage->nir->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1587 const unsigned num_reserved_slots = util_bitcount64(per_vertex_mask);
1588
1589 /* Count the number of per-patch output slots we need to reserve for the TCS and TES.
1590 * This is necessary because we need it to determine the patch size in VRAM.
1591 */
1592 const uint64_t tess_lvl_mask =
1593 tes_stage->nir->info.inputs_read & (VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1594 const unsigned num_reserved_patch_slots =
1595 util_bitcount64(tess_lvl_mask) + util_bitcount64(tes_stage->nir->info.patch_inputs_read);
1596
1597 tcs_stage->info.tcs.num_linked_outputs = num_reserved_slots;
1598 tcs_stage->info.tcs.num_linked_patch_outputs = num_reserved_patch_slots;
1599 tcs_stage->info.outputs_linked = true;
1600
1601 tes_stage->info.tes.num_linked_inputs = num_reserved_slots;
1602 tes_stage->info.tes.num_linked_patch_inputs = num_reserved_patch_slots;
1603 tes_stage->info.inputs_linked = true;
1604 }
1605
1606 static void
1607 radv_graphics_shaders_fill_linked_tes_gs_io_info(struct radv_shader_stage *tes_stage,
1608 struct radv_shader_stage *gs_stage)
1609 {
1610 assume(gs_stage->stage == MESA_SHADER_GEOMETRY);
1611
1612 const unsigned num_reserved_slots = util_bitcount64(gs_stage->nir->info.inputs_read);
1613 tes_stage->info.tes.num_linked_outputs = num_reserved_slots;
1614 tes_stage->info.outputs_linked = true;
1615 gs_stage->info.gs.num_linked_inputs = num_reserved_slots;
1616 gs_stage->info.inputs_linked = true;
1617 }
1618
1619 static void
1620 radv_graphics_shaders_fill_linked_io_info(struct radv_shader_stage *producer_stage,
1621 struct radv_shader_stage *consumer_stage)
1622 {
1623 /* We don't need to fill this info for the last pre-rasterization stage. */
1624 if (consumer_stage->stage == MESA_SHADER_FRAGMENT)
1625 return;
1626
1627 switch (producer_stage->stage) {
1628 case MESA_SHADER_VERTEX:
1629 radv_graphics_shaders_fill_linked_vs_io_info(producer_stage, consumer_stage);
1630 break;
1631
1632 case MESA_SHADER_TESS_CTRL:
1633 radv_graphics_shaders_fill_linked_tcs_tes_io_info(producer_stage, consumer_stage);
1634 break;
1635
1636 case MESA_SHADER_TESS_EVAL:
1637 radv_graphics_shaders_fill_linked_tes_gs_io_info(producer_stage, consumer_stage);
1638 break;
1639
1640 default:
1641 break;
1642 }
1643 }
1644
1645 /**
1646 * Varying optimizations performed on lowered shader I/O.
1647 *
1648 * We do this after lowering shader I/O because this is more effective
1649 * than running the same optimizations on I/O derefs.
1650 */
1651 static void
1652 radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages)
1653 {
1654 /* Optimize varyings from first to last stage. */
1655 gl_shader_stage prev = MESA_SHADER_NONE;
1656 for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) {
1657 gl_shader_stage s = graphics_shader_order[i];
1658 if (!stages[s].nir)
1659 continue;
1660
1661 if (prev != MESA_SHADER_NONE) {
1662 if (!stages[prev].key.optimisations_disabled && !stages[s].key.optimisations_disabled)
1663 radv_graphics_shaders_link_varyings_first(&stages[prev], &stages[s]);
1664 }
1665
1666 prev = s;
1667 }
1668
1669 /* Optimize varyings from last to first stage. */
1670 gl_shader_stage next = MESA_SHADER_NONE;
1671 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; --i) {
1672 gl_shader_stage s = graphics_shader_order[i];
1673 if (!stages[s].nir)
1674 continue;
1675
1676 if (next != MESA_SHADER_NONE) {
1677 if (!stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled)
1678 radv_graphics_shaders_link_varyings_second(&stages[s], &stages[next]);
1679
1680 radv_graphics_shaders_fill_linked_io_info(&stages[s], &stages[next]);
1681 }
1682
1683 next = s;
1684 }
1685 }
1686
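/**
 * Build the PS epilog key: per-MRT SPI color export formats, int8/int10 classification on
 * GFX6-7, the optional NaN fixup mask and the MRTZ (depth/stencil/sample mask/alpha-to-coverage)
 * export format.
 */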
1687 struct radv_ps_epilog_key
1688 radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
1689 {
1690 const struct radv_physical_device *pdev = radv_device_physical(device);
1691 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1692 unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
1693 struct radv_ps_epilog_key key;
1694
1695 memset(&key, 0, sizeof(key));
1696 memset(key.color_map, MESA_VK_ATTACHMENT_UNUSED, sizeof(key.color_map));
1697
1698 for (unsigned i = 0; i < state->color_attachment_count; ++i) {
1699 unsigned cf;
1700 unsigned cb_idx = state->color_attachment_mappings[i];
1701 VkFormat fmt = state->color_attachment_formats[i];
1702
1703 if (fmt == VK_FORMAT_UNDEFINED || !(state->color_write_mask & (0xfu << (i * 4))) ||
1704 cb_idx == MESA_VK_ATTACHMENT_UNUSED) {
1705 cf = V_028714_SPI_SHADER_ZERO;
1706 } else {
1707 bool blend_enable = state->color_blend_enable & (0xfu << (i * 4));
1708
1709 cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
1710
1711 if (format_is_int8(fmt))
1712 is_int8 |= 1 << i;
1713 if (format_is_int10(fmt))
1714 is_int10 |= 1 << i;
1715 if (format_is_float32(fmt))
1716 is_float32 |= 1 << i;
1717 }
1718
1719 col_format |= cf << (4 * i);
1720
1721 key.color_map[i] = state->color_attachment_mappings[i];
1722 }
1723
1724 if (!(col_format & 0xf) && state->need_src_alpha & (1 << 0)) {
1725 /* When a subpass doesn't have any color attachments, write the alpha channel of MRT0 when
1726 * alpha coverage is enabled because the depth attachment needs it.
1727 */
1728 col_format |= V_028714_SPI_SHADER_32_AR;
1729 key.color_map[0] = 0;
1730 }
1731
1732 /* The output for dual source blending should have the same format as the first output. */
1733 if (state->mrt0_is_dual_src) {
1734 assert(!(col_format >> 4));
1735 col_format |= (col_format & 0xf) << 4;
1736 key.color_map[1] = 1;
1737 }
1738
1739 z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
1740 state->alpha_to_coverage_via_mrtz);
1741
1742 key.spi_shader_col_format = col_format;
1743 key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
1744 key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
1745 key.enable_mrt_output_nan_fixup = instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
1746 key.colors_written = state->colors_written;
1747 key.mrt0_is_dual_src = state->mrt0_is_dual_src;
1748 key.export_depth = state->export_depth;
1749 key.export_stencil = state->export_stencil;
1750 key.export_sample_mask = state->export_sample_mask;
1751 key.alpha_to_coverage_via_mrtz = state->alpha_to_coverage_via_mrtz;
1752 key.spi_shader_z_format = z_format;
1753 key.alpha_to_one = state->alpha_to_one;
1754
1755 return key;
1756 }
1757
1758 static struct radv_ps_epilog_key
1759 radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state)
1760 {
1761 struct radv_ps_epilog_state ps_epilog = {0};
1762
1763 if (state->ms && state->ms->alpha_to_coverage_enable)
1764 ps_epilog.need_src_alpha |= 0x1;
1765
1766 if (state->cb) {
1767 for (uint32_t i = 0; i < state->cb->attachment_count; i++) {
1768 VkBlendOp eqRGB = state->cb->attachments[i].color_blend_op;
1769 VkBlendFactor srcRGB = state->cb->attachments[i].src_color_blend_factor;
1770 VkBlendFactor dstRGB = state->cb->attachments[i].dst_color_blend_factor;
1771
1772 /* Ignore other blend targets if dual-source blending is enabled to prevent wrong
1773 * behaviour.
1774 */
1775 if (i > 0 && ps_epilog.mrt0_is_dual_src)
1776 continue;
1777
1778 ps_epilog.color_write_mask |= (unsigned)state->cb->attachments[i].write_mask << (4 * i);
1779 if (!((ps_epilog.color_write_mask >> (i * 4)) & 0xf))
1780 continue;
1781
1782 if (state->cb->attachments[i].blend_enable)
1783 ps_epilog.color_blend_enable |= 0xfu << (i * 4);
1784
1785 if (!((ps_epilog.color_blend_enable >> (i * 4)) & 0xf))
1786 continue;
1787
1788 if (i == 0 && radv_can_enable_dual_src(&state->cb->attachments[i])) {
1789 ps_epilog.mrt0_is_dual_src = true;
1790 }
1791
1792 radv_normalize_blend_factor(eqRGB, &srcRGB, &dstRGB);
1793
1794 if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
1795 srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
1796 srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
1797 ps_epilog.need_src_alpha |= 1 << i;
1798 }
1799 }
1800
1801 if (state->rp) {
1802 ps_epilog.color_attachment_count = state->rp->color_attachment_count;
1803
1804 for (uint32_t i = 0; i < ps_epilog.color_attachment_count; i++) {
1805 ps_epilog.color_attachment_formats[i] = state->rp->color_attachment_formats[i];
1806 }
1807 }
1808
1809 if (state->ms)
1810 ps_epilog.alpha_to_one = state->ms->alpha_to_one_enable;
1811
1812 for (uint32_t i = 0; i < MAX_RTS; i++) {
1813 ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i;
1814 }
1815
1816 return radv_generate_ps_epilog_key(device, &ps_epilog);
1817 }
1818
1819 static struct radv_graphics_state_key
1820 radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
1821 VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
1822 {
1823 const struct radv_physical_device *pdev = radv_device_physical(device);
1824 struct radv_graphics_state_key key;
1825
1826 memset(&key, 0, sizeof(key));
1827
1828 key.lib_flags = lib_flags;
1829 key.has_multiview_view_index = state->rp ? !!state->rp->view_mask : 0;
1830
1831 if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI)) {
1832 key.vs.has_prolog = true;
1833 }
1834
1835 /* Compile the pre-rasterization stages only when the vertex input interface is missing. */
1836 if ((state->shader_stages & VK_SHADER_STAGE_VERTEX_BIT) && !state->vi) {
1837 key.vs.has_prolog = true;
1838 }
1839
1840 /* Vertex input state */
1841 if (state->vi) {
1842 u_foreach_bit (i, state->vi->attributes_valid) {
1843 uint32_t binding = state->vi->attributes[i].binding;
1844 uint32_t offset = state->vi->attributes[i].offset;
1845 enum pipe_format format = radv_format_to_pipe_format(state->vi->attributes[i].format);
1846
1847 key.vi.vertex_attribute_formats[i] = format;
1848 key.vi.vertex_attribute_bindings[i] = binding;
1849 key.vi.vertex_attribute_offsets[i] = offset;
1850 key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
1851
1852 /* vertex_attribute_strides is only needed to work around GFX6/7 offset>=stride checks. */
1853 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) {
1854 /* From the Vulkan spec 1.2.157:
1855 *
1856 * "If the bound pipeline state object was created with the
1857 * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE dynamic state enabled then pStrides[i]
1858 * specifies the distance in bytes between two consecutive elements within the
1859 * corresponding buffer. In this case the VkVertexInputBindingDescription::stride state
1860 * from the pipeline state object is ignored."
1861 *
1862 * Make sure the vertex attribute stride is zero to avoid computing a wrong offset if
1863 * it's initialized to something else than zero.
1864 */
1865 key.vi.vertex_attribute_strides[i] = state->vi->bindings[binding].stride;
1866 }
1867
1868 if (state->vi->bindings[binding].input_rate) {
1869 key.vi.instance_rate_inputs |= 1u << i;
1870 }
1871
1872 const struct ac_vtx_format_info *vtx_info =
1873 ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.family, format);
1874 unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
1875
1876 /* If offset is misaligned, then the buffer offset must be too. Just skip updating
1877 * vertex_binding_align in this case.
1878 */
1879 if (offset % attrib_align == 0) {
1880 key.vi.vertex_binding_align[binding] = MAX2(key.vi.vertex_binding_align[binding], attrib_align);
1881 }
1882 }
1883 }
1884
1885 if (state->ts)
1886 key.ts.patch_control_points = state->ts->patch_control_points;
1887
1888 const bool alpha_to_coverage_unknown =
1889 !state->ms || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE);
1890 const bool alpha_to_coverage_enabled = alpha_to_coverage_unknown || state->ms->alpha_to_coverage_enable;
1891 const bool alpha_to_one_unknown = !state->ms || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE);
1892 const bool alpha_to_one_enabled = alpha_to_one_unknown || state->ms->alpha_to_one_enable;
1893
1894 /* Alpha-to-coverage is always exported via MRTZ on GFX11, but it also uses MRTZ when
1895 * alpha-to-one is enabled (alpha to MRTZ.a and one to MRT0.a).
1896 */
1897 key.ms.alpha_to_coverage_via_mrtz =
1898 alpha_to_coverage_enabled && (pdev->info.gfx_level >= GFX11 || alpha_to_one_enabled);
1899
1900 if (state->ms) {
1901 key.ms.sample_shading_enable = state->ms->sample_shading_enable;
1902 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) &&
1903 state->ms->rasterization_samples > 1) {
1904 key.ms.rasterization_samples = state->ms->rasterization_samples;
1905 }
1906 }
1907
1908 if (state->ia) {
1909 key.ia.topology = radv_translate_prim(state->ia->primitive_topology);
1910 }
1911
1912 if (!state->vi || !(state->shader_stages & (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
1913 VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT))) {
1914 key.unknown_rast_prim = true;
1915 }
1916
1917 if (pdev->info.gfx_level >= GFX10 && state->rs) {
1918 key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
1919 }
1920
1921 key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
1922
1923 if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) &&
1924 (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
1925 key.adjust_frag_coord_z = true;
1926
1927 if (radv_pipeline_needs_ps_epilog(state, lib_flags))
1928 key.ps.has_epilog = true;
1929
1930 key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
1931
1932 /* Alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also exported.
1933 * However, when a PS epilog is needed and the MS state is NULL (with dynamic rendering), it's not
1934 * possible to know this at compile time and MRTZ needs to be exported from the epilog.
1935 */
1936 if (key.ps.has_epilog) {
1937 if (pdev->info.gfx_level >= GFX11) {
1938 key.ps.exports_mrtz_via_epilog = alpha_to_coverage_unknown;
1939 } else {
1940 key.ps.exports_mrtz_via_epilog =
1941 (alpha_to_coverage_unknown && alpha_to_one_enabled) || (alpha_to_one_unknown && alpha_to_coverage_enabled);
1942 }
1943 }
1944
1945 key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
1946 (!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
1947
1948 if (pdev->use_ngg) {
1949 VkShaderStageFlags ngg_stage;
1950
1951 if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
1952 ngg_stage = VK_SHADER_STAGE_GEOMETRY_BIT;
1953 } else if (state->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1954 ngg_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1955 } else {
1956 ngg_stage = VK_SHADER_STAGE_VERTEX_BIT;
1957 }
1958
1959 key.dynamic_provoking_vtx_mode =
1960 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) &&
1961 (ngg_stage == VK_SHADER_STAGE_VERTEX_BIT || ngg_stage == VK_SHADER_STAGE_GEOMETRY_BIT);
1962 }
1963
1964 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && state->ia &&
1965 state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST &&
1966 !BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_POLYGON_MODE) && state->rs &&
1967 state->rs->polygon_mode != VK_POLYGON_MODE_POINT) {
1968 key.enable_remove_point_size = true;
1969 }
1970
1971 if (device->vk.enabled_features.smoothLines) {
1972 /* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time.
1973 * This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also
1974 * simplifies the implementation.
1975 */
1976 if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
1977 (state->rs && state->rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH))
1978 key.dynamic_line_rast_mode = true;
1979
1980 /* For GPL, when the fragment shader is compiled without any pre-rasterization information,
1981 * ensure the line rasterization mode is considered dynamic because we can't know if it's
1982 * going to draw lines or not.
1983 */
1984 key.dynamic_line_rast_mode |= !!(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1985 !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
1986 }
1987
1988 return key;
1989 }
1990
1991 static struct radv_graphics_pipeline_key
1992 radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
1993 const struct vk_graphics_pipeline_state *state,
1994 VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
1995 {
1996 VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
1997 struct radv_graphics_pipeline_key key = {0};
1998
1999 key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags);
2000
2001 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2002 const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
2003 gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
2004
2005 key.stage_info[s] = radv_pipeline_get_shader_key(device, stage, create_flags, pCreateInfo->pNext);
2006
2007 if (s == MESA_SHADER_MESH && (state->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
2008 key.stage_info[s].has_task_shader = true;
2009 }
2010
2011 return key;
2012 }
2013
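/**
 * Determine which pre-rasterization stage runs as NGG. On GFX11+ NGG is always used, while on
 * GFX10/GFX10.3 it may have to be disabled (streamout, huge GS outputs, debug flags or an
 * unknown next stage).
 */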
2014 static void
2015 radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
2016 VkShaderStageFlagBits active_nir_stages)
2017 {
2018 const struct radv_physical_device *pdev = radv_device_physical(device);
2019 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2020
2021 if (!pdev->cache_key.use_ngg)
2022 return;
2023
2024 if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
2025 stages[MESA_SHADER_VERTEX].info.is_ngg = true;
2026 } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2027 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = true;
2028 } else if (stages[MESA_SHADER_MESH].nir) {
2029 stages[MESA_SHADER_MESH].info.is_ngg = true;
2030 }
2031
2032 if (pdev->info.gfx_level >= GFX11) {
2033 if (stages[MESA_SHADER_GEOMETRY].nir)
2034 stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
2035 } else {
2036 /* GFX10/GFX10.3 can't always enable NGG due to HW bugs/limitations. */
2037 if (stages[MESA_SHADER_TESS_EVAL].nir && stages[MESA_SHADER_GEOMETRY].nir &&
2038 stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations *
2039 stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out >
2040 256) {
2041 /* Fall back to the legacy path if tessellation is
2042 * enabled with extreme geometry because
2043 * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
2044 * might hang.
2045 */
2046 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2047 }
2048
2049 struct radv_shader_stage *last_vgt_stage = NULL;
2050 radv_foreach_stage(i, active_nir_stages)
2051 {
2052 if (radv_is_last_vgt_stage(&stages[i])) {
2053 last_vgt_stage = &stages[i];
2054 }
2055 }
2056
2057 if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) ||
2058 ((instance->debug_flags & RADV_DEBUG_NO_NGG_GS) && stages[MESA_SHADER_GEOMETRY].nir)) {
2059 /* NGG needs to be disabled on GFX10/GFX10.3 when:
2060 * - streamout is used because NGG streamout isn't supported
2061 * - NGG GS is explicitly disabled to work around performance issues
2062 */
2063 if (stages[MESA_SHADER_TESS_EVAL].nir)
2064 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2065 else
2066 stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2067 }
2068
2069 if (stages[MESA_SHADER_GEOMETRY].nir) {
2070 if (stages[MESA_SHADER_TESS_EVAL].nir)
2071 stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_TESS_EVAL].info.is_ngg;
2072 else
2073 stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_VERTEX].info.is_ngg;
2074 }
2075
2076 /* When pre-rasterization stages are compiled separately with shader objects, NGG GS needs to
2077 * be disabled: if the next stage of VS/TES is an unknown GS, it might use streamout, which
2078 * can't be determined when compiling only the VS or TES.
2079 */
2080 if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage == MESA_SHADER_GEOMETRY &&
2081 !stages[MESA_SHADER_GEOMETRY].nir) {
2082 stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2083 } else if (stages[MESA_SHADER_TESS_EVAL].nir &&
2084 stages[MESA_SHADER_TESS_EVAL].info.next_stage == MESA_SHADER_GEOMETRY &&
2085 !stages[MESA_SHADER_GEOMETRY].nir) {
2086 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2087 } else if (stages[MESA_SHADER_GEOMETRY].nir &&
2088 (!stages[MESA_SHADER_VERTEX].nir && !stages[MESA_SHADER_TESS_EVAL].nir)) {
2089 stages[MESA_SHADER_GEOMETRY].info.is_ngg = false;
2090 }
2091 }
2092 }
2093
2094 static bool
2095 radv_consider_force_vrs(const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage *last_vgt_stage,
2096 const struct radv_shader_stage *fs_stage)
2097 {
2098 if (!gfx_state->ps.force_vrs_enabled)
2099 return false;
2100
2101 /* Mesh shaders aren't considered. */
2102 if (last_vgt_stage->info.stage == MESA_SHADER_MESH)
2103 return false;
2104
2105 if (last_vgt_stage->nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
2106 return false;
2107
2108 /* VRS has no effect if there is no pixel shader. */
2109 if (last_vgt_stage->info.next_stage == MESA_SHADER_NONE)
2110 return false;
2111
2112 /* Do not enable if the PS uses gl_FragCoord because it breaks postprocessing in some games, or with Primitive
2113 * Ordered Pixel Shading (regardless of whether per-pixel data is addressed with gl_FragCoord or a custom
2114 * interpolator) as that'd result in races between adjacent primitives with no common fine pixels.
2115 */
2116 nir_shader *fs_shader = fs_stage->nir;
2117 if (fs_shader && (BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
2118 BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) ||
2119 fs_shader->info.fs.sample_interlock_ordered || fs_shader->info.fs.sample_interlock_unordered ||
2120 fs_shader->info.fs.pixel_interlock_ordered || fs_shader->info.fs.pixel_interlock_unordered)) {
2121 return false;
2122 }
2123
2124 return true;
2125 }
2126
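/**
 * Return the next enabled stage after the given one, or MESA_SHADER_NONE for the last stage.
 */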
2127 static gl_shader_stage
2128 radv_get_next_stage(gl_shader_stage stage, VkShaderStageFlagBits active_nir_stages)
2129 {
2130 switch (stage) {
2131 case MESA_SHADER_VERTEX:
2132 if (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2133 return MESA_SHADER_TESS_CTRL;
2134 } else if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2135 return MESA_SHADER_GEOMETRY;
2136 } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2137 return MESA_SHADER_FRAGMENT;
2138 } else {
2139 return MESA_SHADER_NONE;
2140 }
2141 case MESA_SHADER_TESS_CTRL:
2142 return MESA_SHADER_TESS_EVAL;
2143 case MESA_SHADER_TESS_EVAL:
2144 if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2145 return MESA_SHADER_GEOMETRY;
2146 } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2147 return MESA_SHADER_FRAGMENT;
2148 } else {
2149 return MESA_SHADER_NONE;
2150 }
2151 case MESA_SHADER_GEOMETRY:
2152 case MESA_SHADER_MESH:
2153 if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2154 return MESA_SHADER_FRAGMENT;
2155 } else {
2156 return MESA_SHADER_NONE;
2157 }
2158 case MESA_SHADER_TASK:
2159 return MESA_SHADER_MESH;
2160 case MESA_SHADER_FRAGMENT:
2161 return MESA_SHADER_NONE;
2162 default:
2163 unreachable("invalid graphics shader stage");
2164 }
2165 }
2166
2167 static void
2168 radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2169 const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages,
2170 VkShaderStageFlagBits active_nir_stages)
2171 {
2172 radv_foreach_stage(i, active_nir_stages)
2173 {
2174 bool consider_force_vrs = false;
2175
2176 if (radv_is_last_vgt_stage(&stages[i])) {
2177 consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
2178 }
2179
2180 radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, pipeline_type,
2181 consider_force_vrs, &stages[i].info);
2182 }
2183
2184 radv_nir_shader_info_link(device, gfx_state, stages);
2185 }
2186
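/**
 * Declare the input arguments of each active stage. On GFX9+, merged stages (VS+TCS and
 * VS/TES+GS) share the argument layout, user SGPR locations and inline push constant mask.
 */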
2187 static void
2188 radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
2189 const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
2190 {
2191 const struct radv_physical_device *pdev = radv_device_physical(device);
2192 enum amd_gfx_level gfx_level = pdev->info.gfx_level;
2193
2194 if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
2195 radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
2196 MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args);
2197 stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
2198 stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
2199 stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
2200
2201 stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs;
2202 stages[MESA_SHADER_VERTEX].info.inline_push_constant_mask =
2203 stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask;
2204 stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
2205
2206 active_nir_stages &= ~(1 << MESA_SHADER_VERTEX);
2207 active_nir_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
2208 }
2209
2210 if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
2211 gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2212 radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage,
2213 &stages[MESA_SHADER_GEOMETRY].args);
2214 stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
2215 stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
2216 stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
2217
2218 stages[pre_stage].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs;
2219 stages[pre_stage].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask;
2220 stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
2221 active_nir_stages &= ~(1 << pre_stage);
2222 active_nir_stages &= ~(1 << MESA_SHADER_GEOMETRY);
2223 }
2224
2225 u_foreach_bit (i, active_nir_stages) {
2226 radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args);
2227 stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
2228 stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
2229 }
2230 }
2231
2232 static struct radv_shader *
2233 radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache,
2234 struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state,
2235 bool keep_executable_info, bool keep_statistic_info, bool skip_shaders_cache,
2236 struct radv_shader_binary **gs_copy_binary)
2237 {
2238 const struct radv_physical_device *pdev = radv_device_physical(device);
2239 struct radv_instance *instance = radv_physical_device_instance(pdev);
2240
2241 const struct radv_shader_info *gs_info = &gs_stage->info;
2242 ac_nir_gs_output_info output_info = {
2243 .streams = gs_info->gs.output_streams,
2244 .sysval_mask = gs_info->gs.output_usage_mask,
2245 .varying_mask = gs_info->gs.output_usage_mask,
2246 };
2247 nir_shader *nir = ac_nir_create_gs_copy_shader(
2248 gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
2249 gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
2250 gs_info->force_vrs_per_vertex, &output_info);
2251
2252 nir->info.internal = true;
2253
2254 nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
2255 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2256
2257 struct radv_shader_stage gs_copy_stage = {
2258 .stage = MESA_SHADER_VERTEX,
2259 .shader_sha1 = {0},
2260 .key =
2261 {
2262 .optimisations_disabled = gs_stage->key.optimisations_disabled,
2263 },
2264 };
2265 radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info);
2266 radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false,
2267 &gs_copy_stage.info);
2268 gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */
2269 gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
2270 gs_copy_stage.info.so = gs_info->so;
2271 gs_copy_stage.info.outinfo = gs_info->outinfo;
2272 gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
2273 gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY;
2274
2275 radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
2276 &gs_copy_stage.args);
2277 gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
2278 gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
2279
2280 NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, pdev->info.has_ls_vgpr_init_bug,
2281 AC_HW_VERTEX_SHADER, 64, 64, &gs_copy_stage.args.ac);
2282 NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
2283
2284 struct radv_graphics_pipeline_key key = {0};
2285 bool dump_shader = radv_can_dump_shader(device, nir);
2286
2287 if (dump_shader)
2288 simple_mtx_lock(&instance->shader_dump_mtx);
2289
2290 char *nir_string = NULL;
2291 if (keep_executable_info || dump_shader)
2292 nir_string = radv_dump_nir_shaders(instance, &nir, 1);
2293
2294 *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info,
2295 keep_statistic_info);
2296 struct radv_shader *copy_shader =
2297 radv_shader_create(device, cache, *gs_copy_binary, skip_shaders_cache || dump_shader);
2298
2299 if (copy_shader) {
2300 copy_shader->nir_string = nir_string;
2301 radv_shader_dump_debug_info(device, dump_shader, *gs_copy_binary, copy_shader, &nir, 1, &gs_copy_stage.info);
2302 }
2303
2304 if (dump_shader)
2305 simple_mtx_unlock(&instance->shader_dump_mtx);
2306
2307 return copy_shader;
2308 }
2309
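/**
 * Compile the NIR of every active stage to ISA, from the last stage to the first. On GFX9+,
 * merged stage pairs are compiled together and a separate GS copy shader is created for
 * legacy (non-NGG) GS.
 */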
2310 static void
2311 radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
2312 struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2313 bool keep_executable_info, bool keep_statistic_info, bool skip_shaders_cache,
2314 VkShaderStageFlagBits active_nir_stages, struct radv_shader **shaders,
2315 struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
2316 struct radv_shader_binary **gs_copy_binary)
2317 {
2318 const struct radv_physical_device *pdev = radv_device_physical(device);
2319 struct radv_instance *instance = radv_physical_device_instance(pdev);
2320
2321 for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
2322 if (!(active_nir_stages & (1 << s)))
2323 continue;
2324
2325 nir_shader *nir_shaders[2] = {stages[s].nir, NULL};
2326 unsigned shader_count = 1;
2327
2328 /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2329 if (pdev->info.gfx_level >= GFX9 &&
2330 ((s == MESA_SHADER_GEOMETRY &&
2331 (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
2332 (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
2333 gl_shader_stage pre_stage;
2334
2335 if (s == MESA_SHADER_GEOMETRY && (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {
2336 pre_stage = MESA_SHADER_TESS_EVAL;
2337 } else {
2338 pre_stage = MESA_SHADER_VERTEX;
2339 }
2340
2341 nir_shaders[0] = stages[pre_stage].nir;
2342 nir_shaders[1] = stages[s].nir;
2343 shader_count = 2;
2344 }
2345
2346 int64_t stage_start = os_time_get_nano();
2347
2348 bool dump_shader = false;
2349 for (unsigned i = 0; i < shader_count; ++i)
2350 dump_shader |= radv_can_dump_shader(device, nir_shaders[i]);
2351
2352 bool dump_nir = dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
2353
2354 if (dump_shader) {
2355 simple_mtx_lock(&instance->shader_dump_mtx);
2356
2357 if (dump_nir) {
2358 for (uint32_t i = 0; i < shader_count; i++)
2359 nir_print_shader(nir_shaders[i], stderr);
2360 }
2361 }
2362
2363 char *nir_string = NULL;
2364 if (keep_executable_info || dump_shader)
2365 nir_string = radv_dump_nir_shaders(instance, nir_shaders, shader_count);
2366
2367 binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
2368 keep_executable_info, keep_statistic_info);
2369 shaders[s] = radv_shader_create(device, cache, binaries[s], skip_shaders_cache || dump_shader);
2370
2371 shaders[s]->nir_string = nir_string;
2372
2373 radv_shader_dump_debug_info(device, dump_shader, binaries[s], shaders[s], nir_shaders, shader_count,
2374 &stages[s].info);
2375
2376 if (dump_shader)
2377 simple_mtx_unlock(&instance->shader_dump_mtx);
2378
2379 if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
2380 *gs_copy_shader =
2381 radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state, keep_executable_info,
2382 keep_statistic_info, skip_shaders_cache, gs_copy_binary);
2383 }
2384
2385 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2386
2387 active_nir_stages &= ~(1 << nir_shaders[0]->info.stage);
2388 if (nir_shaders[1])
2389 active_nir_stages &= ~(1 << nir_shaders[1]->info.stage);
2390 }
2391 }
2392
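/**
 * Keep a serialized copy of each stage's NIR (with its key and SPIR-V hash) inside a graphics
 * pipeline library, so the stages can be re-imported later when the library is linked without
 * reusing its shader binaries.
 */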
2393 static void
2394 radv_pipeline_retain_shaders(struct radv_retained_shaders *retained_shaders, struct radv_shader_stage *stages)
2395 {
2396 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2397 if (!stages[s].entrypoint)
2398 continue;
2399
2400 int64_t stage_start = os_time_get_nano();
2401
2402 /* Serialize the NIR shader to reduce memory pressure. */
2403 struct blob blob;
2404
2405 blob_init(&blob);
2406 nir_serialize(&blob, stages[s].nir, true);
2407 blob_finish_get_buffer(&blob, &retained_shaders->stages[s].serialized_nir,
2408 &retained_shaders->stages[s].serialized_nir_size);
2409
2410 memcpy(retained_shaders->stages[s].shader_sha1, stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2411 memcpy(&retained_shaders->stages[s].key, &stages[s].key, sizeof(stages[s].key));
2412
2413 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2414 }
2415 }
2416
2417 static void
2418 radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_lib_pipeline *lib,
2419 struct radv_shader_stage *stages)
2420 {
2421 const struct radv_physical_device *pdev = radv_device_physical(device);
2422 struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;
2423
2424 /* Import the stages (SPIR-V only in case of cache hits). */
2425 for (uint32_t i = 0; i < lib->stage_count; i++) {
2426 const VkPipelineShaderStageCreateInfo *sinfo = &lib->stages[i];
2427 gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
2428
2429 radv_pipeline_stage_init(lib->base.base.create_flags, sinfo,
2430 &lib->layout, &lib->stage_keys[s], &stages[s]);
2431 }
2432
2433 /* Import the NIR shaders (after SPIRV->NIR). */
2434 for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
2435 if (!retained_shaders->stages[s].serialized_nir_size)
2436 continue;
2437
2438 int64_t stage_start = os_time_get_nano();
2439
2440 /* Deserialize the NIR shader. */
2441 const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
2442 struct blob_reader blob_reader;
2443 blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
2444 retained_shaders->stages[s].serialized_nir_size);
2445
2446 stages[s].stage = s;
2447 stages[s].nir = nir_deserialize(NULL, options, &blob_reader);
2448 stages[s].entrypoint = nir_shader_get_entrypoint(stages[s].nir)->function->name;
2449 memcpy(stages[s].shader_sha1, retained_shaders->stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2450 memcpy(&stages[s].key, &retained_shaders->stages[s].key, sizeof(stages[s].key));
2451
2452 radv_shader_layout_init(&lib->layout, s, &stages[s].layout);
2453
2454 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2455
2456 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2457 }
2458 }
2459
2460 static void
2461 radv_pipeline_load_retained_shaders(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2462 struct radv_shader_stage *stages)
2463 {
2464 const VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2465 const VkPipelineLibraryCreateInfoKHR *libs_info =
2466 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2467
2468 /* Nothing to load if no libs are imported. */
2469 if (!libs_info)
2470 return;
2471
2472 /* Nothing to load if fast-linking is enabled and there are no retained shaders. */
2473 if (radv_should_import_lib_binaries(create_flags))
2474 return;
2475
2476 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2477 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2478 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2479
2480 radv_pipeline_import_retained_shaders(device, gfx_pipeline_lib, stages);
2481 }
2482 }
2483
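/**
 * Determine the primitive type that reaches the rasterizer from the last pre-rasterization
 * stage (GS, TES, MS or the input topology), or -1 when it can't be known at compile time.
 */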
2484 static unsigned
2485 radv_get_rasterization_prim(const struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state)
2486 {
2487 unsigned rast_prim;
2488
2489 if (gfx_state->unknown_rast_prim)
2490 return -1;
2491
2492 if (stages[MESA_SHADER_GEOMETRY].nir) {
2493 rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive);
2494 } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2495 if (stages[MESA_SHADER_TESS_EVAL].nir->info.tess.point_mode) {
2496 rast_prim = V_028A6C_POINTLIST;
2497 } else {
2498 rast_prim = radv_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode);
2499 }
2500 } else if (stages[MESA_SHADER_MESH].nir) {
2501 rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_MESH].nir->info.mesh.primitive_type);
2502 } else {
2503 rast_prim = radv_conv_prim_to_gs_out(gfx_state->ia.topology, false);
2504 }
2505
2506 return rast_prim;
2507 }
2508
2509 static bool
2510 radv_is_fast_linking_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo)
2511 {
2512 const VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2513 const VkPipelineLibraryCreateInfoKHR *libs_info =
2514 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2515
2516 if (!libs_info)
2517 return false;
2518
2519 return !(create_flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
2520 }
2521
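/**
 * Return true when shader compilation can be skipped entirely: pipeline binaries are imported,
 * or fast-linking is used and every active stage already has a binary from the imported
 * libraries (accounting for merged stages on GFX9+).
 */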
2522 static bool
2523 radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo)
2524 {
2525 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
2526 const VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2527 const struct radv_physical_device *pdev = radv_device_physical(device);
2528 VkShaderStageFlagBits binary_stages = 0;
2529 VkShaderStageFlags active_stages = 0;
2530
2531 /* No compilation when pipeline binaries are imported. */
2532 if (binary_info && binary_info->binaryCount > 0)
2533 return true;
2534
2535 /* Do not skip for libraries. */
2536 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
2537 return false;
2538
2539 /* Do not skip when fast-linking isn't enabled. */
2540 if (!radv_is_fast_linking_enabled(pCreateInfo))
2541 return false;
2542
2543 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2544 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2545 active_stages |= sinfo->stage;
2546 }
2547
2548 const VkPipelineLibraryCreateInfoKHR *libs_info =
2549 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2550 if (libs_info) {
2551 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2552 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2553 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2554
2555 assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
2556
2557 active_stages |= gfx_pipeline_lib->base.active_stages;
2558
2559 for (uint32_t s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2560 if (!gfx_pipeline_lib->base.base.shaders[s])
2561 continue;
2562
2563 binary_stages |= mesa_to_vk_shader_stage(s);
2564 }
2565 }
2566 }
2567
2568 if (pdev->info.gfx_level >= GFX9) {
2569 /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2570 if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2571 binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2572 }
2573
2574 if (binary_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2575 if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2576 binary_stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2577 } else {
2578 binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2579 }
2580 }
2581 }
2582
2583 /* Only skip compilation when all binaries have been imported. */
2584 return binary_stages == active_stages;
2585 }
2586
2587 void
2588 radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
2589 struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2590 bool keep_executable_info, bool keep_statistic_info, bool is_internal,
2591 bool skip_shaders_cache, struct radv_retained_shaders *retained_shaders, bool noop_fs,
2592 struct radv_shader **shaders, struct radv_shader_binary **binaries,
2593 struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
2594 {
2595 const struct radv_physical_device *pdev = radv_device_physical(device);
2596 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2597 const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
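/* Translate SPIR-V to NIR for every stage that hasn't been imported from a library yet. With
 * RADV_PERFTEST_NIR_CACHE, the generated NIR is additionally cached using a BLAKE3 hash of the
 * stage and the SPIR-V->NIR options.
 */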
2598 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2599 if (!stages[s].entrypoint)
2600 continue;
2601
2602 int64_t stage_start = os_time_get_nano();
2603
2604 /* NIR might already have been imported from a library. */
2605 if (!stages[s].nir) {
2606 struct radv_spirv_to_nir_options options = {
2607 .lower_view_index_to_zero = !gfx_state->has_multiview_view_index,
2608 .lower_view_index_to_device_index = stages[s].key.view_index_from_device_index,
2609 };
2610 blake3_hash key;
2611
2612 if (nir_cache) {
2613 radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
2614 stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
2615 }
2616 if (!stages[s].nir) {
2617 stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
2618 if (nir_cache)
2619 radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
2620 }
2621 }
2622
2623 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2624 }
2625
2626 if (retained_shaders) {
2627 radv_pipeline_retain_shaders(retained_shaders, stages);
2628 }
2629
2630 VkShaderStageFlagBits active_nir_stages = 0;
2631 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2632 if (stages[i].nir)
2633 active_nir_stages |= mesa_to_vk_shader_stage(i);
2634 }
2635
2636 if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
2637 BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
2638 nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
2639 nir_shader *task = stages[MESA_SHADER_TASK].nir;
2640
2641 /* Mesh shaders only have a 1D "vertex index" which we use
2642 * as "workgroup index" to emulate the 3D workgroup ID.
2643 */
2644 nir_lower_compute_system_values_options o = {
2645 .lower_workgroup_id_to_index = true,
2646 .shortcut_1d_workgroup_id = true,
2647 .num_workgroups[0] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[0] : 0,
2648 .num_workgroups[1] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[1] : 0,
2649 .num_workgroups[2] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[2] : 0,
2650 };
2651
2652 NIR_PASS(_, mesh, nir_lower_compute_system_values, &o);
2653 }
2654
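/* Initialize the shader info of every active stage; when the next stage isn't already known,
 * derive it from the set of active stages.
 */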
2655 radv_foreach_stage(i, active_nir_stages)
2656 {
2657 gl_shader_stage next_stage;
2658
2659 if (stages[i].next_stage != MESA_SHADER_NONE) {
2660 next_stage = stages[i].next_stage;
2661 } else {
2662 next_stage = radv_get_next_stage(i, active_nir_stages);
2663 }
2664
2665 radv_nir_shader_info_init(i, next_stage, &stages[i].info);
2666 }
2667
2668 /* Determine if shaders use NGG before linking because it's needed by some NIR passes. */
2669 radv_fill_shader_info_ngg(device, stages, active_nir_stages);
2670
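/* Lower GS intrinsics. With NGG, the GS also needs to count primitives/vertices per primitive
 * and to overwrite incomplete primitives.
 */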
2671 if (stages[MESA_SHADER_GEOMETRY].nir) {
2672 unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
2673
2674 if (stages[MESA_SHADER_GEOMETRY].info.is_ngg) {
2675 nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
2676 nir_lower_gs_intrinsics_count_vertices_per_primitive |
2677 nir_lower_gs_intrinsics_overwrite_incomplete;
2678 }
2679
2680 NIR_PASS(_, stages[MESA_SHADER_GEOMETRY].nir, nir_lower_gs_intrinsics, nir_gs_flags);
2681 }
2682
2683 /* Remove all varyings when the fragment shader is a noop. */
2684 if (noop_fs) {
2685 radv_foreach_stage(i, active_nir_stages)
2686 {
2687 if (radv_is_last_vgt_stage(&stages[i])) {
2688 radv_remove_varyings(stages[i].nir);
2689 break;
2690 }
2691 }
2692 }
2693
2694 radv_graphics_shaders_link(device, gfx_state, stages);
2695
2696 if (stages[MESA_SHADER_FRAGMENT].nir) {
2697 unsigned rast_prim = radv_get_rasterization_prim(stages, gfx_state);
2698
2699 NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, rast_prim);
2700
2701 NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_fragcoord_wtrans);
2702
2703 /* frag_depth = gl_FragCoord.z broadcasts to all samples of the fragment shader invocation,
2704 * so it can only be optimized away when there is a single sample per invocation.
2705 * Because we can't know whether sample shading is used with a factor of 1.0, this means
2706 * only single-sampled shaders are optimized.
2707 */
2708 if ((gfx_state->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
2709 !gfx_state->dynamic_rasterization_samples && gfx_state->ms.rasterization_samples == 0)
2710 NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_fragdepth);
2711 }
2712
2713 if (stages[MESA_SHADER_VERTEX].nir && !gfx_state->vs.has_prolog)
2714 NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_nir_optimize_vs_inputs_to_const, gfx_state);
2715
2716 radv_foreach_stage(i, active_nir_stages)
2717 {
2718 int64_t stage_start = os_time_get_nano();
2719
2720 radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
2721
2722 /* Gather info again; information such as outputs_read can be out-of-date. */
2723 nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
2724 radv_nir_lower_io(device, stages[i].nir);
2725
2726 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2727 }
2728
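/* Fragment shader specific lowering: polygon/line smoothing, remapping of color attachments when
 * no PS epilog is used, and turning gl_FragCoord.xy into a pixel coordinate where possible.
 */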
2729 if (stages[MESA_SHADER_FRAGMENT].nir) {
2730 radv_nir_lower_poly_line_smooth(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2731
2732 if (!gfx_state->ps.has_epilog)
2733 radv_nir_remap_color_attachment(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2734
2735 bool update_info = false;
2736 NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
2737 if (update_info)
2738 nir_shader_gather_info(stages[MESA_SHADER_FRAGMENT].nir,
2739 nir_shader_get_entrypoint(stages[MESA_SHADER_FRAGMENT].nir));
2740 }
2741
2742 /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
2743 radv_graphics_shaders_link_varyings(stages);
2744
2745 /* Optimize constant clip/cull distances after linking, to operate on scalar I/O in the last
2746 * pre-rasterization stage.
2747 */
2748 radv_foreach_stage(i, active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
2749 {
2750 if (stages[i].key.optimisations_disabled)
2751 continue;
2752
2753 int64_t stage_start = os_time_get_nano();
2754
2755 NIR_PASS(_, stages[i].nir, nir_opt_clip_cull_const);
2756
2757 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2758 }
2759
2760 radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
2761
2762 radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);
2763
2764 radv_foreach_stage(i, active_nir_stages)
2765 {
2766 int64_t stage_start = os_time_get_nano();
2767
2768 radv_postprocess_nir(device, gfx_state, &stages[i]);
2769
2770 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2771 }
2772
2773 /* Compile NIR shaders to AMD assembly. */
2774 radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
2775 skip_shaders_cache, active_nir_stages, shaders, binaries, gs_copy_shader,
2776 gs_copy_binary);
2777
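/* When shaders are captured (e.g. for pipeline executable properties), keep a copy of the input
 * SPIR-V alongside each compiled shader.
 */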
2778 if (keep_executable_info) {
2779 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
2780 struct radv_shader *shader = shaders[i];
2781 if (!shader)
2782 continue;
2783
2784 if (!stages[i].spirv.size)
2785 continue;
2786
2787 shader->spirv = malloc(stages[i].spirv.size);
2788 memcpy(shader->spirv, stages[i].spirv.data, stages[i].spirv.size);
2789 shader->spirv_size = stages[i].spirv.size;
2790 }
2791 }
2792 }
2793
2794 static bool
2795 radv_should_compute_pipeline_hash(const struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2796 bool fast_linking_enabled)
2797 {
2798 const struct radv_physical_device *pdev = radv_device_physical(device);
2799 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2800
2801 /* Skip computing the pipeline hash when GPL fast-linking is enabled because these shaders aren't
2802 * supposed to be cached and computing the hash is costly. However, always compute it when RGP is
2803 * enabled, otherwise the ISA isn't reported.
2804 */
2805 return !fast_linking_enabled ||
2806 ((instance->vk.trace_mode & RADV_TRACE_MODE_RGP) && pipeline_type == RADV_PIPELINE_GRAPHICS);
2807 }
2808
2809 void
2810 radv_graphics_pipeline_state_finish(struct radv_device *device, struct radv_graphics_pipeline_state *gfx_state)
2811 {
2812 radv_pipeline_layout_finish(device, &gfx_state->layout);
2813 vk_free(&device->vk.alloc, gfx_state->vk_data);
2814
2815 if (gfx_state->stages) {
2816 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++)
2817 ralloc_free(gfx_state->stages[i].nir);
2818 free(gfx_state->stages);
2819 }
2820 }
2821
2822 VkResult
2823 radv_generate_graphics_pipeline_state(struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2824 struct radv_graphics_pipeline_state *gfx_state)
2825 {
2826 VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
2827 const VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2828 const bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
2829 enum radv_pipeline_type pipeline_type = RADV_PIPELINE_GRAPHICS;
2830 VkResult result;
2831
2832 memset(gfx_state, 0, sizeof(*gfx_state));
2833
2834 VkGraphicsPipelineLibraryFlagBitsEXT needed_lib_flags = ALL_GRAPHICS_LIB_FLAGS;
2835 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
2836 const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
2837 vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
2838 needed_lib_flags = lib_info ? lib_info->flags : 0;
2839 pipeline_type = RADV_PIPELINE_GRAPHICS_LIB;
2840 }
2841
2842 radv_pipeline_layout_init(device, &gfx_state->layout, false);
2843
2844 /* If we have libraries, import them first. */
2845 const VkPipelineLibraryCreateInfoKHR *libs_info =
2846 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2847 if (libs_info) {
2848 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2849 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2850 const struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2851
2852 vk_graphics_pipeline_state_merge(&gfx_state->vk, &gfx_pipeline_lib->graphics_state);
2853
2854 radv_graphics_pipeline_import_layout(&gfx_state->layout, &gfx_pipeline_lib->layout);
2855
2856 needed_lib_flags &= ~gfx_pipeline_lib->lib_flags;
2857 }
2858 }
2859
2860 result = vk_graphics_pipeline_state_fill(&device->vk, &gfx_state->vk, pCreateInfo, NULL, 0, NULL, NULL,
2861 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &gfx_state->vk_data);
2862 if (result != VK_SUCCESS)
2863 goto fail;
2864
2865 if (pipeline_layout)
2866 radv_graphics_pipeline_import_layout(&gfx_state->layout, pipeline_layout);
2867
2868 if (radv_should_compute_pipeline_hash(device, pipeline_type, fast_linking_enabled))
2869 radv_pipeline_layout_hash(&gfx_state->layout);
2870
2871 gfx_state->compilation_required = !radv_skip_graphics_pipeline_compile(device, pCreateInfo);
2872 if (gfx_state->compilation_required) {
2873 gfx_state->key = radv_generate_graphics_pipeline_key(device, pCreateInfo, &gfx_state->vk, needed_lib_flags);
2874
2875 gfx_state->stages = malloc(sizeof(struct radv_shader_stage) * MESA_VULKAN_SHADER_STAGES);
2876 if (!gfx_state->stages) {
2877 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2878 goto fail;
2879 }
2880
2881 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2882 gfx_state->stages[i].entrypoint = NULL;
2883 gfx_state->stages[i].nir = NULL;
2884 gfx_state->stages[i].spirv.size = 0;
2885 gfx_state->stages[i].next_stage = MESA_SHADER_NONE;
2886 }
2887
2888 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2889 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2890 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2891
2892 radv_pipeline_stage_init(create_flags, sinfo, &gfx_state->layout, &gfx_state->key.stage_info[stage],
2893 &gfx_state->stages[stage]);
2894 }
2895
2896 radv_pipeline_load_retained_shaders(device, pCreateInfo, gfx_state->stages);
2897 }
2898
2899 return VK_SUCCESS;
2900
2901 fail:
2902 radv_graphics_pipeline_state_finish(device, gfx_state);
2903 return result;
2904 }
2905
2906 void
2907 radv_graphics_pipeline_hash(const struct radv_device *device, const struct radv_graphics_pipeline_state *gfx_state,
2908 unsigned char *hash)
2909 {
2910 struct mesa_sha1 ctx;
2911
2912 _mesa_sha1_init(&ctx);
2913 radv_pipeline_hash(device, &gfx_state->layout, &ctx);
2914
2915 _mesa_sha1_update(&ctx, &gfx_state->key.gfx_state, sizeof(gfx_state->key.gfx_state));
2916
2917 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2918 const struct radv_shader_stage *stage = &gfx_state->stages[s];
2919
2920 if (!stage->entrypoint)
2921 continue;
2922
2923 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
2924 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key));
2925 }
2926
2927 _mesa_sha1_final(&ctx, hash);
2928 }
2929
2930 static VkResult
2931 radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2932 const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device,
2933 struct vk_pipeline_cache *cache, bool fast_linking_enabled)
2934 {
2935 struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
2936 struct radv_shader_binary *gs_copy_binary = NULL;
2937 bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
2938 bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
2939 bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
2940 struct radv_shader_stage *stages = gfx_state->stages;
2941 const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
2942 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2943 VkPipelineCreationFeedback pipeline_feedback = {
2944 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2945 };
2946 VkResult result = VK_SUCCESS;
2947 const bool retain_shaders =
2948 !!(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
2949 struct radv_retained_shaders *retained_shaders = NULL;
2950
2951 int64_t pipeline_start = os_time_get_nano();
2952
2953 if (radv_should_compute_pipeline_hash(device, pipeline->base.type, fast_linking_enabled)) {
2954 radv_graphics_pipeline_hash(device, gfx_state, pipeline->base.sha1);
2955
2956 pipeline->base.pipeline_hash = *(uint64_t *)pipeline->base.sha1;
2957 }
2958
2959 /* Skip the shaders cache when any of the below are true:
2960 * - fast-linking is enabled because it's useless to cache unoptimized pipelines
2961 * - graphics pipeline libraries are created with the RETAIN_LINK_TIME_OPTIMIZATION flag and
2962 * module identifiers are used (i.e. no SPIR-V provided).
2963 */
2964 if (fast_linking_enabled) {
2965 skip_shaders_cache = true;
2966 } else if (retain_shaders) {
2967 assert(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
2968 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2969 if (stages[i].entrypoint && !stages[i].spirv.size) {
2970 skip_shaders_cache = true;
2971 break;
2972 }
2973 }
2974 }
2975
2976 bool found_in_application_cache = true;
2977 if (!skip_shaders_cache &&
2978 radv_graphics_pipeline_cache_search(device, cache, pipeline, &found_in_application_cache)) {
2979 if (found_in_application_cache)
2980 pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2981
2982 if (retain_shaders) {
2983 /* For graphics pipeline libraries created with the RETAIN_LINK_TIME_OPTIMIZATION flag, we
2984 * need to retain the stage info because we can't know if the LTO pipelines will
2985 * be found in the shaders cache.
2986 */
2987 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
2988
2989 gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(device, pCreateInfo->stageCount,
2990 pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx);
2991 if (!gfx_pipeline_lib->stages)
2992 return VK_ERROR_OUT_OF_HOST_MEMORY;
2993
2994 gfx_pipeline_lib->stage_count = pCreateInfo->stageCount;
2995
2996 for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
2997 gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
2998 gfx_pipeline_lib->stage_keys[s] = gfx_state->key.stage_info[s];
2999 }
3000 }
3001
3002 result = VK_SUCCESS;
3003 goto done;
3004 }
3005
3006 if (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3007 return VK_PIPELINE_COMPILE_REQUIRED;
3008
3009 if (retain_shaders) {
3010 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
3011 retained_shaders = &gfx_pipeline_lib->retained_shaders;
3012 }
3013
3014 const bool noop_fs = radv_pipeline_needs_noop_fs(pipeline, &gfx_state->key.gfx_state);
3015
3016 radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
3017 keep_statistic_info, pipeline->base.is_internal, skip_shaders_cache, retained_shaders,
3018 noop_fs, pipeline->base.shaders, binaries, &pipeline->base.gs_copy_shader,
3019 &gs_copy_binary);
3020
3021 if (!skip_shaders_cache) {
3022 radv_pipeline_cache_insert(device, cache, &pipeline->base);
3023 }
3024
3025 free(gs_copy_binary);
3026 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3027 free(binaries[i]);
3028 if (stages[i].nir) {
3029 if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) {
3030 radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], i, stderr);
3031 }
3032 }
3033 }
3034
3035 done:
3036 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3037
3038 if (creation_feedback) {
3039 *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
3040
3041 if (creation_feedback->pipelineStageCreationFeedbackCount > 0) {
3042 uint32_t num_feedbacks = 0;
3043
3044 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
3045 gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
3046 creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
3047 }
3048
3049 /* Stages imported from graphics pipeline libraries are reported as additional feedback entries,
3050 * in the order they were imported.
3051 */
3052 const VkPipelineLibraryCreateInfoKHR *libs_info =
3053 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3054 if (libs_info) {
3055 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3056 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3057 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3058
3059 if (!gfx_pipeline_lib->base.active_stages)
3060 continue;
3061
3062 radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages)
3063 {
3064 creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
3065 }
3066 }
3067 }
3068
3069 assert(num_feedbacks == creation_feedback->pipelineStageCreationFeedbackCount);
3070 }
3071 }
3072
3073 return result;
3074 }
3075
3076 struct radv_vgt_shader_key
3077 radv_get_vgt_shader_key(const struct radv_device *device, struct radv_shader **shaders,
3078 const struct radv_shader *gs_copy_shader)
3079 {
3080 uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
3081 struct radv_shader *last_vgt_shader = NULL;
3082 struct radv_vgt_shader_key key;
3083
3084 memset(&key, 0, sizeof(key));
3085
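/* The last pre-rasterization (VGT) stage determines NGG, NGG passthrough and NGG streamout; the
 * wave sizes come from the individual shaders (the GS copy shader provides the hardware VS wave
 * size when present).
 */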
3086 if (shaders[MESA_SHADER_GEOMETRY]) {
3087 last_vgt_shader = shaders[MESA_SHADER_GEOMETRY];
3088 } else if (shaders[MESA_SHADER_TESS_EVAL]) {
3089 last_vgt_shader = shaders[MESA_SHADER_TESS_EVAL];
3090 } else if (shaders[MESA_SHADER_VERTEX]) {
3091 last_vgt_shader = shaders[MESA_SHADER_VERTEX];
3092 } else {
3093 assert(shaders[MESA_SHADER_MESH]);
3094 last_vgt_shader = shaders[MESA_SHADER_MESH];
3095 }
3096
3097 vs_size = gs_size = last_vgt_shader->info.wave_size;
3098 if (gs_copy_shader)
3099 vs_size = gs_copy_shader->info.wave_size;
3100
3101 if (shaders[MESA_SHADER_TESS_CTRL])
3102 hs_size = shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
3103
3104 key.tess = !!shaders[MESA_SHADER_TESS_CTRL];
3105 key.gs = !!shaders[MESA_SHADER_GEOMETRY];
3106 if (last_vgt_shader->info.is_ngg) {
3107 key.ngg = 1;
3108 key.ngg_passthrough = last_vgt_shader->info.is_ngg_passthrough;
3109 key.ngg_streamout = last_vgt_shader->info.so.num_outputs > 0;
3110 }
3111 if (shaders[MESA_SHADER_MESH]) {
3112 key.mesh = 1;
3113 key.mesh_scratch_ring = shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
3114 }
3115
3116 key.hs_wave32 = hs_size == 32;
3117 key.vs_wave32 = vs_size == 32;
3118 key.gs_wave32 = gs_size == 32;
3119
3120 return key;
3121 }
3122
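/* Whether coarse VRS shading can be used for this pipeline on GFX10.3. This is disabled with
 * RADV_DEBUG_NO_VRS_FLAT_SHADING and when the fragment shader doesn't allow flat shading.
 */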
3123 static bool
3124 gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
3125 {
3126 const struct radv_physical_device *pdev = radv_device_physical(device);
3127 const struct radv_instance *instance = radv_physical_device_instance(pdev);
3128 struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
3129
3130 if (pdev->info.gfx_level != GFX10_3)
3131 return false;
3132
3133 if (instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
3134 return false;
3135
3136 if (ps && !ps->info.ps.allow_flat_shading)
3137 return false;
3138
3139 return true;
3140 }
3141
3142 static void
3143 radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3144 const struct vk_graphics_pipeline_state *state)
3145 {
3146 const struct radv_physical_device *pdev = radv_device_physical(device);
3147 const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
3148
3149 if (!state->vi)
3150 return;
3151
3152 u_foreach_bit (i, state->vi->bindings_valid) {
3153 pipeline->binding_stride[i] = state->vi->bindings[i].stride;
3154 }
3155
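/* When the vertex shader uses one vertex buffer descriptor per attribute, precompute everything
 * needed to build those descriptors at draw time: per-attribute binding, offset and format
 * information, alignment requirements, instance rate divisors and format workarounds
 * (alpha adjust, post shuffle).
 */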
3156 if (vs->info.vs.use_per_attribute_vb_descs) {
3157 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3158 const enum radeon_family family = pdev->info.family;
3159 const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
3160
3161 pipeline->vertex_input.bindings_match_attrib = true;
3162
3163 u_foreach_bit (i, state->vi->attributes_valid) {
3164 uint32_t binding = state->vi->attributes[i].binding;
3165 uint32_t offset = state->vi->attributes[i].offset;
3166
3167 pipeline->vertex_input.attribute_mask |= BITFIELD_BIT(i);
3168 pipeline->vertex_input.bindings[i] = binding;
3169 pipeline->vertex_input.bindings_match_attrib &= binding == i;
3170
3171 if (state->vi->bindings[binding].stride) {
3172 pipeline->vertex_input.attrib_index_offset[i] = offset / state->vi->bindings[binding].stride;
3173 }
3174
3175 if (state->vi->bindings[binding].input_rate) {
3176 pipeline->vertex_input.instance_rate_inputs |= BITFIELD_BIT(i);
3177 pipeline->vertex_input.divisors[i] = state->vi->bindings[binding].divisor;
3178
3179 if (state->vi->bindings[binding].divisor == 0) {
3180 pipeline->vertex_input.zero_divisors |= BITFIELD_BIT(i);
3181 } else if (state->vi->bindings[binding].divisor > 1) {
3182 pipeline->vertex_input.nontrivial_divisors |= BITFIELD_BIT(i);
3183 }
3184 }
3185
3186 pipeline->vertex_input.offsets[i] = offset;
3187
3188 enum pipe_format format = radv_format_to_pipe_format(state->vi->attributes[i].format);
3189 const struct ac_vtx_format_info *vtx_info = &vtx_info_table[format];
3190
3191 pipeline->vertex_input.formats[i] = format;
3192 uint8_t format_align_req_minus_1 = vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1);
3193 pipeline->vertex_input.format_align_req_minus_1[i] = format_align_req_minus_1;
3194 uint8_t component_align_req_minus_1 =
3195 MIN2(vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size, 4) - 1;
3196 pipeline->vertex_input.component_align_req_minus_1[i] = component_align_req_minus_1;
3197 pipeline->vertex_input.format_sizes[i] = vtx_info->element_size;
3198 pipeline->vertex_input.alpha_adjust_lo |= (vtx_info->alpha_adjust & 0x1) << i;
3199 pipeline->vertex_input.alpha_adjust_hi |= (vtx_info->alpha_adjust >> 1) << i;
3200 if (G_008F0C_DST_SEL_X(vtx_info->dst_sel) == V_008F0C_SQ_SEL_Z) {
3201 pipeline->vertex_input.post_shuffle |= BITFIELD_BIT(i);
3202 }
3203
3204 if (!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
3205 pipeline->vertex_input.nontrivial_formats |= BITFIELD_BIT(i);
3206 }
3207 }
3208 } else {
3209 u_foreach_bit (i, vs->info.vs.vb_desc_usage_mask) {
3210 pipeline->vertex_input.bindings[i] = i;
3211 }
3212 }
3213 }
3214
3215 static void
3216 radv_pipeline_init_shader_stages_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3217 {
3218 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3219 bool shader_exists = !!pipeline->base.shaders[i];
3220 if (shader_exists || i < MESA_SHADER_COMPUTE) {
3221 if (shader_exists)
3222 pipeline->base.need_indirect_descriptor_sets |=
3223 radv_shader_need_indirect_descriptor_sets(pipeline->base.shaders[i]);
3224 }
3225 }
3226
3227 gl_shader_stage first_stage =
3228 radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
3229
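/* Cache the user SGPR location used to emit the base vertex/start instance, and whether draw id
 * and base instance are needed, to speed up draw calls.
 */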
3230 const struct radv_shader *shader = radv_get_shader(pipeline->base.shaders, first_stage);
3231 const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
3232
3233 if (loc->sgpr_idx != -1) {
3234 pipeline->vtx_base_sgpr = shader->info.user_data_0;
3235 pipeline->vtx_base_sgpr += loc->sgpr_idx * 4;
3236 pipeline->vtx_emit_num = loc->num_sgprs;
3237 pipeline->uses_drawid = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id;
3238 pipeline->uses_baseinstance = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance;
3239
3240 assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance);
3241 }
3242 }
3243
3244 uint32_t
3245 radv_get_vgt_gs_out(struct radv_shader **shaders, uint32_t primitive_topology, bool is_ngg)
3246 {
3247 uint32_t gs_out;
3248
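/* The output primitive type is determined by the last pre-rasterization stage: the GS output
 * primitive, the TES primitive mode (or points in point mode), the MS output primitive, or
 * otherwise the input assembly topology.
 */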
3249 if (shaders[MESA_SHADER_GEOMETRY]) {
3250 gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
3251 } else if (shaders[MESA_SHADER_TESS_CTRL]) {
3252 if (shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
3253 gs_out = V_028A6C_POINTLIST;
3254 } else {
3255 gs_out = radv_conv_tess_prim_to_gs_out(shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
3256 }
3257 } else if (shaders[MESA_SHADER_MESH]) {
3258 gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_MESH]->info.ms.output_prim);
3259 } else {
3260 gs_out = radv_conv_prim_to_gs_out(primitive_topology, is_ngg);
3261 }
3262
3263 return gs_out;
3264 }
3265
3266 static uint32_t
3267 radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
3268 {
3269 const bool is_ngg = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.is_ngg;
3270 uint32_t primitive_topology = 0;
3271
3272 if (pipeline->last_vgt_api_stage == MESA_SHADER_VERTEX)
3273 primitive_topology = radv_translate_prim(state->ia->primitive_topology);
3274
3275 return radv_get_vgt_gs_out(pipeline->base.shaders, primitive_topology, is_ngg);
3276 }
3277
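/* Apply the driver-internal create info (custom blend mode and DB render control overrides),
 * which is used by internal (meta) pipelines.
 */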
3278 static void
3279 radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfoRADV *radv_info,
3280 const struct vk_graphics_pipeline_state *state)
3281 {
3282 pipeline->custom_blend_mode = radv_info->custom_blend_mode;
3283
3284 if (radv_pipeline_has_ds_attachments(state->rp)) {
3285 pipeline->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(radv_info->db_depth_clear);
3286 pipeline->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(radv_info->db_stencil_clear);
3287 pipeline->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(radv_info->depth_compress_disable);
3288 pipeline->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(radv_info->stencil_compress_disable);
3289 }
3290 }
3291
3292 bool
3293 radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
3294 unsigned custom_blend_mode)
3295 {
3296 const struct radv_physical_device *pdev = radv_device_physical(device);
3297 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3298
3299 if (!ps)
3300 return false;
3301
3302 /* Ensure that some export memory is always allocated, for two reasons:
3303 *
3304 * 1) Correctness: The hardware ignores the EXEC mask if no export
3305 * memory is allocated, so KILL and alpha test do not work correctly
3306 * without this.
3307 * 2) Performance: Every shader needs at least a NULL export, even when
3308 * it writes no color/depth output. The NULL export instruction
3309 * stalls without this setting.
3310 *
3311 * Don't add this to CB_SHADER_MASK.
3312 *
3313 * GFX10 supports pixel shaders without exports by setting both the
3314 * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
3315 * instructions if any are present.
3316 *
3317 * GFX11 requires one color output, otherwise the DCC decompression does nothing.
3318 *
3319 * Primitive Ordered Pixel Shading also requires an export, otherwise interlocking doesn't work
3320 * correctly before GFX11, and a hang happens on GFX11.
3321 */
3322 return (gfx_level <= GFX9 || ps->info.ps.can_discard || ps->info.ps.pops ||
3323 (custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 && gfx_level >= GFX11)) &&
3324 !ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask;
3325 }
3326
3327 static VkResult
3328 radv_graphics_pipeline_import_binaries(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3329 const VkPipelineBinaryInfoKHR *binary_info)
3330 {
3331 blake3_hash pipeline_hash;
3332 struct mesa_blake3 ctx;
3333
3334 _mesa_blake3_init(&ctx);
3335
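/* Deserialize each imported binary into a radv_shader and derive the pipeline hash from the
 * binary keys.
 */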
3336 for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
3337 VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
3338 struct radv_shader *shader;
3339 struct blob_reader blob;
3340
3341 blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
3342
3343 shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
3344 if (!shader)
3345 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3346
3347 if (shader->info.stage == MESA_SHADER_VERTEX && i > 0) {
3348 /* The GS copy-shader is a VS placed after all other stages. */
3349 pipeline->base.gs_copy_shader = shader;
3350 } else {
3351 pipeline->base.shaders[shader->info.stage] = shader;
3352 }
3353
3354 _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
3355 }
3356
3357 _mesa_blake3_final(&ctx, pipeline_hash);
3358
3359 pipeline->base.pipeline_hash = *(uint64_t *)pipeline_hash;
3360
3361 pipeline->has_pipeline_binaries = true;
3362
3363 return VK_SUCCESS;
3364 }
3365
3366 static VkResult
3367 radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
3368 struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
3369 {
3370 bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
3371 struct radv_graphics_pipeline_state gfx_state;
3372 VkResult result = VK_SUCCESS;
3373
3374 pipeline->last_vgt_api_stage = MESA_SHADER_NONE;
3375
3376 const VkPipelineLibraryCreateInfoKHR *libs_info =
3377 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3378
3379 /* If we have libraries, import them first. */
3380 if (libs_info) {
3381 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3382 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3383 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3384
3385 assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
3386
3387 radv_graphics_pipeline_import_lib(device, pipeline, gfx_pipeline_lib);
3388 }
3389 }
3390
3391 radv_pipeline_import_graphics_info(device, pipeline, pCreateInfo);
3392
3393 result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
3394 if (result != VK_SUCCESS)
3395 return result;
3396
3397 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
3398
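/* Import precompiled binaries when they are provided, otherwise compile the shaders unless
 * compilation can be skipped entirely (e.g. fast-linking with fully imported libraries).
 */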
3399 if (binary_info && binary_info->binaryCount > 0) {
3400 result = radv_graphics_pipeline_import_binaries(device, pipeline, binary_info);
3401 } else {
3402 if (gfx_state.compilation_required) {
3403 result =
3404 radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
3405 }
3406 }
3407
3408 if (result != VK_SUCCESS) {
3409 radv_graphics_pipeline_state_finish(device, &gfx_state);
3410 return result;
3411 }
3412
3413 uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk);
3414
3415 radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk);
3416
3417 if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3418 radv_pipeline_init_input_assembly_state(device, pipeline);
3419 radv_pipeline_init_dynamic_state(device, pipeline, &gfx_state.vk, pCreateInfo);
3420
3421 if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3422 radv_pipeline_init_vertex_input_state(device, pipeline, &gfx_state.vk);
3423
3424 radv_pipeline_init_shader_stages_state(device, pipeline);
3425
3426 pipeline->is_ngg = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.is_ngg;
3427 pipeline->has_ngg_culling = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
3428 pipeline->force_vrs_per_vertex = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
3429 pipeline->rast_prim = vgt_gs_out_prim_type;
3430 pipeline->uses_out_of_order_rast = gfx_state.vk.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD;
3431 pipeline->uses_vrs = radv_is_vrs_enabled(&gfx_state.vk);
3432 pipeline->uses_vrs_attachment = radv_pipeline_uses_vrs_attachment(pipeline, &gfx_state.vk);
3433 pipeline->uses_vrs_coarse_shading = !pipeline->uses_vrs && gfx103_pipeline_vrs_coarse_shading(device, pipeline);
3434
3435 pipeline->base.push_constant_size = gfx_state.layout.push_constant_size;
3436 pipeline->base.dynamic_offset_count = gfx_state.layout.dynamic_offset_count;
3437
3438 const VkGraphicsPipelineCreateInfoRADV *radv_info =
3439 vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_CREATE_INFO_RADV);
3440 if (radv_info) {
3441 radv_pipeline_init_extra(pipeline, radv_info, &gfx_state.vk);
3442 }
3443
3444 radv_graphics_pipeline_state_finish(device, &gfx_state);
3445 return result;
3446 }
3447
3448 static VkResult
3449 radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
3450 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
3451 {
3452 VK_FROM_HANDLE(radv_device, device, _device);
3453 VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
3454 struct radv_graphics_pipeline *pipeline;
3455 VkResult result;
3456
3457 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3458 if (pipeline == NULL)
3459 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3460
3461 radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_GRAPHICS);
3462 pipeline->base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
3463 pipeline->base.is_internal = _cache == device->meta_state.cache;
3464
3465 result = radv_graphics_pipeline_init(pipeline, device, cache, pCreateInfo);
3466 if (result != VK_SUCCESS) {
3467 radv_pipeline_destroy(device, &pipeline->base, pAllocator);
3468 return result;
3469 }
3470
3471 *pPipeline = radv_pipeline_to_handle(&pipeline->base);
3472 radv_rmv_log_graphics_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
3473 return VK_SUCCESS;
3474 }
3475
3476 void
3477 radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3478 {
3479 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3480 if (pipeline->base.shaders[i])
3481 radv_shader_unref(device, pipeline->base.shaders[i]);
3482 }
3483
3484 if (pipeline->base.gs_copy_shader)
3485 radv_shader_unref(device, pipeline->base.gs_copy_shader);
3486 }
3487
3488 static VkResult
3489 radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, struct radv_device *device,
3490 struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
3491 {
3492 VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3493 VkResult result;
3494
3495 const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
3496 vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
3497 const VkPipelineLibraryCreateInfoKHR *libs_info =
3498 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3499 bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
3500
3501 struct vk_graphics_pipeline_state *state = &pipeline->graphics_state;
3502
3503 pipeline->base.last_vgt_api_stage = MESA_SHADER_NONE;
3504 pipeline->lib_flags = lib_info ? lib_info->flags : 0;
3505
3506 radv_pipeline_layout_init(device, &pipeline->layout, false);
3507
3508 /* If we have libraries, import them first. */
3509 if (libs_info) {
3510 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3511 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3512 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3513
3514 vk_graphics_pipeline_state_merge(state, &gfx_pipeline_lib->graphics_state);
3515
3516 radv_graphics_pipeline_import_layout(&pipeline->layout, &gfx_pipeline_lib->layout);
3517
3518 radv_graphics_pipeline_import_lib(device, &pipeline->base, gfx_pipeline_lib);
3519
3520 pipeline->lib_flags |= gfx_pipeline_lib->lib_flags;
3521 }
3522 }
3523
3524 result = vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, 0, NULL, NULL,
3525 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data);
3526 if (result != VK_SUCCESS)
3527 return result;
3528
3529 radv_pipeline_import_graphics_info(device, &pipeline->base, pCreateInfo);
3530
3531 if (pipeline_layout)
3532 radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout);
3533
3534 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
3535
3536 if (binary_info && binary_info->binaryCount > 0) {
3537 result = radv_graphics_pipeline_import_binaries(device, &pipeline->base, binary_info);
3538 } else {
3539 struct radv_graphics_pipeline_state gfx_state;
3540
3541 result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
3542 if (result != VK_SUCCESS)
3543 return result;
3544
3545 result =
3546 radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
3547
3548 radv_graphics_pipeline_state_finish(device, &gfx_state);
3549 }
3550
3551 return result;
3552 }
3553
3554 static VkResult
3555 radv_graphics_lib_pipeline_create(VkDevice _device, VkPipelineCache _cache,
3556 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3557 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
3558 {
3559 VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
3560 VK_FROM_HANDLE(radv_device, device, _device);
3561 struct radv_graphics_lib_pipeline *pipeline;
3562 VkResult result;
3563
3564 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3565 if (pipeline == NULL)
3566 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3567
3568 radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_GRAPHICS_LIB);
3569 pipeline->base.base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
3570
3571 pipeline->mem_ctx = ralloc_context(NULL);
3572
3573 result = radv_graphics_lib_pipeline_init(pipeline, device, cache, pCreateInfo);
3574 if (result != VK_SUCCESS) {
3575 radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
3576 return result;
3577 }
3578
3579 *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);
3580
3581 return VK_SUCCESS;
3582 }
3583
3584 void
3585 radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline)
3586 {
3587 struct radv_retained_shaders *retained_shaders = &pipeline->retained_shaders;
3588
3589 radv_pipeline_layout_finish(device, &pipeline->layout);
3590
3591 vk_free(&device->vk.alloc, pipeline->state_data);
3592
3593 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3594 free(retained_shaders->stages[i].serialized_nir);
3595 }
3596
3597 ralloc_free(pipeline->mem_ctx);
3598
3599 radv_destroy_graphics_pipeline(device, &pipeline->base);
3600 }
3601
3602 VKAPI_ATTR VkResult VKAPI_CALL
3603 radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
3604 const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
3605 VkPipeline *pPipelines)
3606 {
3607 VkResult result = VK_SUCCESS;
3608 unsigned i = 0;
3609
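/* Create each pipeline (library or regular). On failure, record the error, set the corresponding
 * entry to VK_NULL_HANDLE and stop early only when EARLY_RETURN_ON_FAILURE is requested; any
 * remaining entries are also set to VK_NULL_HANDLE.
 */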
3610 for (; i < count; i++) {
3611 const VkPipelineCreateFlagBits2 create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
3612 VkResult r;
3613 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
3614 r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
3615 } else {
3616 r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
3617 }
3618 if (r != VK_SUCCESS) {
3619 result = r;
3620 pPipelines[i] = VK_NULL_HANDLE;
3621
3622 if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT)
3623 break;
3624 }
3625 }
3626
3627 for (; i < count; ++i)
3628 pPipelines[i] = VK_NULL_HANDLE;
3629
3630 return result;
3631 }
3632