1 /*
2 * Copyright © 2019 Red Hat.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "lvp_private.h"
25 #include "vk_nir_convert_ycbcr.h"
26 #include "vk_pipeline.h"
27 #include "vk_render_pass.h"
28 #include "vk_util.h"
29 #include "glsl_types.h"
30 #include "util/os_time.h"
31 #include "spirv/nir_spirv.h"
32 #include "nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34 #include "lvp_lower_vulkan_resource.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "nir/nir_xfb_info.h"
38
39 #define SPIR_V_MAGIC_NUMBER 0x07230203
40
41 #define MAX_DYNAMIC_STATES 72
42
43 typedef void (*cso_destroy_func)(struct pipe_context*, void*);
44
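/* Tear down one shader stage: delete every inline-uniform variant CSO, the
 * base CSO and the tess_ccw CSO, then drop the NIR references. The destroy[]
 * table below is indexed by gl_shader_stage, so its entries must stay in enum
 * order (VS, TCS, TES, GS, FS, CS, TS, MS). "locked" tells us whether the
 * caller already holds device->queue.lock, which guards CSO destruction.
 */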
45 static void
46 shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked)
47 {
48 if (!shader->pipeline_nir)
49 return;
50 gl_shader_stage stage = shader->pipeline_nir->nir->info.stage;
51 cso_destroy_func destroy[] = {
52 device->queue.ctx->delete_vs_state,
53 device->queue.ctx->delete_tcs_state,
54 device->queue.ctx->delete_tes_state,
55 device->queue.ctx->delete_gs_state,
56 device->queue.ctx->delete_fs_state,
57 device->queue.ctx->delete_compute_state,
58 device->queue.ctx->delete_ts_state,
59 device->queue.ctx->delete_ms_state,
60 };
61
62 if (!locked)
63 simple_mtx_lock(&device->queue.lock);
64
65 set_foreach(&shader->inlines.variants, entry) {
66 struct lvp_inline_variant *variant = (void*)entry->key;
67 destroy[stage](device->queue.ctx, variant->cso);
68 free(variant);
69 }
70 ralloc_free(shader->inlines.variants.table);
71
72 if (shader->shader_cso)
73 destroy[stage](device->queue.ctx, shader->shader_cso);
74 if (shader->tess_ccw_cso)
75 destroy[stage](device->queue.ctx, shader->tess_ccw_cso);
76
77 if (!locked)
78 simple_mtx_unlock(&device->queue.lock);
79
80 lvp_pipeline_nir_ref(&shader->pipeline_nir, NULL);
81 lvp_pipeline_nir_ref(&shader->tess_ccw, NULL);
82 }
83
84 void
85 lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline, bool locked)
86 {
87 lvp_forall_stage(i)
88 shader_destroy(device, &pipeline->shaders[i], locked);
89
90 if (pipeline->layout)
91 vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
92
93 for (unsigned i = 0; i < pipeline->num_groups; i++) {
94 LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
95 lvp_pipeline_destroy(device, p, locked);
96 }
97
98 if (pipeline->rt.stages) {
99 for (uint32_t i = 0; i < pipeline->rt.stage_count; i++)
100 lvp_pipeline_nir_ref(pipeline->rt.stages + i, NULL);
101 }
102
103 free(pipeline->rt.stages);
104 free(pipeline->rt.groups);
105
106 vk_free(&device->vk.alloc, pipeline->state_data);
107 vk_object_base_finish(&pipeline->base);
108 vk_free(&device->vk.alloc, pipeline);
109 }
110
111 VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
112 VkDevice _device,
113 VkPipeline _pipeline,
114 const VkAllocationCallbacks* pAllocator)
115 {
116 LVP_FROM_HANDLE(lvp_device, device, _device);
117 LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);
118
119 if (!_pipeline)
120 return;
121
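   /* Pipelines that have already been used on the queue may still be
    * referenced by in-flight work, so hand them to the queue for deferred
    * destruction instead of freeing them immediately.
    */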
122 if (pipeline->used) {
123 simple_mtx_lock(&device->queue.lock);
124 util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
125 simple_mtx_unlock(&device->queue.lock);
126 } else {
127 lvp_pipeline_destroy(device, pipeline, false);
128 }
129 }
130
131 static void
132 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
133 {
134 assert(glsl_type_is_vector_or_scalar(type));
135
136 uint32_t comp_size = glsl_type_is_boolean(type)
137 ? 4 : glsl_get_bit_size(type) / 8;
138 unsigned length = glsl_get_vector_elements(type);
139 *size = comp_size * length;
140 *align = comp_size;
141 }
142
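/* Lavapipe can drop barriers that its execution model makes redundant: for
 * compute-like stages (data != NULL) only barriers with no execution scope
 * and at most subgroup memory scope are removed, while for all other stages
 * every barrier goes away.
 */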
143 static bool
144 remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
145 {
146 if (intr->intrinsic != nir_intrinsic_barrier)
147 return false;
148 if (data) {
149 if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE)
150 return false;
151
152 if (nir_intrinsic_memory_scope(intr) == SCOPE_WORKGROUP ||
153 nir_intrinsic_memory_scope(intr) == SCOPE_DEVICE ||
154 nir_intrinsic_memory_scope(intr) == SCOPE_QUEUE_FAMILY)
155 return false;
156 }
157 nir_instr_remove(&intr->instr);
158 return true;
159 }
160
161 static bool
162 remove_barriers(nir_shader *nir, bool is_compute)
163 {
164 return nir_shader_intrinsics_pass(nir, remove_barriers_impl,
165 nir_metadata_dominance,
166 (void*)is_compute);
167 }
168
169 static bool
170 lower_demote_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
171 {
172 if (intr->intrinsic == nir_intrinsic_demote) {
173 intr->intrinsic = nir_intrinsic_terminate;
174 return true;
175 }
176 if (intr->intrinsic == nir_intrinsic_demote_if) {
177 intr->intrinsic = nir_intrinsic_terminate_if;
178 return true;
179 }
180 return false;
181 }
182
183 static bool
184 lower_demote(nir_shader *nir)
185 {
186 return nir_shader_intrinsics_pass(nir, lower_demote_impl,
187 nir_metadata_dominance, NULL);
188 }
189
190 static bool
191 find_tex(const nir_instr *instr, const void *data_cb)
192 {
193 if (instr->type == nir_instr_type_tex)
194 return true;
195 return false;
196 }
197
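/* When a texture instruction carries a constant texture_offset source, fold
 * the offset into texture_index and drop the source so the access no longer
 * looks indirect to the backend.
 */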
198 static nir_def *
199 fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
200 {
201 nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
202 unsigned offset = 0;
203
204 int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
205 if (idx == -1)
206 return NULL;
207
208 if (!nir_src_is_const(tex_instr->src[idx].src))
209 return NULL;
210 offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);
211
212 nir_tex_instr_remove_src(tex_instr, idx);
213 tex_instr->texture_index += offset;
214 return NIR_LOWER_INSTR_PROGRESS;
215 }
216
217 static bool
218 lvp_nir_fixup_indirect_tex(nir_shader *shader)
219 {
220 return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
221 }
222
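/* Standard NIR optimization loop: rerun these passes until none of them
 * reports progress.
 */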
223 static void
224 optimize(nir_shader *nir)
225 {
226 bool progress = false;
227 do {
228 progress = false;
229
230 NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
231 NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
232 NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
233 NIR_PASS(progress, nir, nir_opt_deref);
234 NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
235
236 NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
237
238 NIR_PASS(progress, nir, nir_copy_prop);
239 NIR_PASS(progress, nir, nir_opt_dce);
240 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
241
242 NIR_PASS(progress, nir, nir_opt_algebraic);
243 NIR_PASS(progress, nir, nir_opt_constant_folding);
244
245 NIR_PASS(progress, nir, nir_opt_remove_phis);
246 bool loop = false;
247 NIR_PASS(loop, nir, nir_opt_loop);
248 progress |= loop;
249 if (loop) {
250 /* If nir_opt_loop makes progress, then we need to clean
251 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
252 * to make progress.
253 */
254 NIR_PASS(progress, nir, nir_copy_prop);
255 NIR_PASS(progress, nir, nir_opt_dce);
256 NIR_PASS(progress, nir, nir_opt_remove_phis);
257 }
258 NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
259 NIR_PASS(progress, nir, nir_opt_dead_cf);
260 NIR_PASS(progress, nir, nir_opt_conditional_discard);
261 NIR_PASS(progress, nir, nir_opt_remove_phis);
262 NIR_PASS(progress, nir, nir_opt_cse);
263 NIR_PASS(progress, nir, nir_opt_undef);
264
265 NIR_PASS(progress, nir, nir_opt_deref);
266 NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
267 NIR_PASS(progress, nir, nir_opt_loop_unroll);
268 NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
269 } while (progress);
270 }
271
272 void
273 lvp_shader_optimize(nir_shader *nir)
274 {
275 optimize(nir);
276 NIR_PASS_V(nir, nir_lower_var_copies);
277 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
278 NIR_PASS_V(nir, nir_opt_dce);
279 nir_sweep(nir);
280 }
281
282 struct lvp_pipeline_nir *
283 lvp_create_pipeline_nir(nir_shader *nir)
284 {
285 struct lvp_pipeline_nir *pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir);
286 pipeline_nir->nir = nir;
287 pipeline_nir->ref_cnt = 1;
288 return pipeline_nir;
289 }
290
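/* Shared SPIR-V-to-NIR entry point: runs the common vk_pipeline translation
 * using lavapipe's address formats (vec2 index + 32-bit offset for descriptor
 * UBO/SSBO access, 64-bit global for physical storage buffers and constants).
 */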
291 static VkResult
292 compile_spirv(struct lvp_device *pdevice,
293 VkPipelineCreateFlags2KHR pipeline_flags,
294 const VkPipelineShaderStageCreateInfo *sinfo,
295 nir_shader **nir)
296 {
297 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
298 assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
299 VkResult result;
300
301 #ifdef VK_ENABLE_BETA_EXTENSIONS
302 const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
303 sinfo->pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
304 #endif
305
306 const struct spirv_to_nir_options spirv_options = {
307 .environment = NIR_SPIRV_VULKAN,
308 .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
309 .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
310 .phys_ssbo_addr_format = nir_address_format_64bit_global,
311 .push_const_addr_format = nir_address_format_logical,
312 .shared_addr_format = nir_address_format_32bit_offset,
313 .constant_addr_format = nir_address_format_64bit_global,
314 #ifdef VK_ENABLE_BETA_EXTENSIONS
315 .shader_index = node_info ? node_info->index : 0,
316 #endif
317 };
318
319 result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, pipeline_flags, sinfo,
320 &spirv_options, pdevice->physical_device->drv_options[stage],
321 NULL, nir);
322 return result;
323 }
324
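/* Key-compare callback for the inline-uniform variant set: two variants with
 * the same mask are equal when every inlined uniform slot holds the same
 * values.
 */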
325 static bool
326 inline_variant_equals(const void *a, const void *b)
327 {
328 const struct lvp_inline_variant *av = a, *bv = b;
329 assert(av->mask == bv->mask);
330 u_foreach_bit(slot, av->mask) {
331 if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
332 return false;
333 }
334 return true;
335 }
336
337 static const struct vk_ycbcr_conversion_state *
338 lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
339 {
340 const struct lvp_pipeline_layout *layout = data;
341
342 const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk);
343 const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding];
344 if (!binding_layout->immutable_samplers)
345 return NULL;
346
347 struct vk_ycbcr_conversion *ycbcr_conversion = binding_layout->immutable_samplers[array_index]->vk.ycbcr_conversion;
348 return ycbcr_conversion ? &ycbcr_conversion->state : NULL;
349 }
350
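/* Common NIR lowering shared by pipeline creation and shader objects: strip
 * no-op barriers, lower system values, subgroups and explicit I/O to the
 * address formats chosen in compile_spirv(), apply the pipeline-layout and
 * ray-query lowering, lower unsupported texture ops, then optimize and assign
 * driver I/O locations.
 */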
351 /* pipeline is NULL for shader objects. */
352 static void
353 lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_pipeline_layout *layout)
354 {
355 if (nir->info.stage != MESA_SHADER_TESS_CTRL)
356 NIR_PASS_V(nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK);
357
358 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
359 .frag_coord = true,
360 .point_coord = true,
361 };
362 NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
363
364 struct nir_lower_subgroups_options subgroup_opts = {0};
365 subgroup_opts.lower_quad = true;
366 subgroup_opts.ballot_components = 1;
367 subgroup_opts.ballot_bit_size = 32;
368 subgroup_opts.lower_inverse_ballot = true;
369 subgroup_opts.lower_rotate_to_shuffle = true;
370 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
371
372 if (nir->info.stage == MESA_SHADER_FRAGMENT)
373 lvp_lower_input_attachments(nir, false);
374 NIR_PASS_V(nir, nir_lower_system_values);
375 NIR_PASS_V(nir, nir_lower_is_helper_invocation);
376 NIR_PASS_V(nir, lower_demote);
377
378 const struct nir_lower_compute_system_values_options compute_system_values = {0};
379 NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_system_values);
380
381 NIR_PASS_V(nir, nir_remove_dead_variables,
382 nir_var_uniform | nir_var_image, NULL);
383
384 optimize(nir);
385 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
386
387 NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
388 NIR_PASS_V(nir, nir_split_var_copies);
389 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
390
391 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
392 nir_address_format_32bit_offset);
393
394 NIR_PASS_V(nir, nir_lower_explicit_io,
395 nir_var_mem_ubo | nir_var_mem_ssbo,
396 nir_address_format_vec2_index_32bit_offset);
397
398 NIR_PASS_V(nir, nir_lower_explicit_io,
399 nir_var_mem_global | nir_var_mem_constant,
400 nir_address_format_64bit_global);
401
402 NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout);
403
404 nir_lower_non_uniform_access_options options = {
405 .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
406 };
407 NIR_PASS(_, nir, nir_lower_non_uniform_access, &options);
408
409 lvp_lower_pipeline_layout(pdevice, layout, nir);
410
411 NIR_PASS(_, nir, lvp_nir_lower_ray_queries);
412
413 if (nir->info.stage == MESA_SHADER_COMPUTE ||
414 nir->info.stage == MESA_SHADER_TASK ||
415 nir->info.stage == MESA_SHADER_MESH) {
416 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
417 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
418 }
419
420 if (nir->info.stage == MESA_SHADER_TASK ||
421 nir->info.stage == MESA_SHADER_MESH) {
422 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_task_payload, shared_var_info);
423 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_task_payload, nir_address_format_32bit_offset);
424 }
425
426 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
427
428 if (nir->info.stage == MESA_SHADER_VERTEX ||
429 nir->info.stage == MESA_SHADER_GEOMETRY) {
430 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
431 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
432 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
433 }
434
435 /* TODO: also optimize the tex srcs. see radeonSI for reference */
436 /* Skip if there are potentially conflicting rounding modes */
437 struct nir_opt_16bit_tex_image_options opt_16bit_options = {
438 .rounding_mode = nir_rounding_mode_undef,
439 .opt_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
440 };
441 NIR_PASS_V(nir, nir_opt_16bit_tex_image, &opt_16bit_options);
442
443 /* Lower texture OPs llvmpipe supports to reduce the number of sample
444 * functions that need to be pre-compiled.
445 */
446 const nir_lower_tex_options tex_options = {
447 /* lower_tg4_offsets can introduce new sparse residency intrinsics
448 * which is why we have to lower everything before calling
449 * lvp_nir_lower_sparse_residency.
450 */
451 .lower_tg4_offsets = true,
452 .lower_txd = true,
453 };
454 NIR_PASS(_, nir, nir_lower_tex, &tex_options);
455
456 NIR_PASS(_, nir, lvp_nir_lower_sparse_residency);
457
458 lvp_shader_optimize(nir);
459
460 if (nir->info.stage != MESA_SHADER_VERTEX)
461 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
462 else {
463 nir->num_inputs = util_last_bit64(nir->info.inputs_read);
464 nir_foreach_shader_in_variable(var, nir) {
465 var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
466 }
467 }
468 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
469 nir->info.stage);
470 }
471
472 VkResult
473 lvp_spirv_to_nir(struct lvp_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *sinfo,
474 nir_shader **out_nir)
475 {
476 VkResult result = compile_spirv(pipeline->device, pipeline->flags, sinfo, out_nir);
477 if (result == VK_SUCCESS) {
478 if (pipeline->type == LVP_PIPELINE_EXEC_GRAPH)
479 lvp_lower_exec_graph(pipeline, *out_nir);
480 lvp_shader_lower(pipeline->device, *out_nir, pipeline->layout);
481 }
482
483 return result;
484 }
485
486 void
487 lvp_shader_init(struct lvp_shader *shader, nir_shader *nir)
488 {
489 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
490 if (impl->ssa_alloc > 100) // skip uniform-inlining analysis for small shaders
491 shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
492 shader->pipeline_nir = lvp_create_pipeline_nir(nir);
493 if (shader->inlines.can_inline)
494 _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
495 }
496
497 static VkResult
498 lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
499 const VkPipelineShaderStageCreateInfo *sinfo)
500 {
501 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
502 assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
503 nir_shader *nir;
504 VkResult result = lvp_spirv_to_nir(pipeline, sinfo, &nir);
505 if (result == VK_SUCCESS) {
506 struct lvp_shader *shader = &pipeline->shaders[stage];
507 lvp_shader_init(shader, nir);
508 shader->push_constant_size = pipeline->layout->push_constant_size;
509 }
510 return result;
511 }
512
513 static void
514 merge_tess_info(struct shader_info *tes_info,
515 const struct shader_info *tcs_info)
516 {
517 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
518 *
519 * "PointMode. Controls generation of points rather than triangles
520 * or lines. This functionality defaults to disabled, and is
521 * enabled if either shader stage includes the execution mode.
522 *
523 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
524 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
525 * and OutputVertices, it says:
526 *
527 * "One mode must be set in at least one of the tessellation
528 * shader stages."
529 *
530 * So, the fields can be set in either the TCS or TES, but they must
531 * agree if set in both. Our backend looks at TES, so bitwise-or in
532 * the values from the TCS.
533 */
534 assert(tcs_info->tess.tcs_vertices_out == 0 ||
535 tes_info->tess.tcs_vertices_out == 0 ||
536 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
537 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
538
539 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
540 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
541 tcs_info->tess.spacing == tes_info->tess.spacing);
542 tes_info->tess.spacing |= tcs_info->tess.spacing;
543
544 assert(tcs_info->tess._primitive_mode == 0 ||
545 tes_info->tess._primitive_mode == 0 ||
546 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
547 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
548 tes_info->tess.ccw |= tcs_info->tess.ccw;
549 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
550 }
551
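/* Translate NIR transform-feedback info into gallium's stream-output layout,
 * remapping varying slots to the driver locations assigned earlier; strides
 * and offsets are converted from bytes to dwords.
 */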
552 static void
553 lvp_shader_xfb_init(struct lvp_shader *shader)
554 {
555 nir_xfb_info *xfb_info = shader->pipeline_nir->nir->xfb_info;
556 if (xfb_info) {
557 uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
558 memset(output_mapping, 0, sizeof(output_mapping));
559
560 nir_foreach_shader_out_variable(var, shader->pipeline_nir->nir) {
561 unsigned slots = nir_variable_count_slots(var, var->type);
562 for (unsigned i = 0; i < slots; i++)
563 output_mapping[var->data.location + i] = var->data.driver_location + i;
564 }
565
566 shader->stream_output.num_outputs = xfb_info->output_count;
567 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
568 if (xfb_info->buffers_written & (1 << i)) {
569 shader->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
570 }
571 }
572 for (unsigned i = 0; i < xfb_info->output_count; i++) {
573 shader->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
574 shader->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
575 shader->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
576 shader->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
577 shader->stream_output.output[i].start_component = xfb_info->outputs[i].component_offset;
578 shader->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
579 }
580
581 }
582 }
583
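/* Transform feedback is captured from the last pre-rasterization stage:
 * geometry if present, otherwise tess eval, otherwise mesh, otherwise vertex.
 */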
584 static void
585 lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
586 {
587 gl_shader_stage stage = MESA_SHADER_VERTEX;
588 if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir)
589 stage = MESA_SHADER_GEOMETRY;
590 else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir)
591 stage = MESA_SHADER_TESS_EVAL;
592 else if (pipeline->shaders[MESA_SHADER_MESH].pipeline_nir)
593 stage = MESA_SHADER_MESH;
594 pipeline->last_vertex = stage;
595 lvp_shader_xfb_init(&pipeline->shaders[stage]);
596 }
597
598 static void *
599 lvp_shader_compile_stage(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir)
600 {
601 if (nir->info.stage == MESA_SHADER_COMPUTE) {
602 struct pipe_compute_state shstate = {0};
603 shstate.prog = nir;
604 shstate.ir_type = PIPE_SHADER_IR_NIR;
605 shstate.static_shared_mem = nir->info.shared_size;
606 return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
607 } else {
608 struct pipe_shader_state shstate = {0};
609 shstate.type = PIPE_SHADER_IR_NIR;
610 shstate.ir.nir = nir;
611 memcpy(&shstate.stream_output, &shader->stream_output, sizeof(shstate.stream_output));
612
613 switch (nir->info.stage) {
614 case MESA_SHADER_FRAGMENT:
615 return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
616 case MESA_SHADER_VERTEX:
617 return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
618 case MESA_SHADER_GEOMETRY:
619 return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
620 case MESA_SHADER_TESS_CTRL:
621 return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
622 case MESA_SHADER_TESS_EVAL:
623 return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
624 case MESA_SHADER_TASK:
625 return device->queue.ctx->create_ts_state(device->queue.ctx, &shstate);
626 case MESA_SHADER_MESH:
627 return device->queue.ctx->create_ms_state(device->queue.ctx, &shstate);
628 default:
629 unreachable("illegal shader");
630 break;
631 }
632 }
633 return NULL;
634 }
635
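/* Finalize the NIR for the gallium screen and create the stage's CSO. CSO
 * creation goes through the single queue context, so it happens under the
 * queue lock unless the caller already holds it.
 */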
636 void *
637 lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked)
638 {
639 device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
640
641 if (!locked)
642 simple_mtx_lock(&device->queue.lock);
643
644 void *state = lvp_shader_compile_stage(device, shader, nir);
645
646 if (!locked)
647 simple_mtx_unlock(&device->queue.lock);
648
649 return state;
650 }
651
652 #ifndef NDEBUG
653 static bool
654 layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
655 {
656 const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
657 uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
658 uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
659 /* base equal */
660 if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
661 return false;
662
663 /* bindings equal */
664 if (a->binding_count != b->binding_count)
665 return false;
666 size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
667 const struct lvp_descriptor_set_binding_layout *la = a->binding;
668 const struct lvp_descriptor_set_binding_layout *lb = b->binding;
669 if (memcmp(la, lb, binding_size)) {
670 for (unsigned i = 0; i < a->binding_count; i++) {
671 if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
672 return false;
673 }
674 }
675
676 /* immutable sampler equal */
677 if (a->immutable_sampler_count != b->immutable_sampler_count)
678 return false;
679 if (a->immutable_sampler_count) {
680 size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
681 if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
682 struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
683 struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
684 for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
685 if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
686 return false;
687 }
688 }
689 }
690 return true;
691 }
692 #endif
693
694 static void
695 merge_layouts(struct vk_device *device, struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
696 {
697 if (!src)
698 return;
699 if (dst->layout) {
700 /* these must match */
701 ASSERTED VkPipelineCreateFlags src_flag = src->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
702 ASSERTED VkPipelineCreateFlags dst_flag = dst->layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
703 assert(src_flag == dst_flag);
704 }
705 /* always try to reuse existing layout: independent sets bit doesn't guarantee independent sets */
706 if (!dst->layout) {
707 dst->layout = (struct lvp_pipeline_layout*)vk_pipeline_layout_ref(&src->vk);
708 return;
709 }
710 /* this is a big optimization when hit */
711 if (dst->layout == src)
712 return;
713 #ifndef NDEBUG
714 /* verify that layouts match */
715 const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
716 const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
717 for (unsigned i = 0; i < smaller->vk.set_count; i++) {
718 if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
719 smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
720 continue;
721
722 const struct lvp_descriptor_set_layout *smaller_set_layout =
723 vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
724 const struct lvp_descriptor_set_layout *bigger_set_layout =
725 vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);
726
727 assert(!smaller_set_layout->binding_count ||
728 !bigger_set_layout->binding_count ||
729 layouts_equal(smaller_set_layout, bigger_set_layout));
730 }
731 #endif
732 /* must be independent sets with different layouts: reallocate to avoid modifying original layout */
733 struct lvp_pipeline_layout *old_layout = dst->layout;
734 dst->layout = vk_zalloc(&device->alloc, sizeof(struct lvp_pipeline_layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
735 memcpy(dst->layout, old_layout, sizeof(struct lvp_pipeline_layout));
736 dst->layout->vk.ref_cnt = 1;
737 for (unsigned i = 0; i < dst->layout->vk.set_count; i++) {
738 if (dst->layout->vk.set_layouts[i])
739 vk_descriptor_set_layout_ref(dst->layout->vk.set_layouts[i]);
740 }
741 vk_pipeline_layout_unref(device, &old_layout->vk);
742
743 for (unsigned i = 0; i < src->vk.set_count; i++) {
744 if (!dst->layout->vk.set_layouts[i]) {
745 dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
746 if (dst->layout->vk.set_layouts[i])
747 vk_descriptor_set_layout_ref(src->vk.set_layouts[i]);
748 }
749 }
750 dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
751 src->vk.set_count);
752 dst->layout->push_constant_size += src->push_constant_size;
753 dst->layout->push_constant_stages |= src->push_constant_stages;
754 }
755
756 static void
757 copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
758 {
759 *dst = *src;
760 dst->pipeline_nir = NULL; //this gets handled later
761 dst->tess_ccw = NULL; //this gets handled later
762 assert(!dst->shader_cso);
763 assert(!dst->tess_ccw_cso);
764 if (src->inlines.can_inline)
765 _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
766 }
767
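/* Build a graphics pipeline or pipeline library: work out which GPL stage
 * subsets this pipeline owns, merge state, layouts and shaders from any
 * linked libraries, compile the owned stages to NIR, and finally create the
 * CSOs unless this is itself a library.
 */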
768 static VkResult
769 lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
770 struct lvp_device *device,
771 struct lvp_pipeline_cache *cache,
772 const VkGraphicsPipelineCreateInfo *pCreateInfo,
773 VkPipelineCreateFlagBits2KHR flags)
774 {
775 pipeline->type = LVP_PIPELINE_GRAPHICS;
776 pipeline->flags = flags;
777
778 VkResult result;
779
780 const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
781 GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
782 const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
783 PIPELINE_LIBRARY_CREATE_INFO_KHR);
784 const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
785 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
786 if (libinfo)
787 pipeline->stages = libinfo->flags;
788 else if (!libstate)
789 pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
790 VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
791 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
792 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
793
794 if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
795 pipeline->library = true;
796
797 struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
798
799 if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
800 /* this is a regular pipeline with no partials: directly reuse */
801 pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL;
802 else if (pipeline->stages & layout_stages) {
803 if ((pipeline->stages & layout_stages) == layout_stages)
804 /* this has all the layout stages: directly reuse */
805 pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk);
806 else {
807 /* this is a partial: copy for later merging to avoid modifying another layout */
808 merge_layouts(&device->vk, pipeline, layout);
809 }
810 }
811
812 if (libstate) {
813 for (unsigned i = 0; i < libstate->libraryCount; i++) {
814 LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
815 vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
816 &p->graphics_state);
817 if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
818 pipeline->line_smooth = p->line_smooth;
819 pipeline->disable_multisample = p->disable_multisample;
820 pipeline->line_rectangular = p->line_rectangular;
821 memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
822 memcpy(&pipeline->shaders[MESA_SHADER_TASK], &p->shaders[MESA_SHADER_TASK], sizeof(struct lvp_shader) * 2);
823 lvp_forall_gfx_stage(i) {
824 if (i == MESA_SHADER_FRAGMENT)
825 continue;
826 copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
827 }
828 }
829 if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
830 pipeline->force_min_sample = p->force_min_sample;
831 copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]);
832 }
833 if (p->stages & layout_stages) {
834 if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) {
835 merge_layouts(&device->vk, pipeline, p->layout);
836 lvp_forall_gfx_stage(i) {
837 pipeline->shaders[i].push_constant_size = pipeline->layout->push_constant_size;
838 }
839 }
840 }
841 pipeline->stages |= p->stages;
842 }
843 }
844
845 result = vk_graphics_pipeline_state_fill(&device->vk,
846 &pipeline->graphics_state,
847 pCreateInfo, NULL, 0, NULL, NULL,
848 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
849 &pipeline->state_data);
850 if (result != VK_SUCCESS)
851 return result;
852
853 assert(pipeline->library || pipeline->stages & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
854 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
855 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));
856
857 pipeline->device = device;
858
859 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
860 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
861 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
862 if (stage == MESA_SHADER_FRAGMENT) {
863 if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
864 continue;
865 } else {
866 if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
867 continue;
868 }
869 result = lvp_shader_compile_to_ir(pipeline, sinfo);
870 if (result != VK_SUCCESS)
871 goto fail;
872
873 switch (stage) {
874 case MESA_SHADER_FRAGMENT:
875 if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading)
876 pipeline->force_min_sample = true;
877 break;
878 default: break;
879 }
880 }
881 if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) {
882 nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
883 merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info);
884 if (BITSET_TEST(pipeline->graphics_state.dynamic,
885 MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
886 pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir));
887 pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
888 } else if (pipeline->graphics_state.ts &&
889 pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) {
890 pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
891 }
892 }
893 if (libstate) {
894 for (unsigned i = 0; i < libstate->libraryCount; i++) {
895 LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
896 if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
897 if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir)
898 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir);
899 }
900 if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
901 lvp_forall_gfx_stage(j) {
902 if (j == MESA_SHADER_FRAGMENT)
903 continue;
904 if (p->shaders[j].pipeline_nir)
905 lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir);
906 }
907 if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
908 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw);
909 }
910 }
911 } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
912 const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
913 if (rs) {
914 /* always draw bresenham if !smooth */
915 pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
916 pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ||
917 rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
918 pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
919 } else
920 pipeline->line_rectangular = true;
921 lvp_pipeline_xfb_init(pipeline);
922 }
923 if (!libstate && !pipeline->library)
924 lvp_pipeline_shaders_compile(pipeline, false);
925
926 return VK_SUCCESS;
927
928 fail:
929 for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
930 lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
931 }
932 vk_free(&device->vk.alloc, pipeline->state_data);
933
934 return result;
935 }
936
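/* Pre-compile the gallium CSOs for every stage that cannot inline uniforms;
 * shaders flagged as inlinable are compiled later as per-variant CSOs once
 * their uniform values are known.
 */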
937 void
938 lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline, bool locked)
939 {
940 if (pipeline->compiled)
941 return;
942 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
943 if (!pipeline->shaders[i].pipeline_nir)
944 continue;
945
946 gl_shader_stage stage = i;
947 assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage);
948
949 if (!pipeline->shaders[stage].inlines.can_inline) {
950 pipeline->shaders[stage].shader_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
951 nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir), locked);
952 if (pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
953 pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
954 nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir), locked);
955 }
956 }
957 pipeline->compiled = true;
958 }
959
960 static VkResult
961 lvp_graphics_pipeline_create(
962 VkDevice _device,
963 VkPipelineCache _cache,
964 const VkGraphicsPipelineCreateInfo *pCreateInfo,
965 VkPipelineCreateFlagBits2KHR flags,
966 VkPipeline *pPipeline,
967 bool group)
968 {
969 LVP_FROM_HANDLE(lvp_device, device, _device);
970 LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
971 struct lvp_pipeline *pipeline;
972 VkResult result;
973
974 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
975
976 pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
977 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
978 if (pipeline == NULL)
979 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
980
981 vk_object_base_init(&device->vk, &pipeline->base,
982 VK_OBJECT_TYPE_PIPELINE);
983 uint64_t t0 = os_time_get_nano();
984 result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
985 if (result != VK_SUCCESS) {
986 vk_free(&device->vk.alloc, pipeline);
987 return result;
988 }
989
990 VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
991 if (feedback && !group) {
992 feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
993 feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
994 memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
995 }
996
997 *pPipeline = lvp_pipeline_to_handle(pipeline);
998
999 return VK_SUCCESS;
1000 }
1001
1002 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
1003 VkDevice _device,
1004 VkPipelineCache pipelineCache,
1005 uint32_t count,
1006 const VkGraphicsPipelineCreateInfo* pCreateInfos,
1007 const VkAllocationCallbacks* pAllocator,
1008 VkPipeline* pPipelines)
1009 {
1010 VkResult result = VK_SUCCESS;
1011 unsigned i = 0;
1012
1013 for (; i < count; i++) {
1014 VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1015 VkPipelineCreateFlagBits2KHR flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
1016
1017 if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1018 r = lvp_graphics_pipeline_create(_device,
1019 pipelineCache,
1020 &pCreateInfos[i],
1021 flags,
1022 &pPipelines[i],
1023 false);
1024 if (r != VK_SUCCESS) {
1025 result = r;
1026 pPipelines[i] = VK_NULL_HANDLE;
1027 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1028 break;
1029 }
1030 }
1031 if (result != VK_SUCCESS) {
1032 for (; i < count; i++)
1033 pPipelines[i] = VK_NULL_HANDLE;
1034 }
1035
1036 return result;
1037 }
1038
1039 static VkResult
1040 lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
1041 struct lvp_device *device,
1042 struct lvp_pipeline_cache *cache,
1043 const VkComputePipelineCreateInfo *pCreateInfo,
1044 VkPipelineCreateFlagBits2KHR flags)
1045 {
1046 pipeline->flags = flags;
1047 pipeline->device = device;
1048 pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
1049 vk_pipeline_layout_ref(&pipeline->layout->vk);
1050 pipeline->force_min_sample = false;
1051
1052 pipeline->type = LVP_PIPELINE_COMPUTE;
1053
1054 VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
1055 if (result != VK_SUCCESS)
1056 return result;
1057
1058 struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE];
1059 if (!shader->inlines.can_inline)
1060 shader->shader_cso = lvp_shader_compile(pipeline->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false);
1061 pipeline->compiled = true;
1062 if (pipeline->layout)
1063 shader->push_constant_size = pipeline->layout->push_constant_size;
1064 return VK_SUCCESS;
1065 }
1066
1067 static VkResult
1068 lvp_compute_pipeline_create(
1069 VkDevice _device,
1070 VkPipelineCache _cache,
1071 const VkComputePipelineCreateInfo *pCreateInfo,
1072 VkPipelineCreateFlagBits2KHR flags,
1073 VkPipeline *pPipeline)
1074 {
1075 LVP_FROM_HANDLE(lvp_device, device, _device);
1076 LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
1077 struct lvp_pipeline *pipeline;
1078 VkResult result;
1079
1080 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1081
1082 pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
1083 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1084 if (pipeline == NULL)
1085 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1086
1087 vk_object_base_init(&device->vk, &pipeline->base,
1088 VK_OBJECT_TYPE_PIPELINE);
1089 uint64_t t0 = os_time_get_nano();
1090 result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
1091 if (result != VK_SUCCESS) {
1092 vk_free(&device->vk.alloc, pipeline);
1093 return result;
1094 }
1095
1096 const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1097 if (feedback) {
1098 feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1099 feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1100 memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1101 }
1102
1103 *pPipeline = lvp_pipeline_to_handle(pipeline);
1104
1105 return VK_SUCCESS;
1106 }
1107
1108 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
1109 VkDevice _device,
1110 VkPipelineCache pipelineCache,
1111 uint32_t count,
1112 const VkComputePipelineCreateInfo* pCreateInfos,
1113 const VkAllocationCallbacks* pAllocator,
1114 VkPipeline* pPipelines)
1115 {
1116 VkResult result = VK_SUCCESS;
1117 unsigned i = 0;
1118
1119 for (; i < count; i++) {
1120 VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1121 VkPipelineCreateFlagBits2KHR flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]);
1122
1123 if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1124 r = lvp_compute_pipeline_create(_device,
1125 pipelineCache,
1126 &pCreateInfos[i],
1127 flags,
1128 &pPipelines[i]);
1129 if (r != VK_SUCCESS) {
1130 result = r;
1131 pPipelines[i] = VK_NULL_HANDLE;
1132 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1133 break;
1134 }
1135 }
1136 if (result != VK_SUCCESS) {
1137 for (; i < count; i++)
1138 pPipelines[i] = VK_NULL_HANDLE;
1139 }
1140
1141
1142 return result;
1143 }
1144
1145 VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT(
1146 VkDevice _device,
1147 VkShaderEXT _shader,
1148 const VkAllocationCallbacks* pAllocator)
1149 {
1150 LVP_FROM_HANDLE(lvp_device, device, _device);
1151 LVP_FROM_HANDLE(lvp_shader, shader, _shader);
1152
1153 if (!shader)
1154 return;
1155 shader_destroy(device, shader, false);
1156
1157 vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
1158 blob_finish(&shader->blob);
1159 vk_object_base_finish(&shader->base);
1160 vk_free2(&device->vk.alloc, pAllocator, shader);
1161 }
1162
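/* Shader object (VK_EXT_shader_object) creation: SPIR-V input is compiled and
 * lowered much like a pipeline stage, while binary input is validated against
 * the device cache UUID and SHA1 checksum and deserialized straight back into
 * NIR.
 */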
1163 static VkShaderEXT
1164 create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator)
1165 {
1166 nir_shader *nir = NULL;
1167 gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
1168 assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
1169 if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) {
1170 VkShaderModuleCreateInfo minfo = {
1171 VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
1172 NULL,
1173 0,
1174 pCreateInfo->codeSize,
1175 pCreateInfo->pCode,
1176 };
1177 VkPipelineShaderStageCreateFlagBits flags = 0;
1178 if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
1179 flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT;
1180 if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
1181 flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
1182 VkPipelineShaderStageCreateInfo sinfo = {
1183 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1184 &minfo,
1185 flags,
1186 pCreateInfo->stage,
1187 VK_NULL_HANDLE,
1188 pCreateInfo->pName,
1189 pCreateInfo->pSpecializationInfo,
1190 };
1191 VkResult result = compile_spirv(device, 0, &sinfo, &nir);
1192 if (result != VK_SUCCESS)
1193 goto fail;
1194 nir->info.separate_shader = true;
1195 } else {
1196 assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT);
1197 if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + VK_UUID_SIZE + 1)
1198 return VK_NULL_HANDLE;
1199 struct blob_reader blob;
1200 const uint8_t *data = pCreateInfo->pCode;
1201 uint8_t uuid[VK_UUID_SIZE];
1202 lvp_device_get_cache_uuid(uuid);
1203 if (memcmp(uuid, data, VK_UUID_SIZE))
1204 return VK_NULL_HANDLE;
1205 size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH - VK_UUID_SIZE;
1206 unsigned char sha1[20];
1207
1208 struct mesa_sha1 sctx;
1209 _mesa_sha1_init(&sctx);
1210 _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1211 _mesa_sha1_final(&sctx, sha1);
1212 if (memcmp(sha1, data + VK_UUID_SIZE, SHA1_DIGEST_LENGTH))
1213 return VK_NULL_HANDLE;
1214
1215 blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1216 nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob);
1217 if (!nir)
1218 goto fail;
1219 }
1220 if (!nir_shader_get_entrypoint(nir))
1221 goto fail;
1222 struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT);
1223 if (!shader)
1224 goto fail;
1225 blob_init(&shader->blob);
1226 VkPipelineLayoutCreateInfo pci = {
1227 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1228 NULL,
1229 0,
1230 pCreateInfo->setLayoutCount,
1231 pCreateInfo->pSetLayouts,
1232 pCreateInfo->pushConstantRangeCount,
1233 pCreateInfo->pPushConstantRanges,
1234 };
1235 shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator);
1236 shader->push_constant_size = shader->layout->push_constant_size;
1237
1238 if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT)
1239 lvp_shader_lower(device, nir, shader->layout);
1240
1241 lvp_shader_init(shader, nir);
1242
1243 lvp_shader_xfb_init(shader);
1244 if (stage == MESA_SHADER_TESS_EVAL) {
1245 /* spec requires that all tess modes are set in both shaders */
1246 nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
1247 shader->tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir));
1248 shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw;
1249 shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir), false);
1250 } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) {
1251 /* this is (currently) illegal */
1252 assert(!nir->info.fs.uses_fbfetch_output);
1253 shader_destroy(device, shader, false);
1254
1255 vk_object_base_finish(&shader->base);
1256 vk_free2(&device->vk.alloc, pAllocator, shader);
1257 return VK_NULL_HANDLE;
1258 }
1259 nir_serialize(&shader->blob, nir, true);
1260 shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false);
1261 return lvp_shader_to_handle(shader);
1262 fail:
1263 ralloc_free(nir);
1264 return VK_NULL_HANDLE;
1265 }
1266
1267 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT(
1268 VkDevice _device,
1269 uint32_t createInfoCount,
1270 const VkShaderCreateInfoEXT* pCreateInfos,
1271 const VkAllocationCallbacks* pAllocator,
1272 VkShaderEXT* pShaders)
1273 {
1274 LVP_FROM_HANDLE(lvp_device, device, _device);
1275 unsigned i;
1276 for (i = 0; i < createInfoCount; i++) {
1277 pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator);
1278 if (!pShaders[i]) {
1279 if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) {
1280 if (i < createInfoCount - 1)
1281 memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT));
1282 return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
1283 }
1284 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1285 }
1286 }
1287 return VK_SUCCESS;
1288 }
1289
1290
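/* The shader binary is laid out as the device cache UUID, followed by a SHA1
 * of the serialized NIR, followed by the serialized NIR itself;
 * create_shader_object() validates the same layout on import.
 */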
1291 VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT(
1292 VkDevice device,
1293 VkShaderEXT _shader,
1294 size_t* pDataSize,
1295 void* pData)
1296 {
1297 LVP_FROM_HANDLE(lvp_shader, shader, _shader);
1298 VkResult ret = VK_SUCCESS;
1299 if (pData) {
1300 if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE) {
1301 ret = VK_INCOMPLETE;
1302 *pDataSize = 0;
1303 } else {
1304 *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE);
1305 uint8_t *data = pData;
1306 lvp_device_get_cache_uuid(data);
1307 struct mesa_sha1 sctx;
1308 _mesa_sha1_init(&sctx);
1309 _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size);
1310 _mesa_sha1_final(&sctx, data + VK_UUID_SIZE);
1311 memcpy(data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, shader->blob.data, shader->blob.size);
1312 }
1313 } else {
1314 *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE;
1315 }
1316 return ret;
1317 }
1318
1319 #ifdef VK_ENABLE_BETA_EXTENSIONS
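/* Execution graphs (VK_AMDX_shader_enqueue): every node stage, plus every
 * stage pulled in from library pipelines, is created as an internal compute
 * pipeline and tracked in pipeline->groups[].
 */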
1320 static VkResult
1321 lvp_exec_graph_pipeline_create(VkDevice _device, VkPipelineCache _cache,
1322 const VkExecutionGraphPipelineCreateInfoAMDX *create_info,
1323 VkPipelineCreateFlagBits2KHR flags,
1324 VkPipeline *out_pipeline)
1325 {
1326 LVP_FROM_HANDLE(lvp_device, device, _device);
1327 struct lvp_pipeline *pipeline;
1328 VkResult result;
1329
1330 assert(create_info->sType == VK_STRUCTURE_TYPE_EXECUTION_GRAPH_PIPELINE_CREATE_INFO_AMDX);
1331
1332 uint32_t stage_count = create_info->stageCount;
1333 if (create_info->pLibraryInfo) {
1334 for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
1335 VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
1336 stage_count += library->num_groups;
1337 }
1338 }
1339
1340 pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + stage_count * sizeof(VkPipeline), 8,
1341 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1342 if (!pipeline)
1343 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1344
1345 vk_object_base_init(&device->vk, &pipeline->base,
1346 VK_OBJECT_TYPE_PIPELINE);
1347
1348 uint64_t t0 = os_time_get_nano();
1349
1350 pipeline->type = LVP_PIPELINE_EXEC_GRAPH;
1351 pipeline->flags = vk_graph_pipeline_create_flags(create_info);
1352 pipeline->layout = lvp_pipeline_layout_from_handle(create_info->layout);
1353
1354 pipeline->exec_graph.scratch_size = 0;
1355 pipeline->num_groups = stage_count;
1356
1357 uint32_t stage_index = 0;
1358 for (uint32_t i = 0; i < create_info->stageCount; i++) {
1359 const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
1360 create_info->pStages[i].pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
1361
1362 VkComputePipelineCreateInfo stage_create_info = {
1363 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1364 .flags = create_info->flags,
1365 .stage = create_info->pStages[i],
1366 .layout = create_info->layout,
1367 };
1368
1369 result = lvp_compute_pipeline_create(_device, _cache, &stage_create_info, flags, &pipeline->groups[i]);
1370 if (result != VK_SUCCESS)
1371 goto fail;
1372
1373 VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
1374 nir_shader *nir = stage->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir;
1375
1376 if (node_info) {
1377 stage->exec_graph.name = node_info->pName;
1378 stage->exec_graph.index = node_info->index;
1379 }
1380
1381 /* TODO: Add a shader info NIR pass to figure out how many payloads the shader creates. */
1382 stage->exec_graph.scratch_size = nir->info.cs.node_payloads_size * 256;
1383 pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, stage->exec_graph.scratch_size);
1384
1385 stage_index++;
1386 }
1387
1388 if (create_info->pLibraryInfo) {
1389 for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
1390 VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
1391 for (uint32_t j = 0; j < library->num_groups; j++) {
1392 /* TODO: Do we need reference counting? */
1393 pipeline->groups[stage_index] = library->groups[j];
1394 stage_index++;
1395 }
1396 pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, library->exec_graph.scratch_size);
1397 }
1398 }
1399
1400 const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1401 if (feedback) {
1402 feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1403 feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1404 memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1405 }
1406
1407 *out_pipeline = lvp_pipeline_to_handle(pipeline);
1408
1409 return VK_SUCCESS;
1410
1411 fail:
1412 for (uint32_t i = 0; i < stage_count; i++)
1413 lvp_DestroyPipeline(_device, pipeline->groups[i], NULL);
1414
1415 vk_free(&device->vk.alloc, pipeline);
1416
1417 return result;
1418 }
1419
1420 VKAPI_ATTR VkResult VKAPI_CALL
1421 lvp_CreateExecutionGraphPipelinesAMDX(VkDevice device, VkPipelineCache pipelineCache,
1422 uint32_t createInfoCount,
1423 const VkExecutionGraphPipelineCreateInfoAMDX *pCreateInfos,
1424 const VkAllocationCallbacks *pAllocator,
1425 VkPipeline *pPipelines)
1426 {
1427 VkResult result = VK_SUCCESS;
1428 uint32_t i = 0;
1429
1430 for (; i < createInfoCount; i++) {
1431 VkPipelineCreateFlagBits2KHR flags = vk_graph_pipeline_create_flags(&pCreateInfos[i]);
1432
1433 VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1434 if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1435 r = lvp_exec_graph_pipeline_create(device, pipelineCache, &pCreateInfos[i], flags, &pPipelines[i]);
1436 if (r != VK_SUCCESS) {
1437 result = r;
1438 pPipelines[i] = VK_NULL_HANDLE;
1439 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1440 break;
1441 }
1442 }
1443 if (result != VK_SUCCESS) {
1444 for (; i < createInfoCount; i++)
1445 pPipelines[i] = VK_NULL_HANDLE;
1446 }
1447
1448 return result;
1449 }
1450
1451 VKAPI_ATTR VkResult VKAPI_CALL
1452 lvp_GetExecutionGraphPipelineScratchSizeAMDX(VkDevice device, VkPipeline executionGraph,
1453 VkExecutionGraphPipelineScratchSizeAMDX *pSizeInfo)
1454 {
1455 VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
1456 pSizeInfo->minSize = MAX2(pipeline->exec_graph.scratch_size * 32, 16);
1457 pSizeInfo->maxSize = pSizeInfo->minSize;
1458 return VK_SUCCESS;
1459 }
1460
1461 VKAPI_ATTR VkResult VKAPI_CALL
1462 lvp_GetExecutionGraphPipelineNodeIndexAMDX(VkDevice device, VkPipeline executionGraph,
1463 const VkPipelineShaderStageNodeCreateInfoAMDX *pNodeInfo,
1464 uint32_t *pNodeIndex)
1465 {
1466 VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
1467
1468 for (uint32_t i = 0; i < pipeline->num_groups; i++) {
1469 VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
1470 if (stage->exec_graph.index == pNodeInfo->index &&
1471 !strcmp(stage->exec_graph.name, pNodeInfo->pName)) {
1472 *pNodeIndex = i;
1473 return VK_SUCCESS;
1474 }
1475 }
1476
1477 return VK_ERROR_OUT_OF_HOST_MEMORY;
1478 }
1479 #endif
1480