1 /*
2  * Copyright © 2019 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "lvp_private.h"
25 #include "vk_nir_convert_ycbcr.h"
26 #include "vk_pipeline.h"
27 #include "vk_render_pass.h"
28 #include "vk_util.h"
29 #include "glsl_types.h"
30 #include "util/os_time.h"
31 #include "spirv/nir_spirv.h"
32 #include "nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34 #include "lvp_lower_vulkan_resource.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "nir/nir_xfb_info.h"
38 
39 #define SPIR_V_MAGIC_NUMBER 0x07230203
40 
41 #define MAX_DYNAMIC_STATES 72
42 
43 typedef void (*cso_destroy_func)(struct pipe_context*, void*);
44 
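/* Release everything a shader owns on the gallium side: every inlined-uniform
 * variant CSO, the base CSO and the tess_ccw CSO are freed through the
 * per-stage delete_*_state hook (the destroy[] table below is indexed by
 * gl_shader_stage and must stay in MESA_SHADER_* order), then the NIR
 * references are dropped.  The queue lock is taken unless the caller already
 * holds it.
 */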
45 static void
46 shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked)
47 {
48    if (!shader->pipeline_nir)
49       return;
50    gl_shader_stage stage = shader->pipeline_nir->nir->info.stage;
51    cso_destroy_func destroy[] = {
52       device->queue.ctx->delete_vs_state,
53       device->queue.ctx->delete_tcs_state,
54       device->queue.ctx->delete_tes_state,
55       device->queue.ctx->delete_gs_state,
56       device->queue.ctx->delete_fs_state,
57       device->queue.ctx->delete_compute_state,
58       device->queue.ctx->delete_ts_state,
59       device->queue.ctx->delete_ms_state,
60    };
61 
62    if (!locked)
63       simple_mtx_lock(&device->queue.lock);
64 
65    set_foreach(&shader->inlines.variants, entry) {
66       struct lvp_inline_variant *variant = (void*)entry->key;
67       destroy[stage](device->queue.ctx, variant->cso);
68       free(variant);
69    }
70    ralloc_free(shader->inlines.variants.table);
71 
72    if (shader->shader_cso)
73       destroy[stage](device->queue.ctx, shader->shader_cso);
74    if (shader->tess_ccw_cso)
75       destroy[stage](device->queue.ctx, shader->tess_ccw_cso);
76 
77    if (!locked)
78       simple_mtx_unlock(&device->queue.lock);
79 
80    lvp_pipeline_nir_ref(&shader->pipeline_nir, NULL);
81    lvp_pipeline_nir_ref(&shader->tess_ccw, NULL);
82 }
83 
84 void
85 lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline, bool locked)
86 {
87    lvp_forall_stage(i)
88       shader_destroy(device, &pipeline->shaders[i], locked);
89 
90    if (pipeline->layout)
91       vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
92 
93    for (unsigned i = 0; i < pipeline->num_groups; i++) {
94       LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
95       lvp_pipeline_destroy(device, p, locked);
96    }
97 
98    vk_free(&device->vk.alloc, pipeline->state_data);
99    vk_object_base_finish(&pipeline->base);
100    vk_free(&device->vk.alloc, pipeline);
101 }
102 
103 VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
104    VkDevice                                    _device,
105    VkPipeline                                  _pipeline,
106    const VkAllocationCallbacks*                pAllocator)
107 {
108    LVP_FROM_HANDLE(lvp_device, device, _device);
109    LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);
110 
111    if (!_pipeline)
112       return;
113 
114    if (pipeline->used) {
115       simple_mtx_lock(&device->queue.lock);
116       util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
117       simple_mtx_unlock(&device->queue.lock);
118    } else {
119       lvp_pipeline_destroy(device, pipeline, false);
120    }
121 }
122 
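/* Size/align callback for nir_lower_vars_to_explicit_types(): shared and task
 * payload variables are laid out as tightly packed scalars/vectors, with
 * booleans widened to 32 bits.
 */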
123 static void
124 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
125 {
126    assert(glsl_type_is_vector_or_scalar(type));
127 
128    uint32_t comp_size = glsl_type_is_boolean(type)
129       ? 4 : glsl_get_bit_size(type) / 8;
130    unsigned length = glsl_get_vector_elements(type);
131    *size = comp_size * length;
132    *align = comp_size;
133 }
134 
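/* Drop barrier intrinsics llvmpipe does not need.  For compute-like stages
 * (data != NULL) only barriers with no execution scope and no
 * workgroup/device/queue-family memory scope are removed; for all other
 * stages every barrier goes away.
 */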
135 static bool
136 remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
137 {
138    if (intr->intrinsic != nir_intrinsic_barrier)
139       return false;
140    if (data) {
141       if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE)
142          return false;
143 
144       if (nir_intrinsic_memory_scope(intr) == SCOPE_WORKGROUP ||
145           nir_intrinsic_memory_scope(intr) == SCOPE_DEVICE ||
146           nir_intrinsic_memory_scope(intr) == SCOPE_QUEUE_FAMILY)
147          return false;
148    }
149    nir_instr_remove(&intr->instr);
150    return true;
151 }
152 
153 static bool
154 remove_barriers(nir_shader *nir, bool is_compute)
155 {
156    return nir_shader_intrinsics_pass(nir, remove_barriers_impl,
157                                      nir_metadata_dominance,
158                                      (void*)is_compute);
159 }
160 
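/* Rewrite demote/terminate (and their _if variants) into discard/discard_if. */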
161 static bool
162 lower_demote_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
163 {
164    if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
165       intr->intrinsic = nir_intrinsic_discard;
166       return true;
167    }
168    if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) {
169       intr->intrinsic = nir_intrinsic_discard_if;
170       return true;
171    }
172    return false;
173 }
174 
175 static bool
176 lower_demote(nir_shader *nir)
177 {
178    return nir_shader_intrinsics_pass(nir, lower_demote_impl,
179                                      nir_metadata_dominance, NULL);
180 }
181 
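/* If a texture instruction carries a constant nir_tex_src_texture_offset,
 * fold it into texture_index and drop the source, turning a trivially
 * indirect texture access back into a direct one.
 */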
182 static bool
183 find_tex(const nir_instr *instr, const void *data_cb)
184 {
185    if (instr->type == nir_instr_type_tex)
186       return true;
187    return false;
188 }
189 
190 static nir_def *
191 fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
192 {
193    nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
194    unsigned offset = 0;
195 
196    int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
197    if (idx == -1)
198       return NULL;
199 
200    if (!nir_src_is_const(tex_instr->src[idx].src))
201       return NULL;
202    offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);
203 
204    nir_tex_instr_remove_src(tex_instr, idx);
205    tex_instr->texture_index += offset;
206    return NIR_LOWER_INSTR_PROGRESS;
207 }
208 
209 static bool
210 lvp_nir_fixup_indirect_tex(nir_shader *shader)
211 {
212    return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
213 }
214 
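/* Core NIR optimization loop shared by pipeline and shader-object compilation:
 * keeps re-running the passes below until none of them reports progress.
 */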
215 static void
216 optimize(nir_shader *nir)
217 {
218    bool progress = false;
219    do {
220       progress = false;
221 
222       NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
223       NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
224       NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
225       NIR_PASS(progress, nir, nir_opt_deref);
226       NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
227 
228       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
229 
230       NIR_PASS(progress, nir, nir_copy_prop);
231       NIR_PASS(progress, nir, nir_opt_dce);
232       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
233 
234       NIR_PASS(progress, nir, nir_opt_algebraic);
235       NIR_PASS(progress, nir, nir_opt_constant_folding);
236 
237       NIR_PASS(progress, nir, nir_opt_remove_phis);
238       bool loop = false;
239       NIR_PASS(loop, nir, nir_opt_loop);
240       progress |= loop;
241       if (loop) {
242          /* If nir_opt_loop makes progress, then we need to clean
243           * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
244           * to make progress.
245           */
246          NIR_PASS(progress, nir, nir_copy_prop);
247          NIR_PASS(progress, nir, nir_opt_dce);
248          NIR_PASS(progress, nir, nir_opt_remove_phis);
249       }
250       NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
251       NIR_PASS(progress, nir, nir_opt_dead_cf);
252       NIR_PASS(progress, nir, nir_opt_conditional_discard);
253       NIR_PASS(progress, nir, nir_opt_remove_phis);
254       NIR_PASS(progress, nir, nir_opt_cse);
255       NIR_PASS(progress, nir, nir_opt_undef);
256 
257       NIR_PASS(progress, nir, nir_opt_deref);
258       NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
259       NIR_PASS(progress, nir, nir_opt_loop_unroll);
260       NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
261    } while (progress);
262 }
263 
264 void
265 lvp_shader_optimize(nir_shader *nir)
266 {
267    optimize(nir);
268    NIR_PASS_V(nir, nir_lower_var_copies);
269    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
270    NIR_PASS_V(nir, nir_opt_dce);
271    nir_sweep(nir);
272 }
273 
274 static struct lvp_pipeline_nir *
275 create_pipeline_nir(nir_shader *nir)
276 {
277    struct lvp_pipeline_nir *pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir);
278    pipeline_nir->nir = nir;
279    pipeline_nir->ref_cnt = 1;
280    return pipeline_nir;
281 }
282 
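/* Translate one Vulkan shader stage to NIR via vk_pipeline_shader_stage_to_nir(),
 * advertising the SPIR-V capabilities and address formats lavapipe supports.
 */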
283 static VkResult
284 compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo *sinfo, nir_shader **nir)
285 {
286    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
287    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
288    VkResult result;
289 
290 #ifdef VK_ENABLE_BETA_EXTENSIONS
291    const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
292       sinfo->pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
293 #endif
294 
295    const struct spirv_to_nir_options spirv_options = {
296       .environment = NIR_SPIRV_VULKAN,
297       .caps = {
298          .float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),
299          .int16 = true,
300          .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
301          .tessellation = true,
302          .float_controls = true,
303          .float32_atomic_add = true,
304 #if LLVM_VERSION_MAJOR >= 15
305          .float32_atomic_min_max = true,
306 #endif
307          .image_ms_array = true,
308          .image_read_without_format = true,
309          .image_write_without_format = true,
310          .storage_image_ms = true,
311          .geometry_streams = true,
312          .storage_8bit = true,
313          .storage_16bit = true,
314          .variable_pointers = true,
315          .stencil_export = true,
316          .post_depth_coverage = true,
317          .transform_feedback = true,
318          .device_group = true,
319          .draw_parameters = true,
320          .shader_viewport_index_layer = true,
321          .shader_clock = true,
322          .multiview = true,
323          .physical_storage_buffer_address = true,
324          .int64_atomics = true,
325          .subgroup_arithmetic = true,
326          .subgroup_basic = true,
327          .subgroup_ballot = true,
328          .subgroup_quad = true,
329 #if LLVM_VERSION_MAJOR >= 10
330          .subgroup_shuffle = true,
331 #endif
332          .subgroup_vote = true,
333          .vk_memory_model = true,
334          .vk_memory_model_device_scope = true,
335          .int8 = true,
336          .float16 = true,
337          .demote_to_helper_invocation = true,
338          .mesh_shading = true,
339          .descriptor_array_dynamic_indexing = true,
340          .descriptor_array_non_uniform_indexing = true,
341          .descriptor_indexing = true,
342          .runtime_descriptor_array = true,
343          .shader_enqueue = true,
344       },
345       .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
346       .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
347       .phys_ssbo_addr_format = nir_address_format_64bit_global,
348       .push_const_addr_format = nir_address_format_logical,
349       .shared_addr_format = nir_address_format_32bit_offset,
350 #ifdef VK_ENABLE_BETA_EXTENSIONS
351       .shader_index = node_info ? node_info->index : 0,
352 #endif
353    };
354 
355    result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
356                                             &spirv_options, pdevice->physical_device->drv_options[stage],
357                                             NULL, nir);
358    return result;
359 }
360 
361 static bool
362 inline_variant_equals(const void *a, const void *b)
363 {
364    const struct lvp_inline_variant *av = a, *bv = b;
365    assert(av->mask == bv->mask);
366    u_foreach_bit(slot, av->mask) {
367       if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
368          return false;
369    }
370    return true;
371 }
372 
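/* Callback for nir_vk_lower_ycbcr_tex(): look up the YCbCr conversion state of
 * the immutable sampler at the given set/binding/array index, if any.
 */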
373 static const struct vk_ycbcr_conversion_state *
374 lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
375 {
376    const struct lvp_pipeline_layout *layout = data;
377 
378    const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk);
379    const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding];
380    if (!binding_layout->immutable_samplers)
381       return NULL;
382 
383    struct vk_ycbcr_conversion *ycbcr_conversion = binding_layout->immutable_samplers[array_index]->vk.ycbcr_conversion;
384    return ycbcr_conversion ? &ycbcr_conversion->state : NULL;
385 }
386 
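/* Common NIR lowering for both pipelines and shader objects: strip unneeded
 * barriers, lower sysvals/subgroups/descriptors/explicit IO, fold 16-bit
 * tex/image destinations, then run the optimizer and assign driver locations.
 */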
387 /* pipeline is NULL for shader objects. */
388 static void
389 lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_shader *nir, struct lvp_pipeline_layout *layout)
390 {
391    if (nir->info.stage != MESA_SHADER_TESS_CTRL)
392       NIR_PASS_V(nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK);
393 
394    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
395       .frag_coord = true,
396       .point_coord = true,
397    };
398    NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
399 
400    struct nir_lower_subgroups_options subgroup_opts = {0};
401    subgroup_opts.lower_quad = true;
402    subgroup_opts.ballot_components = 1;
403    subgroup_opts.ballot_bit_size = 32;
404    subgroup_opts.lower_inverse_ballot = true;
405    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
406 
407    if (nir->info.stage == MESA_SHADER_FRAGMENT)
408       lvp_lower_input_attachments(nir, false);
409    NIR_PASS_V(nir, nir_lower_system_values);
410    NIR_PASS_V(nir, nir_lower_is_helper_invocation);
411    NIR_PASS_V(nir, lower_demote);
412    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
413 
414    NIR_PASS_V(nir, nir_remove_dead_variables,
415               nir_var_uniform | nir_var_image, NULL);
416 
417    optimize(nir);
418    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
419 
420    NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
421    NIR_PASS_V(nir, nir_split_var_copies);
422    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
423 
424    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
425               nir_address_format_32bit_offset);
426 
427    NIR_PASS_V(nir, nir_lower_explicit_io,
428               nir_var_mem_ubo | nir_var_mem_ssbo,
429               nir_address_format_vec2_index_32bit_offset);
430 
431    NIR_PASS_V(nir, nir_lower_explicit_io,
432               nir_var_mem_global,
433               nir_address_format_64bit_global);
434 
435    if (nir->info.stage == MESA_SHADER_COMPUTE)
436       lvp_lower_exec_graph(pipeline, nir);
437 
438    NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout);
439 
440    nir_lower_non_uniform_access_options options = {
441       .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
442    };
443    NIR_PASS(_, nir, nir_lower_non_uniform_access, &options);
444 
445    lvp_lower_pipeline_layout(pdevice, layout, nir);
446 
447    if (nir->info.stage == MESA_SHADER_COMPUTE ||
448        nir->info.stage == MESA_SHADER_TASK ||
449        nir->info.stage == MESA_SHADER_MESH) {
450       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
451       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
452    }
453 
454    if (nir->info.stage == MESA_SHADER_TASK ||
455        nir->info.stage == MESA_SHADER_MESH) {
456       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_task_payload, shared_var_info);
457       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_task_payload, nir_address_format_32bit_offset);
458    }
459 
460    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
461 
462    if (nir->info.stage == MESA_SHADER_VERTEX ||
463        nir->info.stage == MESA_SHADER_GEOMETRY) {
464       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
465    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
466       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
467    }
468 
469    /* TODO: also optimize the tex srcs. see radeonSI for reference */
470    /* Skip if there are potentially conflicting rounding modes */
471    struct nir_fold_16bit_tex_image_options fold_16bit_options = {
472       .rounding_mode = nir_rounding_mode_undef,
473       .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
474    };
475    NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);
476 
477    /* Lower texture OPs llvmpipe supports to reduce the amount of sample
478     * functions that need to be pre-compiled.
479     */
480    const nir_lower_tex_options tex_options = {
481       .lower_txd = true,
482    };
483    NIR_PASS(_, nir, nir_lower_tex, &tex_options);
484 
485    lvp_shader_optimize(nir);
486 
487    if (nir->info.stage != MESA_SHADER_VERTEX)
488       nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
489    else {
490       nir->num_inputs = util_last_bit64(nir->info.inputs_read);
491       nir_foreach_shader_in_variable(var, nir) {
492          var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
493       }
494    }
495    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
496                                nir->info.stage);
497 }
498 
499 static void
500 lvp_shader_init(struct lvp_shader *shader, nir_shader *nir)
501 {
502    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
503    if (impl->ssa_alloc > 100) //skip for small shaders
504       shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
505    shader->pipeline_nir = create_pipeline_nir(nir);
506    if (shader->inlines.can_inline)
507       _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
508 }
509 
510 static VkResult
511 lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
512                          const VkPipelineShaderStageCreateInfo *sinfo)
513 {
514    struct lvp_device *pdevice = pipeline->device;
515    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
516    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
517    struct lvp_shader *shader = &pipeline->shaders[stage];
518    nir_shader *nir;
519    VkResult result = compile_spirv(pdevice, sinfo, &nir);
520    if (result == VK_SUCCESS) {
521       lvp_shader_lower(pdevice, pipeline, nir, pipeline->layout);
522       lvp_shader_init(shader, nir);
523    }
524    return result;
525 }
526 
527 static void
528 merge_tess_info(struct shader_info *tes_info,
529                 const struct shader_info *tcs_info)
530 {
531    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
532     *
533     *    "PointMode. Controls generation of points rather than triangles
534     *     or lines. This functionality defaults to disabled, and is
535     *     enabled if either shader stage includes the execution mode."
536     *
537     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
538     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
539     * and OutputVertices, it says:
540     *
541     *    "One mode must be set in at least one of the tessellation
542     *     shader stages."
543     *
544     * So, the fields can be set in either the TCS or TES, but they must
545     * agree if set in both.  Our backend looks at TES, so bitwise-or in
546     * the values from the TCS.
547     */
548    assert(tcs_info->tess.tcs_vertices_out == 0 ||
549           tes_info->tess.tcs_vertices_out == 0 ||
550           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
551    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
552 
553    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
554           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
555           tcs_info->tess.spacing == tes_info->tess.spacing);
556    tes_info->tess.spacing |= tcs_info->tess.spacing;
557 
558    assert(tcs_info->tess._primitive_mode == 0 ||
559           tes_info->tess._primitive_mode == 0 ||
560           tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
561    tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
562    tes_info->tess.ccw |= tcs_info->tess.ccw;
563    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
564 }
565 
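/* Translate NIR transform-feedback info into the gallium stream-output state
 * stored on the shader, remapping varying slots to driver output registers.
 */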
566 static void
567 lvp_shader_xfb_init(struct lvp_shader *shader)
568 {
569    nir_xfb_info *xfb_info = shader->pipeline_nir->nir->xfb_info;
570    if (xfb_info) {
571       uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
572       memset(output_mapping, 0, sizeof(output_mapping));
573 
574       nir_foreach_shader_out_variable(var, shader->pipeline_nir->nir) {
575          unsigned slots = nir_variable_count_slots(var, var->type);
576          for (unsigned i = 0; i < slots; i++)
577             output_mapping[var->data.location + i] = var->data.driver_location + i;
578       }
579 
580       shader->stream_output.num_outputs = xfb_info->output_count;
581       for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
582          if (xfb_info->buffers_written & (1 << i)) {
583             shader->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
584          }
585       }
586       for (unsigned i = 0; i < xfb_info->output_count; i++) {
587          shader->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
588          shader->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
589          shader->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
590          shader->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
591          shader->stream_output.output[i].start_component = xfb_info->outputs[i].component_offset;
592          shader->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
593       }
594 
595    }
596 }
597 
598 static void
599 lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
600 {
601    gl_shader_stage stage = MESA_SHADER_VERTEX;
602    if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir)
603       stage = MESA_SHADER_GEOMETRY;
604    else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir)
605       stage = MESA_SHADER_TESS_EVAL;
606    else if (pipeline->shaders[MESA_SHADER_MESH].pipeline_nir)
607       stage = MESA_SHADER_MESH;
608    pipeline->last_vertex = stage;
609    lvp_shader_xfb_init(&pipeline->shaders[stage]);
610 }
611 
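/* Create the gallium CSO for a finalized NIR shader through the queue
 * context's create_*_state hook for the matching stage; lvp_shader_compile()
 * below wraps this in the queue lock and finalizes the NIR first.
 */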
612 static void *
613 lvp_shader_compile_stage(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir)
614 {
615    if (nir->info.stage == MESA_SHADER_COMPUTE) {
616       struct pipe_compute_state shstate = {0};
617       shstate.prog = nir;
618       shstate.ir_type = PIPE_SHADER_IR_NIR;
619       shstate.static_shared_mem = nir->info.shared_size;
620       return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
621    } else {
622       struct pipe_shader_state shstate = {0};
623       shstate.type = PIPE_SHADER_IR_NIR;
624       shstate.ir.nir = nir;
625       memcpy(&shstate.stream_output, &shader->stream_output, sizeof(shstate.stream_output));
626 
627       switch (nir->info.stage) {
628       case MESA_SHADER_FRAGMENT:
629          return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
630       case MESA_SHADER_VERTEX:
631          return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
632       case MESA_SHADER_GEOMETRY:
633          return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
634       case MESA_SHADER_TESS_CTRL:
635          return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
636       case MESA_SHADER_TESS_EVAL:
637          return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
638       case MESA_SHADER_TASK:
639          return device->queue.ctx->create_ts_state(device->queue.ctx, &shstate);
640       case MESA_SHADER_MESH:
641          return device->queue.ctx->create_ms_state(device->queue.ctx, &shstate);
642       default:
643          unreachable("illegal shader");
644          break;
645       }
646    }
647    return NULL;
648 }
649 
650 void *
651 lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked)
652 {
653    device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
654 
655    if (!locked)
656       simple_mtx_lock(&device->queue.lock);
657 
658    void *state = lvp_shader_compile_stage(device, shader, nir);
659 
660    if (!locked)
661       simple_mtx_unlock(&device->queue.lock);
662 
663    return state;
664 }
665 
666 #ifndef NDEBUG
667 static bool
668 layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
669 {
670    const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
671    uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
672    uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
673    /* base equal */
674    if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
675       return false;
676 
677    /* bindings equal */
678    if (a->binding_count != b->binding_count)
679       return false;
680    size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
681    const struct lvp_descriptor_set_binding_layout *la = a->binding;
682    const struct lvp_descriptor_set_binding_layout *lb = b->binding;
683    if (memcmp(la, lb, binding_size)) {
684       for (unsigned i = 0; i < a->binding_count; i++) {
685          if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
686             return false;
687       }
688    }
689 
690    /* immutable sampler equal */
691    if (a->immutable_sampler_count != b->immutable_sampler_count)
692       return false;
693    if (a->immutable_sampler_count) {
694       size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
695       if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
696          struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
697          struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
698          for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
699             if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
700                return false;
701          }
702       }
703    }
704    return true;
705 }
706 #endif
707 
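/* Merge a (possibly partial) pipeline layout into dst when linking graphics
 * pipeline libraries.  The existing layout is reused when possible; otherwise
 * it is reallocated so the original stays untouched, missing set layouts are
 * taken from src, and the push-constant size/stages are combined.
 */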
708 static void
709 merge_layouts(struct vk_device *device, struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
710 {
711    if (!src)
712       return;
713    if (dst->layout) {
714       /* these must match */
715       ASSERTED VkPipelineCreateFlags src_flag = src->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
716       ASSERTED VkPipelineCreateFlags dst_flag = dst->layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
717       assert(src_flag == dst_flag);
718    }
719    /* always try to reuse existing layout: independent sets bit doesn't guarantee independent sets */
720    if (!dst->layout) {
721       dst->layout = (struct lvp_pipeline_layout*)vk_pipeline_layout_ref(&src->vk);
722       return;
723    }
724    /* this is a big optimization when hit */
725    if (dst->layout == src)
726       return;
727 #ifndef NDEBUG
728    /* verify that layouts match */
729    const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
730    const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
731    for (unsigned i = 0; i < smaller->vk.set_count; i++) {
732       if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
733           smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
734          continue;
735 
736       const struct lvp_descriptor_set_layout *smaller_set_layout =
737          vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
738       const struct lvp_descriptor_set_layout *bigger_set_layout =
739          vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);
740 
741       assert(!smaller_set_layout->binding_count ||
742              !bigger_set_layout->binding_count ||
743              layouts_equal(smaller_set_layout, bigger_set_layout));
744    }
745 #endif
746    /* must be independent sets with different layouts: reallocate to avoid modifying original layout */
747    struct lvp_pipeline_layout *old_layout = dst->layout;
748    dst->layout = vk_zalloc(&device->alloc, sizeof(struct lvp_pipeline_layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
749    memcpy(dst->layout, old_layout, sizeof(struct lvp_pipeline_layout));
750    dst->layout->vk.ref_cnt = 1;
751    for (unsigned i = 0; i < dst->layout->vk.set_count; i++) {
752       if (dst->layout->vk.set_layouts[i])
753          vk_descriptor_set_layout_ref(dst->layout->vk.set_layouts[i]);
754    }
755    vk_pipeline_layout_unref(device, &old_layout->vk);
756 
757    for (unsigned i = 0; i < src->vk.set_count; i++) {
758       if (!dst->layout->vk.set_layouts[i]) {
759          dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
760          if (dst->layout->vk.set_layouts[i])
761             vk_descriptor_set_layout_ref(src->vk.set_layouts[i]);
762       }
763    }
764    dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
765                                     src->vk.set_count);
766    dst->layout->push_constant_size += src->push_constant_size;
767    dst->layout->push_constant_stages |= src->push_constant_stages;
768 }
769 
770 static void
771 copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
772 {
773    *dst = *src;
774    dst->pipeline_nir = NULL; //this gets handled later
775    dst->tess_ccw = NULL; //this gets handled later
776    assert(!dst->shader_cso);
777    assert(!dst->tess_ccw_cso);
778    if (src->inlines.can_inline)
779       _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
780 }
781 
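/* Build a graphics pipeline or graphics-pipeline-library: work out which GPL
 * stage subsets this object provides, merge state and shaders from any linked
 * libraries, fill vk_graphics_pipeline_state, compile the requested stages to
 * NIR, and (for complete, non-library pipelines) compile them to gallium CSOs.
 */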
782 static VkResult
783 lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
784                            struct lvp_device *device,
785                            struct lvp_pipeline_cache *cache,
786                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
787                            VkPipelineCreateFlagBits2KHR flags)
788 {
789    pipeline->type = LVP_PIPELINE_GRAPHICS;
790 
791    VkResult result;
792 
793    const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
794                                                                                 GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
795    const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
796                                                                          PIPELINE_LIBRARY_CREATE_INFO_KHR);
797    const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
798                                                            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
799    if (libinfo)
800       pipeline->stages = libinfo->flags;
801    else if (!libstate)
802       pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
803                          VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
804                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
805                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
806 
807    if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
808       pipeline->library = true;
809 
810    struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
811 
812    if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
813       /* this is a regular pipeline with no partials: directly reuse */
814       pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL;
815    else if (pipeline->stages & layout_stages) {
816       if ((pipeline->stages & layout_stages) == layout_stages)
817          /* this has all the layout stages: directly reuse */
818          pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk);
819       else {
820          /* this is a partial: copy for later merging to avoid modifying another layout */
821          merge_layouts(&device->vk, pipeline, layout);
822       }
823    }
824 
825    if (libstate) {
826       for (unsigned i = 0; i < libstate->libraryCount; i++) {
827          LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
828          vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
829                                           &p->graphics_state);
830          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
831             pipeline->line_smooth = p->line_smooth;
832             pipeline->disable_multisample = p->disable_multisample;
833             pipeline->line_rectangular = p->line_rectangular;
834             memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
835             memcpy(&pipeline->shaders[MESA_SHADER_TASK], &p->shaders[MESA_SHADER_TASK], sizeof(struct lvp_shader) * 2);
836             lvp_forall_gfx_stage(i) {
837                if (i == MESA_SHADER_FRAGMENT)
838                   continue;
839                copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
840             }
841          }
842          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
843             pipeline->force_min_sample = p->force_min_sample;
844             copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]);
845          }
846          if (p->stages & layout_stages) {
847             if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
848                merge_layouts(&device->vk, pipeline, p->layout);
849          }
850          pipeline->stages |= p->stages;
851       }
852    }
853 
854    result = vk_graphics_pipeline_state_fill(&device->vk,
855                                             &pipeline->graphics_state,
856                                             pCreateInfo, NULL, 0, NULL, NULL,
857                                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
858                                             &pipeline->state_data);
859    if (result != VK_SUCCESS)
860       return result;
861 
862    assert(pipeline->library || pipeline->stages & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
863                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
864                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));
865 
866    pipeline->device = device;
867 
868    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
869       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
870       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
871       if (stage == MESA_SHADER_FRAGMENT) {
872          if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
873             continue;
874       } else {
875          if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
876             continue;
877       }
878       result = lvp_shader_compile_to_ir(pipeline, sinfo);
879       if (result != VK_SUCCESS)
880          goto fail;
881 
882       switch (stage) {
883       case MESA_SHADER_FRAGMENT:
884          if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading)
885             pipeline->force_min_sample = true;
886          break;
887       default: break;
888       }
889    }
890    if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) {
891       nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
892       merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info);
893       if (BITSET_TEST(pipeline->graphics_state.dynamic,
894                       MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
895          pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = create_pipeline_nir(nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir));
896          pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
897       } else if (pipeline->graphics_state.ts &&
898                  pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) {
899          pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
900       }
901    }
902    if (libstate) {
903        for (unsigned i = 0; i < libstate->libraryCount; i++) {
904           LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
905           if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
906              if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir)
907                 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir);
908           }
909           if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
910              lvp_forall_gfx_stage(j) {
911                 if (j == MESA_SHADER_FRAGMENT)
912                    continue;
913                 if (p->shaders[j].pipeline_nir)
914                    lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir);
915              }
916              if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
917                 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw);
918           }
919        }
920    } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
921       const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
922       if (rs) {
923          /* always draw bresenham if !smooth */
924          pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
925          pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ||
926                                          rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
927          pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
928       } else
929          pipeline->line_rectangular = true;
930       lvp_pipeline_xfb_init(pipeline);
931    }
932    if (!libstate && !pipeline->library)
933       lvp_pipeline_shaders_compile(pipeline, false);
934 
935    return VK_SUCCESS;
936 
937 fail:
938    for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
939       lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
940    }
941    vk_free(&device->vk.alloc, pipeline->state_data);
942 
943    return result;
944 }
945 
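/* Create the gallium CSOs for every compiled stage of a pipeline.  Stages
 * flagged for uniform inlining are skipped here; those get per-variant CSOs
 * through their inlines.variants set instead.
 */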
946 void
947 lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline, bool locked)
948 {
949    if (pipeline->compiled)
950       return;
951    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
952       if (!pipeline->shaders[i].pipeline_nir)
953          continue;
954 
955       gl_shader_stage stage = i;
956       assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage);
957 
958       if (!pipeline->shaders[stage].inlines.can_inline) {
959          pipeline->shaders[stage].shader_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
960             nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir), locked);
961          if (pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
962             pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
963                nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir), locked);
964       }
965    }
966    pipeline->compiled = true;
967 }
968 
969 static VkResult
970 lvp_graphics_pipeline_create(
971    VkDevice _device,
972    VkPipelineCache _cache,
973    const VkGraphicsPipelineCreateInfo *pCreateInfo,
974    VkPipelineCreateFlagBits2KHR flags,
975    VkPipeline *pPipeline,
976    bool group)
977 {
978    LVP_FROM_HANDLE(lvp_device, device, _device);
979    LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
980    struct lvp_pipeline *pipeline;
981    VkResult result;
982 
983    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
984 
985    size_t size = 0;
986    const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV);
987    if (!group && groupinfo)
988       size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline);
989 
990    pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8,
991                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
992    if (pipeline == NULL)
993       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
994 
995    vk_object_base_init(&device->vk, &pipeline->base,
996                        VK_OBJECT_TYPE_PIPELINE);
997    uint64_t t0 = os_time_get_nano();
998    result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
999    if (result != VK_SUCCESS) {
1000       vk_free(&device->vk.alloc, pipeline);
1001       return result;
1002    }
1003    if (!group && groupinfo) {
1004       VkGraphicsPipelineCreateInfo pci = *pCreateInfo;
1005       for (unsigned i = 0; i < groupinfo->groupCount; i++) {
1006          const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i];
1007          pci.pVertexInputState = g->pVertexInputState;
1008          pci.pTessellationState = g->pTessellationState;
1009          pci.pStages = g->pStages;
1010          pci.stageCount = g->stageCount;
1011          result = lvp_graphics_pipeline_create(_device, _cache, &pci, flags, &pipeline->groups[i], true);
1012          if (result != VK_SUCCESS) {
1013             lvp_pipeline_destroy(device, pipeline, false);
1014             return result;
1015          }
1016          pipeline->num_groups++;
1017       }
1018       for (unsigned i = 0; i < groupinfo->pipelineCount; i++)
1019          pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i];
1020       pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount;
1021    }
1022 
1023    VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1024    if (feedback && !group) {
1025       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1026       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1027       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1028    }
1029 
1030    *pPipeline = lvp_pipeline_to_handle(pipeline);
1031 
1032    return VK_SUCCESS;
1033 }
1034 
1035 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
1036    VkDevice                                    _device,
1037    VkPipelineCache                             pipelineCache,
1038    uint32_t                                    count,
1039    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
1040    const VkAllocationCallbacks*                pAllocator,
1041    VkPipeline*                                 pPipelines)
1042 {
1043    VkResult result = VK_SUCCESS;
1044    unsigned i = 0;
1045 
1046    for (; i < count; i++) {
1047       VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1048       VkPipelineCreateFlagBits2KHR flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
1049 
1050       if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1051          r = lvp_graphics_pipeline_create(_device,
1052                                           pipelineCache,
1053                                           &pCreateInfos[i],
1054                                           flags,
1055                                           &pPipelines[i],
1056                                           false);
1057       if (r != VK_SUCCESS) {
1058          result = r;
1059          pPipelines[i] = VK_NULL_HANDLE;
1060          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1061             break;
1062       }
1063    }
1064    if (result != VK_SUCCESS) {
1065       for (; i < count; i++)
1066          pPipelines[i] = VK_NULL_HANDLE;
1067    }
1068 
1069    return result;
1070 }
1071 
1072 static VkResult
1073 lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
1074                           struct lvp_device *device,
1075                           struct lvp_pipeline_cache *cache,
1076                           const VkComputePipelineCreateInfo *pCreateInfo)
1077 {
1078    pipeline->device = device;
1079    pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
1080    vk_pipeline_layout_ref(&pipeline->layout->vk);
1081    pipeline->force_min_sample = false;
1082 
1083    pipeline->type = LVP_PIPELINE_COMPUTE;
1084 
1085    VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
1086    if (result != VK_SUCCESS)
1087       return result;
1088 
1089    struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE];
1090    if (!shader->inlines.can_inline)
1091       shader->shader_cso = lvp_shader_compile(pipeline->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false);
1092    pipeline->compiled = true;
1093    return VK_SUCCESS;
1094 }
1095 
1096 static VkResult
1097 lvp_compute_pipeline_create(
1098    VkDevice _device,
1099    VkPipelineCache _cache,
1100    const VkComputePipelineCreateInfo *pCreateInfo,
1101    VkPipelineCreateFlagBits2KHR flags,
1102    VkPipeline *pPipeline)
1103 {
1104    LVP_FROM_HANDLE(lvp_device, device, _device);
1105    LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
1106    struct lvp_pipeline *pipeline;
1107    VkResult result;
1108 
1109    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1110 
1111    pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
1112                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1113    if (pipeline == NULL)
1114       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1115 
1116    vk_object_base_init(&device->vk, &pipeline->base,
1117                        VK_OBJECT_TYPE_PIPELINE);
1118    uint64_t t0 = os_time_get_nano();
1119    result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
1120    if (result != VK_SUCCESS) {
1121       vk_free(&device->vk.alloc, pipeline);
1122       return result;
1123    }
1124 
1125    const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1126    if (feedback) {
1127       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1128       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1129       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1130    }
1131 
1132    *pPipeline = lvp_pipeline_to_handle(pipeline);
1133 
1134    return VK_SUCCESS;
1135 }
1136 
1137 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
1138    VkDevice                                    _device,
1139    VkPipelineCache                             pipelineCache,
1140    uint32_t                                    count,
1141    const VkComputePipelineCreateInfo*          pCreateInfos,
1142    const VkAllocationCallbacks*                pAllocator,
1143    VkPipeline*                                 pPipelines)
1144 {
1145    VkResult result = VK_SUCCESS;
1146    unsigned i = 0;
1147 
1148    for (; i < count; i++) {
1149       VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1150       VkPipelineCreateFlagBits2KHR flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]);
1151 
1152       if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1153          r = lvp_compute_pipeline_create(_device,
1154                                          pipelineCache,
1155                                          &pCreateInfos[i],
1156                                          flags,
1157                                          &pPipelines[i]);
1158       if (r != VK_SUCCESS) {
1159          result = r;
1160          pPipelines[i] = VK_NULL_HANDLE;
1161          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1162             break;
1163       }
1164    }
1165    if (result != VK_SUCCESS) {
1166       for (; i < count; i++)
1167          pPipelines[i] = VK_NULL_HANDLE;
1168    }
1169 
1170 
1171    return result;
1172 }
1173 
1174 VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT(
1175     VkDevice                                    _device,
1176     VkShaderEXT                                 _shader,
1177     const VkAllocationCallbacks*                pAllocator)
1178 {
1179    LVP_FROM_HANDLE(lvp_device, device, _device);
1180    LVP_FROM_HANDLE(lvp_shader, shader, _shader);
1181 
1182    if (!shader)
1183       return;
1184    shader_destroy(device, shader, false);
1185 
1186    vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
1187    blob_finish(&shader->blob);
1188    vk_object_base_finish(&shader->base);
1189    vk_free2(&device->vk.alloc, pAllocator, shader);
1190 }
1191 
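/* VK_EXT_shader_object: build an lvp_shader either by compiling SPIR-V or by
 * deserializing a previously exported binary (validated against the device
 * cache UUID and an embedded SHA1 of the payload), then run the usual NIR
 * lowering (SPIR-V path only) and shader initialization.
 */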
1192 static VkShaderEXT
1193 create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator)
1194 {
1195    nir_shader *nir = NULL;
1196    gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
1197    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
1198    if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) {
1199       VkShaderModuleCreateInfo minfo = {
1200          VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
1201          NULL,
1202          0,
1203          pCreateInfo->codeSize,
1204          pCreateInfo->pCode,
1205       };
1206       VkPipelineShaderStageCreateFlagBits flags = 0;
1207       if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
1208          flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT;
1209       if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
1210          flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
1211       VkPipelineShaderStageCreateInfo sinfo = {
1212          VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1213          &minfo,
1214          flags,
1215          pCreateInfo->stage,
1216          VK_NULL_HANDLE,
1217          pCreateInfo->pName,
1218          pCreateInfo->pSpecializationInfo,
1219       };
1220       VkResult result = compile_spirv(device, &sinfo, &nir);
1221       if (result != VK_SUCCESS)
1222          goto fail;
1223       nir->info.separate_shader = true;
1224    } else {
1225       assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT);
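      /* Binary shaders are laid out as produced by lvp_GetShaderBinaryDataEXT:
       * [VK_UUID_SIZE cache UUID][SHA1 of the serialized NIR][serialized NIR].
       * Reject anything whose UUID or checksum does not match.
       */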
1226       if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + VK_UUID_SIZE + 1)
1227          return VK_NULL_HANDLE;
1228       struct blob_reader blob;
1229       const uint8_t *data = pCreateInfo->pCode;
1230       uint8_t uuid[VK_UUID_SIZE];
1231       lvp_device_get_cache_uuid(uuid);
1232       if (memcmp(uuid, data, VK_UUID_SIZE))
1233          return VK_NULL_HANDLE;
1234       size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH - VK_UUID_SIZE;
1235       unsigned char sha1[20];
1236 
1237       struct mesa_sha1 sctx;
1238       _mesa_sha1_init(&sctx);
1239       _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1240       _mesa_sha1_final(&sctx, sha1);
1241       if (memcmp(sha1, data + VK_UUID_SIZE, SHA1_DIGEST_LENGTH))
1242          return VK_NULL_HANDLE;
1243 
1244       blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1245       nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob);
1246       if (!nir)
1247          goto fail;
1248    }
1249    if (!nir_shader_get_entrypoint(nir))
1250       goto fail;
1251    struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT);
1252    if (!shader)
1253       goto fail;
1254    blob_init(&shader->blob);
1255    VkPipelineLayoutCreateInfo pci = {
1256       VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1257       NULL,
1258       0,
1259       pCreateInfo->setLayoutCount,
1260       pCreateInfo->pSetLayouts,
1261       pCreateInfo->pushConstantRangeCount,
1262       pCreateInfo->pPushConstantRanges,
1263    };
1264    shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator);
1265 
1266    if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT)
1267       lvp_shader_lower(device, NULL, nir, shader->layout);
1268 
1269    lvp_shader_init(shader, nir);
1270 
1271    lvp_shader_xfb_init(shader);
1272    if (stage == MESA_SHADER_TESS_EVAL) {
1273       /* spec requires that all tess modes are set in both shaders */
1274       nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
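      /* Also keep a clone with the winding flipped so a CSO for either
       * tessellation orientation is available.
       */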
1275       shader->tess_ccw = create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir));
1276       shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw;
1277       shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir), false);
1278    } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) {
1279       /* this is (currently) illegal */
1280       assert(!nir->info.fs.uses_fbfetch_output);
1281       shader_destroy(device, shader, false);
1282 
1283       vk_object_base_finish(&shader->base);
1284       vk_free2(&device->vk.alloc, pAllocator, shader);
1285       return VK_NULL_HANDLE;
1286    }
1287    nir_serialize(&shader->blob, nir, true);
1288    shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false);
1289    return lvp_shader_to_handle(shader);
1290 fail:
1291    ralloc_free(nir);
1292    return VK_NULL_HANDLE;
1293 }
1294 
1295 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT(
1296     VkDevice                                    _device,
1297     uint32_t                                    createInfoCount,
1298     const VkShaderCreateInfoEXT*                pCreateInfos,
1299     const VkAllocationCallbacks*                pAllocator,
1300     VkShaderEXT*                                pShaders)
1301 {
1302    LVP_FROM_HANDLE(lvp_device, device, _device);
1303    unsigned i;
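   /* A binary that fails validation maps to VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT
    * (with the remaining handles zeroed); any other failure is reported as
    * running out of host memory.
    */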
1304    for (i = 0; i < createInfoCount; i++) {
1305       pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator);
1306       if (!pShaders[i]) {
1307          if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) {
1308             if (i < createInfoCount - 1)
1309                memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT));
1310             return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
1311          }
1312          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1313       }
1314    }
1315    return VK_SUCCESS;
1316 }
1317 
1318 
1319 VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT(
1320     VkDevice                                    device,
1321     VkShaderEXT                                 _shader,
1322     size_t*                                     pDataSize,
1323     void*                                       pData)
1324 {
1325    LVP_FROM_HANDLE(lvp_shader, shader, _shader);
1326    VkResult ret = VK_SUCCESS;
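   /* With pData == NULL this is just a size query.  The returned blob is
    * [cache UUID][SHA1 of the serialized NIR][serialized NIR], matching what
    * create_shader_object() expects for VK_SHADER_CODE_TYPE_BINARY_EXT.
    */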
1327    if (pData) {
1328       if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE) {
1329          ret = VK_INCOMPLETE;
1330          *pDataSize = 0;
1331       } else {
1332          *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE);
1333          uint8_t *data = pData;
1334          lvp_device_get_cache_uuid(data);
1335          struct mesa_sha1 sctx;
1336          _mesa_sha1_init(&sctx);
1337          _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size);
1338          _mesa_sha1_final(&sctx, data + VK_UUID_SIZE);
1339          memcpy(data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, shader->blob.data, shader->blob.size);
1340       }
1341    } else {
1342       *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE;
1343    }
1344    return ret;
1345 }
1346 
1347 #ifdef VK_ENABLE_BETA_EXTENSIONS
1348 static VkResult
1349 lvp_exec_graph_pipeline_create(VkDevice _device, VkPipelineCache _cache,
1350                                const VkExecutionGraphPipelineCreateInfoAMDX *create_info,
1351                                VkPipelineCreateFlagBits2KHR flags,
1352                                VkPipeline *out_pipeline)
1353 {
1354    LVP_FROM_HANDLE(lvp_device, device, _device);
1355    struct lvp_pipeline *pipeline;
1356    VkResult result;
1357 
1358    assert(create_info->sType == VK_STRUCTURE_TYPE_EXECUTION_GRAPH_PIPELINE_CREATE_INFO_AMDX);
1359 
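   /* The group array covers both directly supplied stages and all groups
    * imported from linked pipeline libraries.
    */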
1360    uint32_t stage_count = create_info->stageCount;
1361    if (create_info->pLibraryInfo) {
1362       for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
1363          VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
1364          stage_count += library->num_groups;
1365       }
1366    }
1367 
1368    pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + stage_count * sizeof(VkPipeline), 8,
1369                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1370    if (!pipeline)
1371       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1372 
1373    vk_object_base_init(&device->vk, &pipeline->base,
1374                        VK_OBJECT_TYPE_PIPELINE);
1375 
1376    uint64_t t0 = os_time_get_nano();
1377 
1378    pipeline->type = LVP_PIPELINE_EXEC_GRAPH;
1379    pipeline->layout = lvp_pipeline_layout_from_handle(create_info->layout);
1380 
1381    pipeline->exec_graph.scratch_size = 0;
1382    pipeline->num_groups = stage_count;
1383 
1384    uint32_t stage_index = 0;
1385    for (uint32_t i = 0; i < create_info->stageCount; i++) {
1386       const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
1387          create_info->pStages[i].pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
1388 
1389       VkComputePipelineCreateInfo stage_create_info = {
1390          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1391          .flags = create_info->flags,
1392          .stage = create_info->pStages[i],
1393          .layout = create_info->layout,
1394       };
1395 
1396       result = lvp_compute_pipeline_create(_device, _cache, &stage_create_info, flags, &pipeline->groups[i]);
1397       if (result != VK_SUCCESS)
1398          goto fail;
1399 
1400       VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
1401       nir_shader *nir = stage->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir;
1402 
1403       if (node_info) {
1404          stage->exec_graph.name = node_info->pName;
1405          stage->exec_graph.index = node_info->index;
1406       }
1407 
1408       /* TODO: Add a shader info NIR pass to figure out how many payloads the shader creates. */
1409       stage->exec_graph.scratch_size = nir->info.cs.node_payloads_size * 256;
1410       pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, stage->exec_graph.scratch_size);
1411 
1412       stage_index++;
1413    }
1414 
1415    if (create_info->pLibraryInfo) {
1416       for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
1417          VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
1418          for (uint32_t j = 0; j < library->num_groups; j++) {
1419             /* TODO: Do we need reference counting? */
1420             pipeline->groups[stage_index] = library->groups[j];
1421             stage_index++;
1422          }
1423          pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, library->exec_graph.scratch_size);
1424       }
1425    }
1426 
1427    const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1428    if (feedback) {
1429       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1430       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1431       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1432    }
1433 
1434    *out_pipeline = lvp_pipeline_to_handle(pipeline);
1435 
1436    return VK_SUCCESS;
1437 
1438 fail:
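   /* groups[] was zero-initialized, so destroying entries that were never
    * created is a no-op.
    */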
1439    for (uint32_t i = 0; i < stage_count; i++)
1440       lvp_DestroyPipeline(_device, pipeline->groups[i], NULL);
1441 
1442    vk_free(&device->vk.alloc, pipeline);
1443 
1444    return result;
1445 }
1446 
1447 VKAPI_ATTR VkResult VKAPI_CALL
1448 lvp_CreateExecutionGraphPipelinesAMDX(VkDevice device, VkPipelineCache pipelineCache,
1449                                       uint32_t createInfoCount,
1450                                       const VkExecutionGraphPipelineCreateInfoAMDX *pCreateInfos,
1451                                       const VkAllocationCallbacks *pAllocator,
1452                                       VkPipeline *pPipelines)
1453 {
1454    VkResult result = VK_SUCCESS;
1455    uint32_t i = 0;
1456 
1457    for (; i < createInfoCount; i++) {
1458       VkPipelineCreateFlagBits2KHR flags = vk_graph_pipeline_create_flags(&pCreateInfos[i]);
1459 
1460       VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1461       if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1462          r = lvp_exec_graph_pipeline_create(device, pipelineCache, &pCreateInfos[i], flags, &pPipelines[i]);
1463       if (r != VK_SUCCESS) {
1464          result = r;
1465          pPipelines[i] = VK_NULL_HANDLE;
1466          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1467             break;
1468       }
1469    }
1470    if (result != VK_SUCCESS) {
1471       for (; i < createInfoCount; i++)
1472          pPipelines[i] = VK_NULL_HANDLE;
1473    }
1474 
1475    return result;
1476 }
1477 
1478 VKAPI_ATTR VkResult VKAPI_CALL
1479 lvp_GetExecutionGraphPipelineScratchSizeAMDX(VkDevice device, VkPipeline executionGraph,
1480                                              VkExecutionGraphPipelineScratchSizeAMDX *pSizeInfo)
1481 {
1482    VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
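   /* Scale the per-node scratch estimate (presumably for concurrently running
    * invocations) and enforce a small minimum allocation.
    */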
1483    pSizeInfo->size = MAX2(pipeline->exec_graph.scratch_size * 32, 16);
1484    return VK_SUCCESS;
1485 }
1486 
1487 VKAPI_ATTR VkResult VKAPI_CALL
1488 lvp_GetExecutionGraphPipelineNodeIndexAMDX(VkDevice device, VkPipeline executionGraph,
1489                                            const VkPipelineShaderStageNodeCreateInfoAMDX *pNodeInfo,
1490                                            uint32_t *pNodeIndex)
1491 {
1492    VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
1493 
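   /* Execution graph nodes are identified by a (name, index) pair; linearly
    * search the flattened group list for a match.
    */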
1494    for (uint32_t i = 0; i < pipeline->num_groups; i++) {
1495       VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
1496       if (stage->exec_graph.index == pNodeInfo->index &&
1497           !strcmp(stage->exec_graph.name, pNodeInfo->pName)) {
1498          *pNodeIndex = i;
1499          return VK_SUCCESS;
1500       }
1501    }
1502 
1503    return VK_ERROR_OUT_OF_HOST_MEMORY;
1504 }
1505 #endif
1506