/*
 * Copyright © 2019 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "lvp_private.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"
#include "glsl_types.h"
#include "util/os_time.h"
#include "spirv/nir_spirv.h"
#include "nir/nir_builder.h"
#include "lvp_lower_vulkan_resource.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "tgsi/tgsi_from_mesa.h"
#include "nir/nir_xfb_info.h"

#define SPIR_V_MAGIC_NUMBER 0x07230203

#define LVP_PIPELINE_DUP(dst, src, type, count) do {             \
      type *temp = ralloc_array(mem_ctx, type, count);           \
      if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY;             \
      memcpy(temp, (src), sizeof(type) * count);                 \
      dst = temp;                                                \
   } while(0)

/* Release everything the pipeline owns: the gallium shader CSOs, the
 * per-stage NIR, the pipeline layout reference, and the pipeline object
 * itself.
 */
void
lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
{
   if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
   if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])
      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
   if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
   if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
      ralloc_free(pipeline->pipeline_nir[i]);

   if (pipeline->layout)
      vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);

   ralloc_free(pipeline->mem_ctx);
   vk_free(&device->vk.alloc, pipeline->state_data);
   vk_object_base_finish(&pipeline->base);
   vk_free(&device->vk.alloc, pipeline);
}

VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
   VkDevice                                    _device,
   VkPipeline                                  _pipeline,
   const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Queue the pipeline for deferred destruction; the queue later calls
    * lvp_pipeline_destroy().
    */
   simple_mtx_lock(&device->queue.pipeline_lock);
   util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
   simple_mtx_unlock(&device->queue.pipeline_lock);
}

static inline unsigned
st_shader_stage_to_ptarget(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return PIPE_SHADER_VERTEX;
   case MESA_SHADER_FRAGMENT:
      return PIPE_SHADER_FRAGMENT;
   case MESA_SHADER_GEOMETRY:
      return PIPE_SHADER_GEOMETRY;
   case MESA_SHADER_TESS_CTRL:
      return PIPE_SHADER_TESS_CTRL;
   case MESA_SHADER_TESS_EVAL:
      return PIPE_SHADER_TESS_EVAL;
   case MESA_SHADER_COMPUTE:
      return PIPE_SHADER_COMPUTE;
   default:
      break;
   }

   assert(!"should not be reached");
   return PIPE_SHADER_VERTEX;
}

static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

static void
set_image_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                   nir_intrinsic_instr *instr,
                   bool reads, bool writes)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
     if (pipeline->layout->vk.set_layouts[s])
        value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].image_count;
   }
   value += binding->stage[nir->info.stage].image_index;
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;

   if (reads)
      pipeline->access[nir->info.stage].images_read |= mask;
   if (writes)
      pipeline->access[nir->info.stage].images_written |= mask;
}

static void
set_buffer_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                    nir_intrinsic_instr *instr)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   if (!var) {
      nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
      if (deref->modes != nir_var_mem_ssbo)
         return;
      nir_binding b = nir_chase_binding(instr->src[0]);
      var = nir_get_binding_variable(nir, b);
      if (!var)
         return;
   }
   if (var->data.mode != nir_var_mem_ssbo)
      return;
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
     if (pipeline->layout->vk.set_layouts[s])
        value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].shader_buffer_count;
   }
   value += binding->stage[nir->info.stage].shader_buffer_index;
   /* Structs have been lowered already, so get_aoa_size is sufficient. */
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
   pipeline->access[nir->info.stage].buffers_written |= mask;
}

static void
scan_intrinsic(struct lvp_pipeline *pipeline, nir_shader *nir, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      set_image_access(pipeline, nir, instr, true, false);
      break;
   case nir_intrinsic_image_deref_store:
      set_image_access(pipeline, nir, instr, false, true);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      set_image_access(pipeline, nir, instr, true, true);
      break;
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_store_deref:
      set_buffer_access(pipeline, nir, instr);
      break;
   default: break;
   }
}

/* Record per-stage image/SSBO access masks in pipeline->access. */
static void
scan_pipeline_info(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   nir_foreach_function(function, nir) {
      if (function->impl)
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  scan_intrinsic(pipeline, nir, nir_instr_as_intrinsic(instr));
            }
         }
   }

}

/* Remove scoped_barrier intrinsics; for compute shaders (data != NULL),
 * barriers with workgroup or device memory scope are kept.
 */
static bool
remove_scoped_barriers_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_scoped_barrier)
      return false;
   if (data) {
      if (nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP ||
          nir_intrinsic_memory_scope(intr) == NIR_SCOPE_DEVICE)
         return false;
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_scoped_barriers(nir_shader *nir, bool is_compute)
{
   return nir_shader_instructions_pass(nir, remove_scoped_barriers_impl, nir_metadata_dominance, (void*)is_compute);
}

/* Rewrite demote/terminate intrinsics as discard/discard_if. */
static bool
lower_demote_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
      intr->intrinsic = nir_intrinsic_discard;
      return true;
   }
   if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) {
      intr->intrinsic = nir_intrinsic_discard_if;
      return true;
   }
   return false;
}

static bool
lower_demote(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_demote_impl, nir_metadata_dominance, NULL);
}

static bool
find_tex(const nir_instr *instr, const void *data_cb)
{
   if (instr->type == nir_instr_type_tex)
      return true;
   return false;
}

/* Fold a constant texture-offset source into the texture index. */
static nir_ssa_def *
fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
{
   nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
   unsigned offset = 0;

   int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
   if (idx == -1)
      return NULL;

   if (!nir_src_is_const(tex_instr->src[idx].src))
      return NULL;
   offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);

   nir_tex_instr_remove_src(tex_instr, idx);
   tex_instr->texture_index += offset;
   return NIR_LOWER_INSTR_PROGRESS;
}

static bool
lvp_nir_fixup_indirect_tex(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
}

/* Standard NIR optimization loop, repeated until no pass makes progress. */
static void
optimize(nir_shader *nir)
{
   bool progress = false;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_remove_phis);
      bool trivial_continues = false;
      NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);
      progress |= trivial_continues;
      if (trivial_continues) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_undef);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);
      NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
   } while (progress);
}

void
lvp_shader_optimize(nir_shader *nir)
{
   optimize(nir);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_opt_dce);
   nir_sweep(nir);
}

static VkResult
lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
                         const VkPipelineShaderStageCreateInfo *sinfo)
{
   struct lvp_device *pdevice = pipeline->device;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
   const nir_shader_compiler_options *drv_options = pdevice->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));
   VkResult result;
   nir_shader *nir;

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_VULKAN,
      .caps = {
         .float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),
         .int16 = true,
         .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
         .tessellation = true,
         .float_controls = true,
         .image_ms_array = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .storage_image_ms = true,
         .geometry_streams = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .variable_pointers = true,
         .stencil_export = true,
         .post_depth_coverage = true,
         .transform_feedback = true,
         .device_group = true,
         .draw_parameters = true,
         .shader_viewport_index_layer = true,
         .multiview = true,
         .physical_storage_buffer_address = true,
         .int64_atomics = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_quad = true,
#if LLVM_VERSION_MAJOR >= 10
         .subgroup_shuffle = true,
#endif
         .subgroup_vote = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .int8 = true,
         .float16 = true,
         .demote_to_helper_invocation = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,
      .shared_addr_format = nir_address_format_32bit_offset,
   };

   result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
                                            &spirv_options, drv_options,
                                            NULL, &nir);
   if (result != VK_SUCCESS)
      return result;

   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(nir, remove_scoped_barriers, nir->info.stage == MESA_SHADER_COMPUTE);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   struct nir_lower_subgroups_options subgroup_opts = {0};
   subgroup_opts.lower_quad = true;
   subgroup_opts.ballot_components = 1;
   subgroup_opts.ballot_bit_size = 32;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);

   if (stage == MESA_SHADER_FRAGMENT)
      lvp_lower_input_attachments(nir, false);
   NIR_PASS_V(nir, nir_lower_is_helper_invocation);
   NIR_PASS_V(nir, lower_demote);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_image, NULL);

   scan_pipeline_info(pipeline, nir);

   optimize(nir);
   lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_global,
              nir_address_format_64bit_global);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* TODO: also optimize the tex srcs. see radeonSI for reference */
   /* Skip if there are potentially conflicting rounding modes */
   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
      .rounding_mode = nir_rounding_mode_undef,
      .fold_tex_dest = true,
   };
   NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);

   lvp_shader_optimize(nir);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage != MESA_SHADER_VERTEX)
      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   else {
      nir->num_inputs = util_last_bit64(nir->info.inputs_read);
      nir_foreach_shader_in_variable(var, nir) {
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }
   }
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               nir->info.stage);

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   if (impl->ssa_alloc > 100) //skip for small shaders
      pipeline->inlines[stage].must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
   pipeline->pipeline_nir[stage] = nir;

   return VK_SUCCESS;
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

/* Translate the NIR xfb_info of the last pre-rasterization stage into
 * gallium stream-output state.
 */
static void
lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
{
   gl_shader_stage stage = MESA_SHADER_VERTEX;
   if (pipeline->pipeline_nir[MESA_SHADER_GEOMETRY])
      stage = MESA_SHADER_GEOMETRY;
   else if (pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL])
      stage = MESA_SHADER_TESS_EVAL;
   pipeline->last_vertex = stage;

   nir_xfb_info *xfb_info = pipeline->pipeline_nir[stage]->xfb_info;
   if (xfb_info) {
      uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
      memset(output_mapping, 0, sizeof(output_mapping));

      nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {
         unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                                            : glsl_count_attribute_slots(var->type, false);
         for (unsigned i = 0; i < slots; i++)
            output_mapping[var->data.location + i] = var->data.driver_location + i;
      }

      pipeline->stream_output.num_outputs = xfb_info->output_count;
      for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         if (xfb_info->buffers_written & (1 << i)) {
            pipeline->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
         }
      }
      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         pipeline->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
         pipeline->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
         pipeline->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
         pipeline->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
         pipeline->stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
         pipeline->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
      }

   }
}

void *
lvp_pipeline_compile_stage(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      struct pipe_compute_state shstate = {0};
      shstate.prog = nir;
      shstate.ir_type = PIPE_SHADER_IR_NIR;
      shstate.req_local_mem = nir->info.shared_size;
      return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
   } else {
      struct pipe_shader_state shstate = {0};
      shstate.type = PIPE_SHADER_IR_NIR;
      shstate.ir.nir = nir;
      if (nir->info.stage == pipeline->last_vertex)
         memcpy(&shstate.stream_output, &pipeline->stream_output, sizeof(shstate.stream_output));

      switch (nir->info.stage) {
      case MESA_SHADER_FRAGMENT:
         return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_VERTEX:
         return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_GEOMETRY:
         return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_CTRL:
         return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_EVAL:
         return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
      default:
         unreachable("illegal shader");
         break;
      }
   }
   return NULL;
}

void *
lvp_pipeline_compile(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
   return lvp_pipeline_compile_stage(pipeline, nir);
}

#ifndef NDEBUG
static bool
layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
{
   const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
   uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
   uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
   /* base equal */
   if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
      return false;

   /* bindings equal */
   if (a->binding_count != b->binding_count)
      return false;
   size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
   const struct lvp_descriptor_set_binding_layout *la = a->binding;
   const struct lvp_descriptor_set_binding_layout *lb = b->binding;
   if (memcmp(la, lb, binding_size)) {
      for (unsigned i = 0; i < a->binding_count; i++) {
         if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
            return false;
      }
   }

   /* immutable sampler equal */
   if (a->immutable_sampler_count != b->immutable_sampler_count)
      return false;
   if (a->immutable_sampler_count) {
      size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
      if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
         struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
         struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
         for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
            if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
               return false;
         }
      }
   }
   return true;
}
#endif

static void
merge_layouts(struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
{
   if (!src)
      return;
   if (!dst->layout) {
      /* no layout created yet: copy onto ralloc ctx allocation for auto-free */
      dst->layout = ralloc(dst->mem_ctx, struct lvp_pipeline_layout);
      memcpy(dst->layout, src, sizeof(struct lvp_pipeline_layout));
      return;
   }
#ifndef NDEBUG
   /* verify that layouts match */
   const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
   const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
   for (unsigned i = 0; i < smaller->vk.set_count; i++) {
      if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
          smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
         continue;

      const struct lvp_descriptor_set_layout *smaller_set_layout =
         vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
      const struct lvp_descriptor_set_layout *bigger_set_layout =
         vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);

      assert(!smaller_set_layout->binding_count ||
             !bigger_set_layout->binding_count ||
             layouts_equal(smaller_set_layout, bigger_set_layout));
   }
#endif
   for (unsigned i = 0; i < src->vk.set_count; i++) {
      if (!dst->layout->vk.set_layouts[i])
         dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
   }
   dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
                                    src->vk.set_count);
   dst->layout->push_constant_size += src->push_constant_size;
   dst->layout->push_constant_stages |= src->push_constant_stages;
}

static VkResult
lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
                           struct lvp_device *device,
                           struct lvp_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   VkResult result;

   const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
                                                                                GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
   const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
                                                                         PIPELINE_LIBRARY_CREATE_INFO_KHR);
   const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   if (libinfo)
      pipeline->stages = libinfo->flags;
   else if (!libstate)
      pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
   pipeline->mem_ctx = ralloc_context(NULL);

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      pipeline->library = true;

   struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   if (layout)
      vk_pipeline_layout_ref(&layout->vk);

   if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
      /* this is a regular pipeline with no partials: directly reuse */
      pipeline->layout = layout;
   else if (pipeline->stages & layout_stages) {
      if ((pipeline->stages & layout_stages) == layout_stages)
         /* this has all the layout stages: directly reuse */
         pipeline->layout = layout;
      else {
         /* this is a partial: copy for later merging to avoid modifying another layout */
         merge_layouts(pipeline, layout);
      }
   }

   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
                                          &p->graphics_state);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            pipeline->line_smooth = p->line_smooth;
            pipeline->disable_multisample = p->disable_multisample;
            pipeline->line_rectangular = p->line_rectangular;
            pipeline->last_vertex = p->last_vertex;
            memcpy(&pipeline->stream_output, &p->stream_output, sizeof(p->stream_output));
            memcpy(&pipeline->access, &p->access, sizeof(p->access));
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)
            pipeline->force_min_sample = p->force_min_sample;
         if (p->stages & layout_stages) {
            if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
               merge_layouts(pipeline, p->layout);
         }
         pipeline->stages |= p->stages;
      }
   }

   result = vk_graphics_pipeline_state_fill(&device->vk,
                                            &pipeline->graphics_state,
                                            pCreateInfo, NULL, NULL, NULL,
                                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
                                            &pipeline->state_data);
   if (result != VK_SUCCESS)
      return result;

   assert(pipeline->library || pipeline->stages == (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));

   pipeline->device = device;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
      if (stage == MESA_SHADER_FRAGMENT) {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
            continue;
      } else {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
            continue;
      }
      result = lvp_shader_compile_to_ir(pipeline, sinfo);
      if (result != VK_SUCCESS)
         goto fail;

      switch (stage) {
      case MESA_SHADER_GEOMETRY:
         pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
                                     pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == SHADER_PRIM_LINES;
         break;
      case MESA_SHADER_FRAGMENT:
         if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
            pipeline->force_min_sample = true;
         break;
      default: break;
      }
   }
   if (pCreateInfo->stageCount && pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]) {
      nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
      merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);
      if (pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
         pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;
   }
   if (libstate) {
       for (unsigned i = 0; i < libstate->libraryCount; i++) {
          LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
             if (p->pipeline_nir[MESA_SHADER_FRAGMENT])
                pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[MESA_SHADER_FRAGMENT]);
          }
          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
             for (unsigned j = MESA_SHADER_VERTEX; j < MESA_SHADER_FRAGMENT; j++) {
                if (p->pipeline_nir[j])
                   pipeline->pipeline_nir[j] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[j]);
             }
          }
       }
   } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
      const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
      if (rs) {
         /* always draw bresenham if !smooth */
         pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||
                                         rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      } else
         pipeline->line_rectangular = true;
      lvp_pipeline_xfb_init(pipeline);
   }

   if (!pipeline->library) {
      bool has_fragment_shader = false;
      for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
         if (!pipeline->pipeline_nir[i])
            continue;

         gl_shader_stage stage = i;
         assert(stage == pipeline->pipeline_nir[i]->info.stage);
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage);
         if (!pipeline->inlines[stage].can_inline)
            pipeline->shader_cso[pstage] = lvp_pipeline_compile(pipeline,
                                                                nir_shader_clone(NULL, pipeline->pipeline_nir[stage]));
         if (stage == MESA_SHADER_FRAGMENT)
            has_fragment_shader = true;
      }

      if (has_fragment_shader == false) {
         /* create a dummy fragment shader for this pipeline. */
         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
                                                        "dummy_frag");

         pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
         struct pipe_shader_state shstate = {0};
         shstate.type = PIPE_SHADER_IR_NIR;
         shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
         pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      }
   }
   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
      if (pipeline->pipeline_nir[i])
         ralloc_free(pipeline->pipeline_nir[i]);
   }
   vk_free(&device->vk.alloc, pipeline->state_data);

   return result;
}

static VkResult
lvp_graphics_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkGraphicsPipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkGraphicsPipelineCreateInfo*         pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_graphics_pipeline_create(_device,
                                          pipelineCache,
                                          &pCreateInfos[i],
                                          &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}

static VkResult
lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
                          struct lvp_device *device,
                          struct lvp_pipeline_cache *cache,
                          const VkComputePipelineCreateInfo *pCreateInfo)
{
   pipeline->device = device;
   pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   vk_pipeline_layout_ref(&pipeline->layout->vk);
   pipeline->force_min_sample = false;

   pipeline->mem_ctx = ralloc_context(NULL);
   pipeline->is_compute_pipeline = true;

   VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
   if (result != VK_SUCCESS)
      return result;

   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
      pipeline->shader_cso[PIPE_SHADER_COMPUTE] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]));
   return VK_SUCCESS;
}

static VkResult
lvp_compute_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkComputePipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkComputePipelineCreateInfo*          pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_compute_pipeline_create(_device,
                                         pipelineCache,
                                         &pCreateInfos[i],
                                         &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }


   return result;
}