/*
 * Copyright © 2019 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "lvp_private.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"
#include "glsl_types.h"
#include "util/os_time.h"
#include "spirv/nir_spirv.h"
#include "nir/nir_builder.h"
#include "lvp_lower_vulkan_resource.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "tgsi/tgsi_from_mesa.h"
#include "nir/nir_xfb_info.h"

#define SPIR_V_MAGIC_NUMBER 0x07230203

#define LVP_PIPELINE_DUP(dst, src, type, count) do {       \
      type *temp = ralloc_array(mem_ctx, type, count);     \
      if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY;       \
      memcpy(temp, (src), sizeof(type) * count);           \
      dst = temp;                                          \
   } while(0)

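/* Tear a pipeline down once the queue no longer references it: delete any
 * gallium CSOs created for its stages, free the per-stage NIR, drop the
 * pipeline-layout reference, and release the remaining allocations.
 */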
void
lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
{
   if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
   if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])
      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
   if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
   if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
      ralloc_free(pipeline->pipeline_nir[i]);

   if (pipeline->layout)
      vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);

   ralloc_free(pipeline->mem_ctx);
   vk_free(&device->vk.alloc, pipeline->state_data);
   vk_object_base_finish(&pipeline->base);
   vk_free(&device->vk.alloc, pipeline);
}

VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
   VkDevice                                    _device,
   VkPipeline                                  _pipeline,
   const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   simple_mtx_lock(&device->queue.pipeline_lock);
   util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
   simple_mtx_unlock(&device->queue.pipeline_lock);
}

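/* Map a gl_shader_stage to the corresponding gallium PIPE_SHADER_* target. */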
static inline unsigned
st_shader_stage_to_ptarget(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return PIPE_SHADER_VERTEX;
   case MESA_SHADER_FRAGMENT:
      return PIPE_SHADER_FRAGMENT;
   case MESA_SHADER_GEOMETRY:
      return PIPE_SHADER_GEOMETRY;
   case MESA_SHADER_TESS_CTRL:
      return PIPE_SHADER_TESS_CTRL;
   case MESA_SHADER_TESS_EVAL:
      return PIPE_SHADER_TESS_EVAL;
   case MESA_SHADER_COMPUTE:
      return PIPE_SHADER_COMPUTE;
   default:
      break;
   }

   assert(!"should not be reached");
   return PIPE_SHADER_VERTEX;
}

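/* Size/alignment callback used when lowering shared variables to explicit
 * types: booleans are stored as 32-bit, everything else uses its natural
 * component size, aligned to the component size.
 */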
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

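/* Record which storage images an image intrinsic touches.  The image's slot
 * in the flattened per-stage image list is derived from the descriptor set
 * layouts, and the covered bit range is OR'd into the stage's
 * images_read/images_written masks.
 */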
static void
set_image_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                 nir_intrinsic_instr *instr,
                 bool reads, bool writes)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
      if (pipeline->layout->vk.set_layouts[s])
         value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].image_count;
   }
   value += binding->stage[nir->info.stage].image_index;
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;

   if (reads)
      pipeline->access[nir->info.stage].images_read |= mask;
   if (writes)
      pipeline->access[nir->info.stage].images_written |= mask;
}

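/* Record SSBO writes for a store/atomic intrinsic, using the same
 * flattened-binding offset scheme as set_image_access().
 */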
static void
set_buffer_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                  nir_intrinsic_instr *instr)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   if (!var) {
      nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
      if (deref->modes != nir_var_mem_ssbo)
         return;
      nir_binding b = nir_chase_binding(instr->src[0]);
      var = nir_get_binding_variable(nir, b);
      if (!var)
         return;
   }
   if (var->data.mode != nir_var_mem_ssbo)
      return;
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
      if (pipeline->layout->vk.set_layouts[s])
         value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].shader_buffer_count;
   }
   value += binding->stage[nir->info.stage].shader_buffer_index;
   /* Structs have been lowered already, so get_aoa_size is sufficient. */
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
   pipeline->access[nir->info.stage].buffers_written |= mask;
}

static void
scan_intrinsic(struct lvp_pipeline *pipeline, nir_shader *nir, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      set_image_access(pipeline, nir, instr, true, false);
      break;
   case nir_intrinsic_image_deref_store:
      set_image_access(pipeline, nir, instr, false, true);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      set_image_access(pipeline, nir, instr, true, true);
      break;
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_store_deref:
      set_buffer_access(pipeline, nir, instr);
      break;
   default: break;
   }
}

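/* Walk every instruction in the shader and accumulate the image/buffer
 * access masks before descriptor lowering discards the variable info.
 */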
static void
scan_pipeline_info(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   nir_foreach_function(function, nir) {
      if (function->impl)
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  scan_intrinsic(pipeline, nir, nir_instr_as_intrinsic(instr));
            }
         }
   }
}

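/* Remove scoped barriers that lavapipe doesn't need.  When run on a compute
 * shader (non-NULL data), barriers with workgroup or device memory scope are
 * kept; every other scoped barrier is deleted.
 */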
static bool
remove_scoped_barriers_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_scoped_barrier)
      return false;
   if (data) {
      if (nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP ||
          nir_intrinsic_memory_scope(intr) == NIR_SCOPE_DEVICE)
         return false;
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_scoped_barriers(nir_shader *nir, bool is_compute)
{
   return nir_shader_instructions_pass(nir, remove_scoped_barriers_impl, nir_metadata_dominance, (void*)is_compute);
}

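/* Rewrite demote/terminate intrinsics into the equivalent discard(_if)
 * intrinsics.
 */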
static bool
lower_demote_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
      intr->intrinsic = nir_intrinsic_discard;
      return true;
   }
   if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) {
      intr->intrinsic = nir_intrinsic_discard_if;
      return true;
   }
   return false;
}

static bool
lower_demote(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_demote_impl, nir_metadata_dominance, NULL);
}

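/* Fold constant nir_tex_src_texture_offset sources directly into
 * texture_index so later passes don't have to deal with the extra source.
 */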
static bool
find_tex(const nir_instr *instr, const void *data_cb)
{
   if (instr->type == nir_instr_type_tex)
      return true;
   return false;
}

static nir_ssa_def *
fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
{
   nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
   unsigned offset = 0;

   int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
   if (idx == -1)
      return NULL;

   if (!nir_src_is_const(tex_instr->src[idx].src))
      return NULL;
   offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);

   nir_tex_instr_remove_src(tex_instr, idx);
   tex_instr->texture_index += offset;
   return NIR_LOWER_INSTR_PROGRESS;
}

static bool
lvp_nir_fixup_indirect_tex(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
}

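/* Standard NIR optimization loop, repeated until no pass makes progress. */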
static void
optimize(nir_shader *nir)
{
   bool progress = false;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_remove_phis);
      bool trivial_continues = false;
      NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);
      progress |= trivial_continues;
      if (trivial_continues) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_undef);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);
      NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
   } while (progress);
}

void
lvp_shader_optimize(nir_shader *nir)
{
   optimize(nir);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_opt_dce);
   nir_sweep(nir);
}

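/* Translate one pipeline stage from SPIR-V to NIR and run the lavapipe
 * lowering stack on it: barrier/demote lowering, subgroup and system-value
 * lowering, descriptor and explicit-IO lowering, optimization, and final IO
 * slot assignment.  The resulting NIR is stored in
 * pipeline->pipeline_nir[stage].
 */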
static VkResult
lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
                         const VkPipelineShaderStageCreateInfo *sinfo)
{
   struct lvp_device *pdevice = pipeline->device;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
   const nir_shader_compiler_options *drv_options = pdevice->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));
   VkResult result;
   nir_shader *nir;

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_VULKAN,
      .caps = {
         .float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),
         .int16 = true,
         .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
         .tessellation = true,
         .float_controls = true,
         .image_ms_array = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .storage_image_ms = true,
         .geometry_streams = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .variable_pointers = true,
         .stencil_export = true,
         .post_depth_coverage = true,
         .transform_feedback = true,
         .device_group = true,
         .draw_parameters = true,
         .shader_viewport_index_layer = true,
         .multiview = true,
         .physical_storage_buffer_address = true,
         .int64_atomics = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_quad = true,
#if LLVM_VERSION_MAJOR >= 10
         .subgroup_shuffle = true,
#endif
         .subgroup_vote = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .int8 = true,
         .float16 = true,
         .demote_to_helper_invocation = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,
      .shared_addr_format = nir_address_format_32bit_offset,
   };

   result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
                                            &spirv_options, drv_options,
                                            NULL, &nir);
   if (result != VK_SUCCESS)
      return result;

   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(nir, remove_scoped_barriers, nir->info.stage == MESA_SHADER_COMPUTE);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   struct nir_lower_subgroups_options subgroup_opts = {0};
   subgroup_opts.lower_quad = true;
   subgroup_opts.ballot_components = 1;
   subgroup_opts.ballot_bit_size = 32;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);

   if (stage == MESA_SHADER_FRAGMENT)
      lvp_lower_input_attachments(nir, false);
   NIR_PASS_V(nir, nir_lower_is_helper_invocation);
   NIR_PASS_V(nir, lower_demote);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_image, NULL);

   scan_pipeline_info(pipeline, nir);

   optimize(nir);
   lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_global,
              nir_address_format_64bit_global);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* TODO: also optimize the tex srcs, see radeonSI for reference */
   /* Skip if there are potentially conflicting rounding modes */
   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
      .rounding_mode = nir_rounding_mode_undef,
      .fold_tex_dest = true,
   };
   NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);

   lvp_shader_optimize(nir);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage != MESA_SHADER_VERTEX)
      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   else {
      nir->num_inputs = util_last_bit64(nir->info.inputs_read);
      nir_foreach_shader_in_variable(var, nir) {
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }
   }
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               nir->info.stage);

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   if (impl->ssa_alloc > 100) /* skip for small shaders */
      pipeline->inlines[stage].must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
   pipeline->pipeline_nir[stage] = nir;

   return VK_SUCCESS;
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

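/* Build gallium stream-output state from the NIR xfb info of the last
 * vertex-processing stage (GS if present, then TES, then VS).
 */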
static void
lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
{
   gl_shader_stage stage = MESA_SHADER_VERTEX;
   if (pipeline->pipeline_nir[MESA_SHADER_GEOMETRY])
      stage = MESA_SHADER_GEOMETRY;
   else if (pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL])
      stage = MESA_SHADER_TESS_EVAL;
   pipeline->last_vertex = stage;

   nir_xfb_info *xfb_info = pipeline->pipeline_nir[stage]->xfb_info;
   if (xfb_info) {
      uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
      memset(output_mapping, 0, sizeof(output_mapping));

      nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {
         unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                                            : glsl_count_attribute_slots(var->type, false);
         for (unsigned i = 0; i < slots; i++)
            output_mapping[var->data.location + i] = var->data.driver_location + i;
      }

      pipeline->stream_output.num_outputs = xfb_info->output_count;
      for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         if (xfb_info->buffers_written & (1 << i)) {
            pipeline->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
         }
      }
      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         pipeline->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
         pipeline->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
         pipeline->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
         pipeline->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
         pipeline->stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
         pipeline->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
      }
   }
}

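/* Create the gallium CSO for one stage from its finalized NIR; the stream
 * output state is attached to whichever stage was recorded as last_vertex.
 */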
void *
lvp_pipeline_compile_stage(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      struct pipe_compute_state shstate = {0};
      shstate.prog = nir;
      shstate.ir_type = PIPE_SHADER_IR_NIR;
      shstate.req_local_mem = nir->info.shared_size;
      return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
   } else {
      struct pipe_shader_state shstate = {0};
      shstate.type = PIPE_SHADER_IR_NIR;
      shstate.ir.nir = nir;
      if (nir->info.stage == pipeline->last_vertex)
         memcpy(&shstate.stream_output, &pipeline->stream_output, sizeof(shstate.stream_output));

      switch (nir->info.stage) {
      case MESA_SHADER_FRAGMENT:
         return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_VERTEX:
         return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_GEOMETRY:
         return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_CTRL:
         return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_EVAL:
         return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
      default:
         unreachable("illegal shader");
         break;
      }
   }
   return NULL;
}

void *
lvp_pipeline_compile(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
   return lvp_pipeline_compile_stage(pipeline, nir);
}

#ifndef NDEBUG
static bool
layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
{
   const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
   uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
   uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
   /* base equal */
   if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
      return false;

   /* bindings equal */
   if (a->binding_count != b->binding_count)
      return false;
   size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
   const struct lvp_descriptor_set_binding_layout *la = a->binding;
   const struct lvp_descriptor_set_binding_layout *lb = b->binding;
   if (memcmp(la, lb, binding_size)) {
      for (unsigned i = 0; i < a->binding_count; i++) {
         if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
            return false;
      }
   }

   /* immutable sampler equal */
   if (a->immutable_sampler_count != b->immutable_sampler_count)
      return false;
   if (a->immutable_sampler_count) {
      size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
      if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
         struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
         struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
         for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
            if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
               return false;
         }
      }
   }
   return true;
}
#endif

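/* Merge an incoming (possibly partial) pipeline layout into the pipeline's
 * own layout copy; used for graphics pipeline libraries built with
 * independent sets.
 */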
static void
merge_layouts(struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
{
   if (!src)
      return;
   if (!dst->layout) {
      /* no layout created yet: copy onto ralloc ctx allocation for auto-free */
      dst->layout = ralloc(dst->mem_ctx, struct lvp_pipeline_layout);
      memcpy(dst->layout, src, sizeof(struct lvp_pipeline_layout));
      return;
   }
#ifndef NDEBUG
   /* verify that layouts match */
   const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
   const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
   for (unsigned i = 0; i < smaller->vk.set_count; i++) {
      if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
          smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
         continue;

      const struct lvp_descriptor_set_layout *smaller_set_layout =
         vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
      const struct lvp_descriptor_set_layout *bigger_set_layout =
         vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);

      assert(!smaller_set_layout->binding_count ||
             !bigger_set_layout->binding_count ||
             layouts_equal(smaller_set_layout, bigger_set_layout));
   }
#endif
   for (unsigned i = 0; i < src->vk.set_count; i++) {
      if (!dst->layout->vk.set_layouts[i])
         dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
   }
   dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
                                    src->vk.set_count);
   dst->layout->push_constant_size += src->push_constant_size;
   dst->layout->push_constant_stages |= src->push_constant_stages;
}

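/* Core graphics pipeline construction: resolve library/layout state, fill
 * the vk_graphics_pipeline_state, compile the requested stages to NIR, merge
 * tessellation info, pull shaders and state from linked libraries, set up
 * stream output, and (for non-library pipelines) create the gallium CSOs,
 * adding a dummy fragment shader if none was provided.
 */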
static VkResult
lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
                           struct lvp_device *device,
                           struct lvp_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   VkResult result;

   const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
                                                                                GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
   const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
                                                                         PIPELINE_LIBRARY_CREATE_INFO_KHR);
   const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   if (libinfo)
      pipeline->stages = libinfo->flags;
   else if (!libstate)
      pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
   pipeline->mem_ctx = ralloc_context(NULL);

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      pipeline->library = true;

   struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   if (layout)
      vk_pipeline_layout_ref(&layout->vk);

   if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
      /* this is a regular pipeline with no partials: directly reuse */
      pipeline->layout = layout;
   else if (pipeline->stages & layout_stages) {
      if ((pipeline->stages & layout_stages) == layout_stages)
         /* this has all the layout stages: directly reuse */
         pipeline->layout = layout;
      else {
         /* this is a partial: copy for later merging to avoid modifying another layout */
         merge_layouts(pipeline, layout);
      }
   }

   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
                                          &p->graphics_state);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            pipeline->line_smooth = p->line_smooth;
            pipeline->disable_multisample = p->disable_multisample;
            pipeline->line_rectangular = p->line_rectangular;
            pipeline->last_vertex = p->last_vertex;
            memcpy(&pipeline->stream_output, &p->stream_output, sizeof(p->stream_output));
            memcpy(&pipeline->access, &p->access, sizeof(p->access));
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)
            pipeline->force_min_sample = p->force_min_sample;
         if (p->stages & layout_stages) {
            if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
               merge_layouts(pipeline, p->layout);
         }
         pipeline->stages |= p->stages;
      }
   }

   result = vk_graphics_pipeline_state_fill(&device->vk,
                                            &pipeline->graphics_state,
                                            pCreateInfo, NULL, NULL, NULL,
                                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
                                            &pipeline->state_data);
   if (result != VK_SUCCESS)
      return result;

   assert(pipeline->library || pipeline->stages == (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));

   pipeline->device = device;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
      if (stage == MESA_SHADER_FRAGMENT) {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
            continue;
      } else {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
            continue;
      }
      result = lvp_shader_compile_to_ir(pipeline, sinfo);
      if (result != VK_SUCCESS)
         goto fail;

      switch (stage) {
      case MESA_SHADER_GEOMETRY:
         pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
                                     pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == SHADER_PRIM_LINES;
         break;
      case MESA_SHADER_FRAGMENT:
         if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
            pipeline->force_min_sample = true;
         break;
      default: break;
      }
   }
   if (pCreateInfo->stageCount && pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]) {
      nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
      merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);
      if (pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
         pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;
   }
   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
            if (p->pipeline_nir[MESA_SHADER_FRAGMENT])
               pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[MESA_SHADER_FRAGMENT]);
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            for (unsigned j = MESA_SHADER_VERTEX; j < MESA_SHADER_FRAGMENT; j++) {
               if (p->pipeline_nir[j])
                  pipeline->pipeline_nir[j] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[j]);
            }
         }
      }
   } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
      const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
      if (rs) {
         /* always draw bresenham if !smooth */
         pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||
                                         rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      } else
         pipeline->line_rectangular = true;
      lvp_pipeline_xfb_init(pipeline);
   }

   if (!pipeline->library) {
      bool has_fragment_shader = false;
      for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
         if (!pipeline->pipeline_nir[i])
            continue;

         gl_shader_stage stage = i;
         assert(stage == pipeline->pipeline_nir[i]->info.stage);
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage);
         if (!pipeline->inlines[stage].can_inline)
            pipeline->shader_cso[pstage] = lvp_pipeline_compile(pipeline,
                                                                nir_shader_clone(NULL, pipeline->pipeline_nir[stage]));
         if (stage == MESA_SHADER_FRAGMENT)
            has_fragment_shader = true;
      }

      if (has_fragment_shader == false) {
         /* create a dummy fragment shader for this pipeline. */
         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
                                                        "dummy_frag");

         pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
         struct pipe_shader_state shstate = {0};
         shstate.type = PIPE_SHADER_IR_NIR;
         shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
         pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      }
   }
   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
      if (pipeline->pipeline_nir[i])
         ralloc_free(pipeline->pipeline_nir[i]);
   }
   vk_free(&device->vk.alloc, pipeline->state_data);

   return result;
}

static VkResult
lvp_graphics_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkGraphicsPipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkGraphicsPipelineCreateInfo*         pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_graphics_pipeline_create(_device,
                                          pipelineCache,
                                          &pCreateInfos[i],
                                          &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}

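/* Compute pipelines are much simpler: compile the single stage to NIR and,
 * unless uniform inlining may be used, create the compute CSO immediately.
 */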
static VkResult
lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
                          struct lvp_device *device,
                          struct lvp_pipeline_cache *cache,
                          const VkComputePipelineCreateInfo *pCreateInfo)
{
   pipeline->device = device;
   pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   vk_pipeline_layout_ref(&pipeline->layout->vk);
   pipeline->force_min_sample = false;

   pipeline->mem_ctx = ralloc_context(NULL);
   pipeline->is_compute_pipeline = true;

   VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
   if (result != VK_SUCCESS)
      return result;

   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
      pipeline->shader_cso[PIPE_SHADER_COMPUTE] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]));
   return VK_SUCCESS;
}

static VkResult
lvp_compute_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkComputePipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkComputePipelineCreateInfo*          pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_compute_pipeline_create(_device,
                                         pipelineCache,
                                         &pCreateInfos[i],
                                         &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}