1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_util.h"
25
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28
29 #include "common/v3d_debug.h"
30 #include "qpu/qpu_disasm.h"
31
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34
35 #include "util/u_atomic.h"
36 #include "util/u_prim.h"
37 #include "util/os_time.h"
38
39 #include "vk_pipeline.h"
40 #include "vulkan/util/vk_format.h"
41
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44
45 void
46 v3dv_print_v3d_key(struct v3d_key *key,
47 uint32_t v3d_key_size)
48 {
49 struct mesa_sha1 ctx;
50 unsigned char sha1[20];
51 char sha1buf[41];
52
53 _mesa_sha1_init(&ctx);
54
55 _mesa_sha1_update(&ctx, key, v3d_key_size);
56
57 _mesa_sha1_final(&ctx, sha1);
58 _mesa_sha1_format(sha1buf, sha1);
59
60 fprintf(stderr, "key %p: %s\n", key, sha1buf);
61 }
62
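/* Computes the stage's shader_sha1 from its NIR: we wrap the NIR in a
 * shader module handle so we can reuse vk_pipeline_hash_shader_stage.
 */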
63 static void
64 pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
65 {
66 VkPipelineShaderStageCreateInfo info = {
67 .module = vk_shader_module_handle_from_nir(p_stage->nir),
68 .pName = p_stage->entrypoint,
69 .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70 };
71
72 vk_pipeline_hash_shader_stage(&info, p_stage->shader_sha1);
73 }
74
75 void
76 v3dv_shader_variant_destroy(struct v3dv_device *device,
77 struct v3dv_shader_variant *variant)
78 {
79 /* The assembly BO is shared by all variants in the pipeline, so it can't
80 * be freed here and should be freed with the pipeline
81 */
82 if (variant->qpu_insts)
83 free(variant->qpu_insts);
84 ralloc_free(variant->prog_data.base);
85 vk_free(&device->vk.alloc, variant);
86 }
87
88 static void
89 destroy_pipeline_stage(struct v3dv_device *device,
90 struct v3dv_pipeline_stage *p_stage,
91 const VkAllocationCallbacks *pAllocator)
92 {
93 if (!p_stage)
94 return;
95
96 ralloc_free(p_stage->nir);
97 vk_free2(&device->vk.alloc, pAllocator, p_stage);
98 }
99
100 static void
101 pipeline_free_stages(struct v3dv_device *device,
102 struct v3dv_pipeline *pipeline,
103 const VkAllocationCallbacks *pAllocator)
104 {
105 assert(pipeline);
106
107 /* FIXME: we can't just loop over the mesa stages because of the extra
108 * binning stages; it would be good to find an alternative.
109 */
110 destroy_pipeline_stage(device, pipeline->vs, pAllocator);
111 destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
112 destroy_pipeline_stage(device, pipeline->gs, pAllocator);
113 destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
114 destroy_pipeline_stage(device, pipeline->fs, pAllocator);
115 destroy_pipeline_stage(device, pipeline->cs, pAllocator);
116
117 pipeline->vs = NULL;
118 pipeline->vs_bin = NULL;
119 pipeline->gs = NULL;
120 pipeline->gs_bin = NULL;
121 pipeline->fs = NULL;
122 pipeline->cs = NULL;
123 }
124
125 static void
126 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
127 struct v3dv_device *device,
128 const VkAllocationCallbacks *pAllocator)
129 {
130 if (!pipeline)
131 return;
132
133 pipeline_free_stages(device, pipeline, pAllocator);
134
135 if (pipeline->shared_data) {
136 v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
137 pipeline->shared_data = NULL;
138 }
139
140 if (pipeline->spill.bo) {
141 assert(pipeline->spill.size_per_thread > 0);
142 v3dv_bo_free(device, pipeline->spill.bo);
143 }
144
145 if (pipeline->default_attribute_values) {
146 v3dv_bo_free(device, pipeline->default_attribute_values);
147 pipeline->default_attribute_values = NULL;
148 }
149
150 if (pipeline->executables.mem_ctx)
151 ralloc_free(pipeline->executables.mem_ctx);
152
153 vk_object_free(&device->vk, pAllocator, pipeline);
154 }
155
156 VKAPI_ATTR void VKAPI_CALL
157 v3dv_DestroyPipeline(VkDevice _device,
158 VkPipeline _pipeline,
159 const VkAllocationCallbacks *pAllocator)
160 {
161 V3DV_FROM_HANDLE(v3dv_device, device, _device);
162 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
163
164 if (!pipeline)
165 return;
166
167 v3dv_destroy_pipeline(pipeline, device, pAllocator);
168 }
169
170 static const struct spirv_to_nir_options default_spirv_options = {
171 .caps = {
172 .device_group = true,
173 .float_controls = true,
174 .multiview = true,
175 .storage_8bit = true,
176 .storage_16bit = true,
177 .subgroup_basic = true,
178 .variable_pointers = true,
179 .vk_memory_model = true,
180 .vk_memory_model_device_scope = true,
181 .physical_storage_buffer_address = true,
182 },
183 .ubo_addr_format = nir_address_format_32bit_index_offset,
184 .ssbo_addr_format = nir_address_format_32bit_index_offset,
185 .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
186 .push_const_addr_format = nir_address_format_logical,
187 .shared_addr_format = nir_address_format_32bit_offset,
188 };
189
190 const nir_shader_compiler_options v3dv_nir_options = {
191 .lower_uadd_sat = true,
192 .lower_usub_sat = true,
193 .lower_iadd_sat = true,
194 .lower_all_io_to_temps = true,
195 .lower_extract_byte = true,
196 .lower_extract_word = true,
197 .lower_insert_byte = true,
198 .lower_insert_word = true,
199 .lower_bitfield_insert_to_shifts = true,
200 .lower_bitfield_extract_to_shifts = true,
201 .lower_bitfield_reverse = true,
202 .lower_bit_count = true,
203 .lower_cs_local_id_to_index = true,
204 .lower_ffract = true,
205 .lower_fmod = true,
206 .lower_pack_unorm_2x16 = true,
207 .lower_pack_snorm_2x16 = true,
208 .lower_unpack_unorm_2x16 = true,
209 .lower_unpack_snorm_2x16 = true,
210 .lower_pack_unorm_4x8 = true,
211 .lower_pack_snorm_4x8 = true,
212 .lower_unpack_unorm_4x8 = true,
213 .lower_unpack_snorm_4x8 = true,
214 .lower_pack_half_2x16 = true,
215 .lower_unpack_half_2x16 = true,
216 .lower_pack_32_2x16 = true,
217 .lower_pack_32_2x16_split = true,
218 .lower_unpack_32_2x16_split = true,
219 .lower_mul_2x32_64 = true,
220 .lower_fdiv = true,
221 .lower_find_lsb = true,
222 .lower_ffma16 = true,
223 .lower_ffma32 = true,
224 .lower_ffma64 = true,
225 .lower_flrp32 = true,
226 .lower_fpow = true,
227 .lower_fsat = true,
228 .lower_fsqrt = true,
229 .lower_ifind_msb = true,
230 .lower_isign = true,
231 .lower_ldexp = true,
232 .lower_mul_high = true,
233 .lower_wpos_pntc = true,
234 .lower_rotate = true,
235 .lower_to_scalar = true,
236 .lower_device_index_to_zero = true,
237 .has_fsub = true,
238 .has_isub = true,
239 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
240 * needs to be supported */
241 .lower_interpolate_at = true,
242 .max_unroll_iterations = 16,
243 .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
244 .divergence_analysis_options =
245 nir_divergence_multiple_workgroup_per_compute_subgroup
246 };
247
248 const nir_shader_compiler_options *
249 v3dv_pipeline_get_nir_options(void)
250 {
251 return &v3dv_nir_options;
252 }
253
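/* Runs a NIR pass and accumulates whether it made progress into the local
 * 'progress' variable declared by the caller, returning this pass' result.
 */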
254 #define OPT(pass, ...) ({ \
255 bool this_progress = false; \
256 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
257 if (this_progress) \
258 progress = true; \
259 this_progress; \
260 })
261
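/* Generic NIR optimization loop: keeps running the passes below until none
 * of them reports further progress.
 */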
262 static void
263 nir_optimize(nir_shader *nir, bool allow_copies)
264 {
265 bool progress;
266
267 do {
268 progress = false;
269 OPT(nir_split_array_vars, nir_var_function_temp);
270 OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
271 OPT(nir_opt_deref);
272 OPT(nir_lower_vars_to_ssa);
273 if (allow_copies) {
274 /* Only run this pass in the first call to nir_optimize. Later calls
275 * assume that we've lowered away any copy_deref instructions and we
276 * don't want to introduce any more.
277 */
278 OPT(nir_opt_find_array_copies);
279 }
280
281 OPT(nir_remove_dead_variables,
282 (nir_variable_mode)(nir_var_function_temp |
283 nir_var_shader_temp |
284 nir_var_mem_shared),
285 NULL);
286
287 OPT(nir_opt_copy_prop_vars);
288 OPT(nir_opt_dead_write_vars);
289 OPT(nir_opt_combine_stores, nir_var_all);
290
291 OPT(nir_lower_alu_to_scalar, NULL, NULL);
292
293 OPT(nir_copy_prop);
294 OPT(nir_lower_phis_to_scalar, false);
295
296 OPT(nir_copy_prop);
297 OPT(nir_opt_dce);
298 OPT(nir_opt_cse);
299 OPT(nir_opt_combine_stores, nir_var_all);
300
301 /* Passing 0 to the peephole select pass causes it to convert
302 * if-statements that contain only move instructions in the branches
303 * regardless of the count.
304 *
305 * Passing 1 to the peephole select pass causes it to convert
306 * if-statements that contain at most a single ALU instruction (total)
307 * in both branches.
308 */
309 OPT(nir_opt_peephole_select, 0, false, false);
310 OPT(nir_opt_peephole_select, 8, false, true);
311
312 OPT(nir_opt_intrinsics);
313 OPT(nir_opt_idiv_const, 32);
314 OPT(nir_opt_algebraic);
315 OPT(nir_lower_alu);
316 OPT(nir_opt_constant_folding);
317
318 OPT(nir_opt_dead_cf);
319 if (nir_opt_trivial_continues(nir)) {
320 progress = true;
321 OPT(nir_copy_prop);
322 OPT(nir_opt_dce);
323 }
324 OPT(nir_opt_conditional_discard);
325
326 OPT(nir_opt_remove_phis);
327 OPT(nir_opt_gcm, false);
328 OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
329 OPT(nir_opt_undef);
330 OPT(nir_lower_pack);
331
332 /* There are two optimizations that we don't do here, and instead rely on
333 * the backend:
334 *
335 * nir_lower_flrp only needs to be called once, as nothing should
336 * rematerialize any flrps. Since we are already calling it in the backend
337 * compiler, we don't call it again here.
338 *
339 * nir_opt_loop_unroll: the backend includes custom strategies in
340 * order to get the lowest number of spills/fills possible, and some of
341 * them include disabling loop unrolling.
342 *
343 * FIXME: ideally we would like to just remove this method in favor of
344 * v3d_optimize_nir. But:
345 *
346 * * Using it leads to some regressions on Vulkan CTS tests, due to
347 * some of the lowerings used there.
348 * * We would need to move to the backend some additional
349 * lowerings/optimizations that are used on the Vulkan
350 * frontend. That would require checking that we don't introduce any
351 * regressions or performance drops on OpenGL.
352 *
353 * For now we keep this Vulkan frontend nir_optimize.
354 */
355
356 } while (progress);
357 }
358
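/* Common NIR preprocessing run once per stage after SPIR-V translation:
 * sysval/IO lowering, explicit IO address formats and a first round of
 * optimizations.
 */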
359 static void
360 preprocess_nir(nir_shader *nir)
361 {
362 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
363 .frag_coord = true,
364 .point_coord = true,
365 };
366 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
367
368 /* Vulkan uses the separate-shader linking model */
369 nir->info.separate_shader = true;
370
371 /* Make sure we lower variable initializers on output variables so that
372 * nir_remove_dead_variables below sees the corresponding stores
373 */
374 NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
375
376 if (nir->info.stage == MESA_SHADER_FRAGMENT)
377 NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
378 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
379 NIR_PASS(_, nir, nir_lower_input_attachments,
380 &(nir_input_attachment_options) {
381 .use_fragcoord_sysval = false,
382 });
383 }
384
385 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
386 nir_shader_get_entrypoint(nir), true, false);
387
388 NIR_PASS(_, nir, nir_lower_system_values);
389
390 NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
391
392 NIR_PASS(_, nir, nir_normalize_cubemap_coords);
393
394 NIR_PASS(_, nir, nir_lower_global_vars_to_local);
395
396 NIR_PASS(_, nir, nir_split_var_copies);
397 NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
398
399 nir_optimize(nir, true);
400
401 NIR_PASS(_, nir, nir_lower_explicit_io,
402 nir_var_mem_push_const,
403 nir_address_format_32bit_offset);
404
405 NIR_PASS(_, nir, nir_lower_explicit_io,
406 nir_var_mem_ubo | nir_var_mem_ssbo,
407 nir_address_format_32bit_index_offset);
408
409 NIR_PASS(_, nir, nir_lower_explicit_io,
410 nir_var_mem_global,
411 nir_address_format_2x32bit_global);
412
413 NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
414
415 /* Lower a bunch of stuff */
416 NIR_PASS(_, nir, nir_lower_var_copies);
417
418 NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
419
420 NIR_PASS(_, nir, nir_lower_indirect_derefs,
421 nir_var_function_temp, 2);
422
423 NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
424 nir_var_mem_ubo | nir_var_mem_ssbo,
425 nir_lower_direct_array_deref_of_vec_load);
426
427 NIR_PASS(_, nir, nir_lower_frexp);
428
429 /* Get rid of split copies */
430 nir_optimize(nir, false);
431 }
432
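/* Translates the stage's SPIR-V (or internal NIR) module into NIR and runs
 * preprocess_nir on the result. Returns NULL on failure.
 */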
433 static nir_shader *
434 shader_module_compile_to_nir(struct v3dv_device *device,
435 struct v3dv_pipeline_stage *stage)
436 {
437 nir_shader *nir;
438 const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
439
440
441 if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL)
442 v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
443
444 /* vk_shader_module_to_nir also handles internal shaders, when module->nir
445 * != NULL. It also calls nir_validate_shader in both cases, so we don't
446 * call it again here.
447 */
448 VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
449 broadcom_shader_stage_to_gl(stage->stage),
450 stage->entrypoint,
451 stage->spec_info,
452 &default_spirv_options,
453 nir_options,
454 NULL, &nir);
455 if (result != VK_SUCCESS)
456 return NULL;
457 assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
458
459 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) {
460 char sha1buf[41];
461 _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
462 nir->info.name = ralloc_strdup(nir, sha1buf);
463 }
464
465 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
466 v3d_debug_flag_for_shader_stage(
467 broadcom_shader_stage_to_gl(stage->stage))))) {
468 fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
469 broadcom_shader_stage_name(stage->stage),
470 stage->program_id);
471 nir_print_shader(nir, stderr);
472 fprintf(stderr, "\n");
473 }
474
475 preprocess_nir(nir);
476
477 return nir;
478 }
479
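/* type_size callback for nir_lower_io: returns the number of vec4 slots
 * consumed by a variable of the given type.
 */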
480 static int
481 type_size_vec4(const struct glsl_type *type, bool bindless)
482 {
483 return glsl_count_attribute_slots(type, false);
484 }
485
486 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
487 * rethink.
488 */
489 static unsigned
490 descriptor_map_add(struct v3dv_descriptor_map *map,
491 int set,
492 int binding,
493 int array_index,
494 int array_size,
495 int start_index,
496 uint8_t return_size)
497 {
498 assert(array_index < array_size);
499 assert(return_size == 16 || return_size == 32);
500
501 unsigned index = start_index;
502 for (; index < map->num_desc; index++) {
503 if (map->used[index] &&
504 set == map->set[index] &&
505 binding == map->binding[index] &&
506 array_index == map->array_index[index]) {
507 assert(array_size == map->array_size[index]);
508 if (return_size != map->return_size[index]) {
509 /* If the return_size is different it means that the same sampler
510 * was used for operations with different precision
511 * requirements. In this case we need to ensure that we use the
512 * larger one.
513 */
514 map->return_size[index] = 32;
515 }
516 return index;
517 } else if (!map->used[index]) {
518 break;
519 }
520 }
521
522 assert(index < DESCRIPTOR_MAP_SIZE);
523 assert(!map->used[index]);
524
525 map->used[index] = true;
526 map->set[index] = set;
527 map->binding[index] = binding;
528 map->array_index[index] = array_index;
529 map->array_size[index] = array_size;
530 map->return_size[index] = return_size;
531 map->num_desc = MAX2(map->num_desc, index + 1);
532
533 return index;
534 }
535
536 struct lower_pipeline_layout_state {
537 struct v3dv_pipeline *pipeline;
538 const struct v3dv_pipeline_layout *layout;
539 bool needs_default_sampler_state;
540 };
541
542
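/* The backend handles push constants as regular uniforms, so lowering them
 * is just a matter of rewriting the intrinsic.
 */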
543 static void
544 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
545 struct lower_pipeline_layout_state *state)
546 {
547 assert(instr->intrinsic == nir_intrinsic_load_push_constant);
548 instr->intrinsic = nir_intrinsic_load_uniform;
549 }
550
551 static struct v3dv_descriptor_map*
552 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
553 VkDescriptorType desc_type,
554 gl_shader_stage gl_stage,
555 bool is_sampler)
556 {
557 enum broadcom_shader_stage broadcom_stage =
558 gl_shader_stage_to_broadcom(gl_stage);
559
560 assert(pipeline->shared_data &&
561 pipeline->shared_data->maps[broadcom_stage]);
562
563 switch(desc_type) {
564 case VK_DESCRIPTOR_TYPE_SAMPLER:
565 return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
566 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
567 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
568 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
569 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
570 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
571 return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
572 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
573 return is_sampler ?
574 &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
575 &pipeline->shared_data->maps[broadcom_stage]->texture_map;
576 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
577 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
578 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
579 return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
580 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
581 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
582 return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
583 default:
584 unreachable("Descriptor type unknown or not having a descriptor map");
585 }
586 }
587
588 /* Gathers info from the intrinsic (set and binding) and then lowers it so
589 * it can be used by the v3d_compiler */
590 static void
591 lower_vulkan_resource_index(nir_builder *b,
592 nir_intrinsic_instr *instr,
593 struct lower_pipeline_layout_state *state)
594 {
595 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
596
597 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
598
599 unsigned set = nir_intrinsic_desc_set(instr);
600 unsigned binding = nir_intrinsic_binding(instr);
601 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
602 struct v3dv_descriptor_set_binding_layout *binding_layout =
603 &set_layout->binding[binding];
604 unsigned index = 0;
605
606 switch (binding_layout->type) {
607 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
608 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
609 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
610 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
611 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
612 struct v3dv_descriptor_map *descriptor_map =
613 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
614 b->shader->info.stage, false);
615
616 if (!const_val)
617 unreachable("non-constant vulkan_resource_index array index");
618
619 /* At compile-time we will need to know if we are processing a UBO load
620 * for an inline or a regular UBO so we can handle inline loads like
621 * push constants. At the NIR level, however, the inline
622 * information is gone, so we rely on the index to make this distinction.
623 * In particular, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
624 * inline buffers. This means that at the descriptor map level
625 * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
626 * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
627 */
628 uint32_t start_index = 0;
629 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
630 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
631 start_index = MAX_INLINE_UNIFORM_BUFFERS;
632 }
633
634 index = descriptor_map_add(descriptor_map, set, binding,
635 const_val->u32,
636 binding_layout->array_size,
637 start_index,
638 32 /* return_size: doesn't really apply for this case */);
639
640 /* We always reserve index 0 for push constants */
641 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
642 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
643 binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
644 index++;
645 }
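/* Resulting UBO index layout in the descriptor map:
 *   0                                    push constants
 *   1..MAX_INLINE_UNIFORM_BUFFERS        inline uniform blocks
 *   MAX_INLINE_UNIFORM_BUFFERS + 1, ...  regular UBOs
 */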
646
647 break;
648 }
649
650 default:
651 unreachable("unsupported descriptor type for vulkan_resource_index");
652 break;
653 }
654
655 /* Since we use the deref pass, both vulkan_resource_index and
656 * vulkan_load_descriptor return a vec2 providing an index and
657 * offset. Our backend compiler only cares about the index part.
658 */
659 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
660 nir_imm_ivec2(b, index, 0));
661 nir_instr_remove(&instr->instr);
662 }
663
664 /* Returns the return_size, so it can be used for the case where we don't
665 * have a sampler object
666 */
667 static uint8_t
668 lower_tex_src_to_offset(nir_builder *b,
669 nir_tex_instr *instr,
670 unsigned src_idx,
671 struct lower_pipeline_layout_state *state)
672 {
673 nir_ssa_def *index = NULL;
674 unsigned base_index = 0;
675 unsigned array_elements = 1;
676 nir_tex_src *src = &instr->src[src_idx];
677 bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
678
679 /* We compute first the offsets */
680 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
681 while (deref->deref_type != nir_deref_type_var) {
682 assert(deref->parent.is_ssa);
683 nir_deref_instr *parent =
684 nir_instr_as_deref(deref->parent.ssa->parent_instr);
685
686 assert(deref->deref_type == nir_deref_type_array);
687
688 if (nir_src_is_const(deref->arr.index) && index == NULL) {
689 /* We're still building a direct index */
690 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
691 } else {
692 if (index == NULL) {
693 /* We used to be direct but not anymore */
694 index = nir_imm_int(b, base_index);
695 base_index = 0;
696 }
697
698 index = nir_iadd(b, index,
699 nir_imul(b, nir_imm_int(b, array_elements),
700 nir_ssa_for_src(b, deref->arr.index, 1)));
701 }
702
703 array_elements *= glsl_get_length(parent->type);
704
705 deref = parent;
706 }
707
708 if (index)
709 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
710
711 /* Now that we have the offsets, we apply them, rewriting the source or
712 * removing the instr if needed
713 */
714 if (index) {
715 nir_instr_rewrite_src(&instr->instr, &src->src,
716 nir_src_for_ssa(index));
717
718 src->src_type = is_sampler ?
719 nir_tex_src_sampler_offset :
720 nir_tex_src_texture_offset;
721 } else {
722 nir_tex_instr_remove_src(instr, src_idx);
723 }
724
725 uint32_t set = deref->var->data.descriptor_set;
726 uint32_t binding = deref->var->data.binding;
727 /* FIXME: this is a really simplified check for the precision to be used
728 * for the sampling. Right now we are only checking the variables used
729 * on the operation itself, but there are other cases that we could use to
730 * infer the precision requirement.
731 */
732 bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
733 deref->var->data.precision == GLSL_PRECISION_LOW;
734 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
735 struct v3dv_descriptor_set_binding_layout *binding_layout =
736 &set_layout->binding[binding];
737
738 /* For input attachments, the shader includes the attachment_idx. As we are
739 * treating them as a texture, we only want the base_index
740 */
741 uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
742 deref->var->data.index + base_index :
743 base_index;
744
745 uint8_t return_size;
746 if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
747 return_size = 16;
748 else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
749 return_size = 32;
750 else
751 return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
752
753 struct v3dv_descriptor_map *map =
754 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
755 b->shader->info.stage, is_sampler);
756 int desc_index =
757 descriptor_map_add(map,
758 deref->var->data.descriptor_set,
759 deref->var->data.binding,
760 array_index,
761 binding_layout->array_size,
762 0,
763 return_size);
764
765 if (is_sampler)
766 instr->sampler_index = desc_index;
767 else
768 instr->texture_index = desc_index;
769
770 return return_size;
771 }
772
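/* Lowers the texture and sampler derefs of a tex instruction into indices
 * into the pipeline's texture/sampler descriptor maps.
 */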
773 static bool
774 lower_sampler(nir_builder *b,
775 nir_tex_instr *instr,
776 struct lower_pipeline_layout_state *state)
777 {
778 uint8_t return_size = 0;
779
780 int texture_idx =
781 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
782
783 if (texture_idx >= 0)
784 return_size = lower_tex_src_to_offset(b, instr, texture_idx, state);
785
786 int sampler_idx =
787 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
788
789 if (sampler_idx >= 0)
790 lower_tex_src_to_offset(b, instr, sampler_idx, state);
791
792 if (texture_idx < 0 && sampler_idx < 0)
793 return false;
794
795 /* If we don't have a sampler, we assign it the idx we reserve for this
796 * case, and we ensure that it is using the correct return size.
797 */
798 if (sampler_idx < 0) {
799 state->needs_default_sampler_state = true;
800 instr->sampler_index = return_size == 16 ?
801 V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
802 }
803
804 return true;
805 }
806
807 /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
808 static void
809 lower_image_deref(nir_builder *b,
810 nir_intrinsic_instr *instr,
811 struct lower_pipeline_layout_state *state)
812 {
813 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
814 nir_ssa_def *index = NULL;
815 unsigned array_elements = 1;
816 unsigned base_index = 0;
817
818 while (deref->deref_type != nir_deref_type_var) {
819 assert(deref->parent.is_ssa);
820 nir_deref_instr *parent =
821 nir_instr_as_deref(deref->parent.ssa->parent_instr);
822
823 assert(deref->deref_type == nir_deref_type_array);
824
825 if (nir_src_is_const(deref->arr.index) && index == NULL) {
826 /* We're still building a direct index */
827 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
828 } else {
829 if (index == NULL) {
830 /* We used to be direct but not anymore */
831 index = nir_imm_int(b, base_index);
832 base_index = 0;
833 }
834
835 index = nir_iadd(b, index,
836 nir_imul(b, nir_imm_int(b, array_elements),
837 nir_ssa_for_src(b, deref->arr.index, 1)));
838 }
839
840 array_elements *= glsl_get_length(parent->type);
841
842 deref = parent;
843 }
844
845 if (index)
846 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
847
848 uint32_t set = deref->var->data.descriptor_set;
849 uint32_t binding = deref->var->data.binding;
850 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
851 struct v3dv_descriptor_set_binding_layout *binding_layout =
852 &set_layout->binding[binding];
853
854 uint32_t array_index = deref->var->data.index + base_index;
855
856 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
857 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
858
859 struct v3dv_descriptor_map *map =
860 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
861 b->shader->info.stage, false);
862
863 int desc_index =
864 descriptor_map_add(map,
865 deref->var->data.descriptor_set,
866 deref->var->data.binding,
867 array_index,
868 binding_layout->array_size,
869 0,
870 32 /* return_size: doesn't apply for textures */);
871
872 /* Note: we don't need to do anything here in relation to the precision and
873 * the output size because for images we can infer that info from the image
874 * intrinsic, which includes the image format (see
875 * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
876 */
877
878 index = nir_imm_int(b, desc_index);
879
880 nir_rewrite_image_intrinsic(instr, index, false);
881 }
882
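/* Lowers descriptor-related intrinsics to the indices expected by the
 * backend. Returns true if the intrinsic was handled.
 */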
883 static bool
884 lower_intrinsic(nir_builder *b,
885 nir_intrinsic_instr *instr,
886 struct lower_pipeline_layout_state *state)
887 {
888 switch (instr->intrinsic) {
889 case nir_intrinsic_load_push_constant:
890 lower_load_push_constant(b, instr, state);
891 return true;
892
893 case nir_intrinsic_vulkan_resource_index:
894 lower_vulkan_resource_index(b, instr, state);
895 return true;
896
897 case nir_intrinsic_load_vulkan_descriptor: {
898 /* Loading the descriptor happens as part of load/store instructions,
899 * so for us this is a no-op.
900 */
901 nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
902 nir_instr_remove(&instr->instr);
903 return true;
904 }
905
906 case nir_intrinsic_image_deref_load:
907 case nir_intrinsic_image_deref_store:
908 case nir_intrinsic_image_deref_atomic_add:
909 case nir_intrinsic_image_deref_atomic_imin:
910 case nir_intrinsic_image_deref_atomic_umin:
911 case nir_intrinsic_image_deref_atomic_imax:
912 case nir_intrinsic_image_deref_atomic_umax:
913 case nir_intrinsic_image_deref_atomic_and:
914 case nir_intrinsic_image_deref_atomic_or:
915 case nir_intrinsic_image_deref_atomic_xor:
916 case nir_intrinsic_image_deref_atomic_exchange:
917 case nir_intrinsic_image_deref_atomic_comp_swap:
918 case nir_intrinsic_image_deref_size:
919 case nir_intrinsic_image_deref_samples:
920 lower_image_deref(b, instr, state);
921 return true;
922
923 default:
924 return false;
925 }
926 }
927
928 static bool
929 lower_pipeline_layout_cb(nir_builder *b,
930 nir_instr *instr,
931 void *_state)
932 {
933 bool progress = false;
934 struct lower_pipeline_layout_state *state = _state;
935
936 b->cursor = nir_before_instr(instr);
937 switch (instr->type) {
938 case nir_instr_type_tex:
939 progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
940 break;
941 case nir_instr_type_intrinsic:
942 progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
943 break;
944 default:
945 break;
946 }
947
948 return progress;
949 }
950
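/* Lowers all descriptor accesses in the shader against the pipeline layout
 * and reports whether the shader samples textures without a sampler, in
 * which case a default sampler state will be needed.
 */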
951 static bool
952 lower_pipeline_layout_info(nir_shader *shader,
953 struct v3dv_pipeline *pipeline,
954 const struct v3dv_pipeline_layout *layout,
955 bool *needs_default_sampler_state)
956 {
957 bool progress = false;
958
959 struct lower_pipeline_layout_state state = {
960 .pipeline = pipeline,
961 .layout = layout,
962 .needs_default_sampler_state = false,
963 };
964
965 progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
966 nir_metadata_block_index |
967 nir_metadata_dominance,
968 &state);
969
970 *needs_default_sampler_state = state.needs_default_sampler_state;
971
972 return progress;
973 }
974
975
976 static void
977 lower_fs_io(nir_shader *nir)
978 {
979 /* Our backend doesn't handle array fragment shader outputs */
980 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
981 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
982
983 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
984 MESA_SHADER_FRAGMENT);
985
986 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
987 MESA_SHADER_FRAGMENT);
988
989 NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
990 type_size_vec4, 0);
991 }
992
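/* Assigns driver locations to GS inputs and outputs. Unlike the FS case we
 * don't call nir_lower_io here (see the note in lower_vs_io below).
 */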
993 static void
994 lower_gs_io(struct nir_shader *nir)
995 {
996 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
997
998 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
999 MESA_SHADER_GEOMETRY);
1000
1001 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1002 MESA_SHADER_GEOMETRY);
1003 }
1004
1005 static void
1006 lower_vs_io(struct nir_shader *nir)
1007 {
1008 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1009
1010 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1011 MESA_SHADER_VERTEX);
1012
1013 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1014 MESA_SHADER_VERTEX);
1015
1016 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1017 * overlaps with v3d_nir_lower_io. Need further research though.
1018 */
1019 }
1020
1021 static void
1022 shader_debug_output(const char *message, void *data)
1023 {
1024 /* FIXME: We probably don't want to debug anything extra here, and in fact
1025 * the compiler doesn't use this callback much, only as an alternative
1026 * way to dump the shaderdb stats, which you can already get using
1027 * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1028 * compiler to remove that callback.
1029 */
1030 }
1031
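/* Fills the common v3d_key fields shared by all stages: texture/sampler
 * counts and return sizes, whether this is the last geometry stage, user
 * clip plane enables and robust buffer access.
 */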
1032 static void
1033 pipeline_populate_v3d_key(struct v3d_key *key,
1034 const struct v3dv_pipeline_stage *p_stage,
1035 uint32_t ucp_enables,
1036 bool robust_buffer_access)
1037 {
1038 assert(p_stage->pipeline->shared_data &&
1039 p_stage->pipeline->shared_data->maps[p_stage->stage]);
1040
1041 /* The following values are defaults used at pipeline creation time. We
1042 * use 32 bit as the default return size.
1043 */
1044 struct v3dv_descriptor_map *sampler_map =
1045 &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1046 struct v3dv_descriptor_map *texture_map =
1047 &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1048
1049 key->num_tex_used = texture_map->num_desc;
1050 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1051 for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1052 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1053 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1054 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1055 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1056 }
1057
1058 key->num_samplers_used = sampler_map->num_desc;
1059 assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1060 for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1061 sampler_idx++) {
1062 key->sampler[sampler_idx].return_size =
1063 sampler_map->return_size[sampler_idx];
1064
1065 key->sampler[sampler_idx].return_channels =
1066 key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1067 }
1068
1069 switch (p_stage->stage) {
1070 case BROADCOM_SHADER_VERTEX:
1071 case BROADCOM_SHADER_VERTEX_BIN:
1072 key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1073 break;
1074 case BROADCOM_SHADER_GEOMETRY:
1075 case BROADCOM_SHADER_GEOMETRY_BIN:
1076 /* FIXME: while we don't implement tessellation shaders */
1077 key->is_last_geometry_stage = true;
1078 break;
1079 case BROADCOM_SHADER_FRAGMENT:
1080 case BROADCOM_SHADER_COMPUTE:
1081 key->is_last_geometry_stage = false;
1082 break;
1083 default:
1084 unreachable("unsupported shader stage");
1085 }
1086
1087 /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1088 * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1089 * takes care of adding a single compact array variable at
1090 * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1091 *
1092 * The only lowering we are interested is specific to the fragment shader,
1093 * where we want to emit discards to honor writes to gl_ClipDistance[] in
1094 * previous stages. This is done via nir_lower_clip_fs() so we only set up
1095 * the ucp enable mask for that stage.
1096 */
1097 key->ucp_enables = ucp_enables;
1098
1099 key->robust_buffer_access = robust_buffer_access;
1100
1101 key->environment = V3D_ENVIRONMENT_VULKAN;
1102 }
1103
1104 /* FIXME: anv maps to the hw primitive type. Perhaps eventually we should do
1105 * the same. For now we use the pipe prim type, which is what v3d already uses.
1106 */
1107 static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1108 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1109 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1110 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1111 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1112 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1113 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1114 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1115 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1116 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1117 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1118 };
1119
1120 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1121 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1122 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1123 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1124 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1125 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1126 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1127 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1128 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1129 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1130 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1131 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1132 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1133 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1134 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1135 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1136 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1137 };
1138
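/* Fills the fragment shader compiler key from the pipeline state: topology,
 * logic op, multisampling state and the color attachment formats.
 */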
1139 static void
1140 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1141 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1142 const struct v3dv_pipeline_stage *p_stage,
1143 bool has_geometry_shader,
1144 uint32_t ucp_enables)
1145 {
1146 assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1147
1148 memset(key, 0, sizeof(*key));
1149
1150 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1151 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1152
1153 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1154 pCreateInfo->pInputAssemblyState;
1155 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1156
1157 key->is_points = (topology == PIPE_PRIM_POINTS);
1158 key->is_lines = (topology >= PIPE_PRIM_LINES &&
1159 topology <= PIPE_PRIM_LINE_STRIP);
1160 key->has_gs = has_geometry_shader;
1161
1162 const VkPipelineColorBlendStateCreateInfo *cb_info =
1163 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1164 pCreateInfo->pColorBlendState : NULL;
1165
1166 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1167 vk_to_pipe_logicop[cb_info->logicOp] :
1168 PIPE_LOGICOP_COPY;
1169
1170 const bool raster_enabled =
1171 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1172
1173 /* Multisample rasterization state must be ignored if rasterization
1174 * is disabled.
1175 */
1176 const VkPipelineMultisampleStateCreateInfo *ms_info =
1177 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1178 if (ms_info) {
1179 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1180 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1181 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1182
1183 if (key->msaa) {
1184 key->sample_coverage =
1185 p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1186 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1187 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1188 }
1189 }
1190
1191 /* This is intended for V3D versions before 4.1, otherwise we just use the
1192 * tile buffer load/store swap R/B bit.
1193 */
1194 key->swap_color_rb = 0;
1195
1196 const struct v3dv_render_pass *pass =
1197 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1198 const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1199 for (uint32_t i = 0; i < subpass->color_count; i++) {
1200 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1201 if (att_idx == VK_ATTACHMENT_UNUSED)
1202 continue;
1203
1204 key->cbufs |= 1 << i;
1205
1206 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1207 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1208
1209 /* If logic operations are enabled then we might emit color reads and we
1210 * need to know the color buffer format and swizzle for that
1211 */
1212 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1213 key->color_fmt[i].format = fb_pipe_format;
1214 memcpy(key->color_fmt[i].swizzle,
1215 v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format),
1216 sizeof(key->color_fmt[i].swizzle));
1217 }
1218
1219 const struct util_format_description *desc =
1220 vk_format_description(fb_format);
1221
1222 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1223 desc->channel[0].size == 32) {
1224 key->f32_color_rb |= 1 << i;
1225 }
1226
1227 if (p_stage->nir->info.fs.untyped_color_outputs) {
1228 if (util_format_is_pure_uint(fb_pipe_format))
1229 key->uint_color_rb |= 1 << i;
1230 else if (util_format_is_pure_sint(fb_pipe_format))
1231 key->int_color_rb |= 1 << i;
1232 }
1233
1234 if (key->is_points) {
1235 /* This mask represents state for GL_ARB_point_sprite which is not
1236 * relevant to Vulkan.
1237 */
1238 key->point_sprite_mask = 0;
1239
1240 /* Vulkan mandates upper left. */
1241 key->point_coord_upper_left = true;
1242 }
1243 }
1244 }
1245
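/* A stage's used outputs are simply the inputs consumed by the next stage,
 * which lets the compiler skip varyings the next stage never reads.
 */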
1246 static void
1247 setup_stage_outputs_from_next_stage_inputs(
1248 uint8_t next_stage_num_inputs,
1249 struct v3d_varying_slot *next_stage_input_slots,
1250 uint8_t *num_used_outputs,
1251 struct v3d_varying_slot *used_output_slots,
1252 uint32_t size_of_used_output_slots)
1253 {
1254 *num_used_outputs = next_stage_num_inputs;
1255 memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1256 }
1257
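/* Fills the geometry shader compiler key, linking its outputs against the
 * fragment shader inputs (or against nothing for the binning variant, since
 * transform feedback is not supported).
 */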
1258 static void
1259 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1260 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1261 const struct v3dv_pipeline_stage *p_stage)
1262 {
1263 assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1264 p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1265
1266 memset(key, 0, sizeof(*key));
1267
1268 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1269 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1270
1271 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1272
1273 key->per_vertex_point_size =
1274 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1275
1276 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1277
1278 assert(key->base.is_last_geometry_stage);
1279 if (key->is_coord) {
1280 /* Output varyings in the last binning shader are only used for transform
1281 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1282 */
1283 key->num_used_outputs = 0;
1284 } else {
1285 struct v3dv_shader_variant *fs_variant =
1286 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1287
1288 STATIC_ASSERT(sizeof(key->used_outputs) ==
1289 sizeof(fs_variant->prog_data.fs->input_slots));
1290
1291 setup_stage_outputs_from_next_stage_inputs(
1292 fs_variant->prog_data.fs->num_inputs,
1293 fs_variant->prog_data.fs->input_slots,
1294 &key->num_used_outputs,
1295 key->used_outputs,
1296 sizeof(key->used_outputs));
1297 }
1298 }
1299
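/* Fills the vertex shader compiler key, linking its outputs against the
 * inputs of the next stage (GS if present, FS otherwise), or against
 * nothing when this is the last binning stage.
 */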
1300 static void
1301 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1302 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1303 const struct v3dv_pipeline_stage *p_stage)
1304 {
1305 assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1306 p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1307
1308 memset(key, 0, sizeof(*key));
1309
1310 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1311 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1312
1313 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1314
1315 /* Vulkan specifies a per-vertex point size, so this is true if the
1316 * primitives are points (like on ES2).
1317 */
1318 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1319 pCreateInfo->pInputAssemblyState;
1320 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1321
1322 /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1323 * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1324 key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1325
1326 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1327
1328 if (key->is_coord) { /* Binning VS*/
1329 if (key->base.is_last_geometry_stage) {
1330 /* Output varyings in the last binning shader are only used for
1331 * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1332 * supported.
1333 */
1334 key->num_used_outputs = 0;
1335 } else {
1336 /* Linking against GS binning program */
1337 assert(pipeline->gs);
1338 struct v3dv_shader_variant *gs_bin_variant =
1339 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1340
1341 STATIC_ASSERT(sizeof(key->used_outputs) ==
1342 sizeof(gs_bin_variant->prog_data.gs->input_slots));
1343
1344 setup_stage_outputs_from_next_stage_inputs(
1345 gs_bin_variant->prog_data.gs->num_inputs,
1346 gs_bin_variant->prog_data.gs->input_slots,
1347 &key->num_used_outputs,
1348 key->used_outputs,
1349 sizeof(key->used_outputs));
1350 }
1351 } else { /* Render VS */
1352 if (pipeline->gs) {
1353 /* Linking against GS render program */
1354 struct v3dv_shader_variant *gs_variant =
1355 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1356
1357 STATIC_ASSERT(sizeof(key->used_outputs) ==
1358 sizeof(gs_variant->prog_data.gs->input_slots));
1359
1360 setup_stage_outputs_from_next_stage_inputs(
1361 gs_variant->prog_data.gs->num_inputs,
1362 gs_variant->prog_data.gs->input_slots,
1363 &key->num_used_outputs,
1364 key->used_outputs,
1365 sizeof(key->used_outputs));
1366 } else {
1367 /* Linking against FS program */
1368 struct v3dv_shader_variant *fs_variant =
1369 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1370
1371 STATIC_ASSERT(sizeof(key->used_outputs) ==
1372 sizeof(fs_variant->prog_data.fs->input_slots));
1373
1374 setup_stage_outputs_from_next_stage_inputs(
1375 fs_variant->prog_data.fs->num_inputs,
1376 fs_variant->prog_data.fs->input_slots,
1377 &key->num_used_outputs,
1378 key->used_outputs,
1379 sizeof(key->used_outputs));
1380 }
1381 }
1382
1383 const VkPipelineVertexInputStateCreateInfo *vi_info =
1384 pCreateInfo->pVertexInputState;
1385 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1386 const VkVertexInputAttributeDescription *desc =
1387 &vi_info->pVertexAttributeDescriptions[i];
1388 assert(desc->location < MAX_VERTEX_ATTRIBS);
1389 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1390 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1391 }
1392 }
1393
1394 /**
1395 * Creates the initial form of the pipeline stage for a binning shader by
1396 * cloning the render shader and flagging it as a coordinate shader.
1397 *
1398 * Returns NULL if it was not able to allocate the object, so it should be
1399 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1400 */
1401 static struct v3dv_pipeline_stage *
1402 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1403 const VkAllocationCallbacks *pAllocator)
1404 {
1405 struct v3dv_device *device = src->pipeline->device;
1406
1407 struct v3dv_pipeline_stage *p_stage =
1408 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1409 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1410
1411 if (p_stage == NULL)
1412 return NULL;
1413
1414 assert(src->stage == BROADCOM_SHADER_VERTEX ||
1415 src->stage == BROADCOM_SHADER_GEOMETRY);
1416
1417 enum broadcom_shader_stage bin_stage =
1418 src->stage == BROADCOM_SHADER_VERTEX ?
1419 BROADCOM_SHADER_VERTEX_BIN :
1420 BROADCOM_SHADER_GEOMETRY_BIN;
1421
1422 p_stage->pipeline = src->pipeline;
1423 p_stage->stage = bin_stage;
1424 p_stage->entrypoint = src->entrypoint;
1425 p_stage->module = src->module;
1426 /* For binning shaders we will clone the NIR code from the corresponding
1427 * render shader later, when we call pipeline_compile_xxx_shader. This way
1428 * we only have to run the relevant NIR lowerings once, on the render shaders.
1429 */
1430 p_stage->nir = NULL;
1431 p_stage->spec_info = src->spec_info;
1432 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1433 memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1434
1435 return p_stage;
1436 }
1437
1438 /**
1439 * Returns false if it was not able to allocate or map the assembly bo memory.
1440 */
1441 static bool
1442 upload_assembly(struct v3dv_pipeline *pipeline)
1443 {
1444 uint32_t total_size = 0;
1445 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1446 struct v3dv_shader_variant *variant =
1447 pipeline->shared_data->variants[stage];
1448
1449 if (variant != NULL)
1450 total_size += variant->qpu_insts_size;
1451 }
1452
1453 struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1454 "pipeline shader assembly", true);
1455 if (!bo) {
1456 fprintf(stderr, "failed to allocate memory for shader\n");
1457 return false;
1458 }
1459
1460 bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1461 if (!ok) {
1462 fprintf(stderr, "failed to map source shader buffer\n");
1463 return false;
1464 }
1465
1466 uint32_t offset = 0;
1467 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1468 struct v3dv_shader_variant *variant =
1469 pipeline->shared_data->variants[stage];
1470
1471 if (variant != NULL) {
1472 variant->assembly_offset = offset;
1473
1474 memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1475 offset += variant->qpu_insts_size;
1476
1477 /* We don't need qpu_insts anymore. */
1478 free(variant->qpu_insts);
1479 variant->qpu_insts = NULL;
1480 }
1481 }
1482 assert(total_size == offset);
1483
1484 pipeline->shared_data->assembly_bo = bo;
1485
1486 return true;
1487 }
1488
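/* Computes the SHA1 used to look up a graphics pipeline in the pipeline
 * cache, from the pipeline layout, the per-stage shader hashes and the
 * pipeline key.
 */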
1489 static void
1490 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1491 struct v3dv_pipeline_key *key,
1492 unsigned char *sha1_out)
1493 {
1494 struct mesa_sha1 ctx;
1495 _mesa_sha1_init(&ctx);
1496
1497 if (pipeline->layout) {
1498 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1499 sizeof(pipeline->layout->sha1));
1500 }
1501
1502 /* We need to include all shader stages in the sha1 key as linking may modify
1503 * the shader code in any stage. An alternative would be to use the
1504 * serialized NIR, but that seems like overkill.
1505 */
1506 _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1507 sizeof(pipeline->vs->shader_sha1));
1508
1509 if (pipeline->gs) {
1510 _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1511 sizeof(pipeline->gs->shader_sha1));
1512 }
1513
1514 _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1515 sizeof(pipeline->fs->shader_sha1));
1516
1517 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1518
1519 _mesa_sha1_final(&ctx, sha1_out);
1520 }
1521
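/* Same as pipeline_hash_graphics, but for compute pipelines: only the
 * layout, the compute shader hash and the pipeline key are included.
 */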
1522 static void
1523 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1524 struct v3dv_pipeline_key *key,
1525 unsigned char *sha1_out)
1526 {
1527 struct mesa_sha1 ctx;
1528 _mesa_sha1_init(&ctx);
1529
1530 if (pipeline->layout) {
1531 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1532 sizeof(pipeline->layout->sha1));
1533 }
1534
1535 _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1536 sizeof(pipeline->cs->shader_sha1));
1537
1538 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1539
1540 _mesa_sha1_final(&ctx, sha1_out);
1541 }
1542
1543 /* Checks that the pipeline has enough spill size to use for any of its
1544 * variants.
1545 */
1546 static void
1547 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1548 {
1549 uint32_t max_spill_size = 0;
1550
1551 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1552 struct v3dv_shader_variant *variant =
1553 pipeline->shared_data->variants[stage];
1554
1555 if (variant != NULL) {
1556 max_spill_size = MAX2(variant->prog_data.base->spill_size,
1557 max_spill_size);
1558 }
1559 }
1560
1561 if (max_spill_size > 0) {
1562 struct v3dv_device *device = pipeline->device;
1563
1564 /* The TIDX register we use for choosing the area to access
1565 * for scratch space is: (core << 6) | (qpu << 2) | thread.
1566 * Even at the minimum thread count in a particular shader, that
1567 * means we still multiply the number of QPUs by 4.
1568 */
1569 const uint32_t total_spill_size =
1570 4 * device->devinfo.qpu_count * max_spill_size;
1571 if (pipeline->spill.bo) {
1572 assert(pipeline->spill.size_per_thread > 0);
1573 v3dv_bo_free(device, pipeline->spill.bo);
1574 }
1575 pipeline->spill.bo =
1576 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1577 pipeline->spill.size_per_thread = max_spill_size;
1578 }
1579 }
1580
1581 /**
1582 * Creates a new shader variant. Note that prog_data is not const, as it is
1583 * assumed that the caller will provide a pointer that the
1584 * shader_variant will own.
1585 *
1586 * Creation doesn't include allocating a BO to store the contents of qpu_insts,
1587 * as we will try to share the same BO for several shader variants. Also note
1588 * that qpu_insts being NULL is valid, for example if we are creating the
1589 * shader_variants from the cache, so we can just upload the assembly of all
1590 * the shader stages at once.
1591 */
1592 struct v3dv_shader_variant *
1593 v3dv_shader_variant_create(struct v3dv_device *device,
1594 enum broadcom_shader_stage stage,
1595 struct v3d_prog_data *prog_data,
1596 uint32_t prog_data_size,
1597 uint32_t assembly_offset,
1598 uint64_t *qpu_insts,
1599 uint32_t qpu_insts_size,
1600 VkResult *out_vk_result)
1601 {
1602 struct v3dv_shader_variant *variant =
1603 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1604 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1605
1606 if (variant == NULL) {
1607 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1608 return NULL;
1609 }
1610
1611 variant->stage = stage;
1612 variant->prog_data_size = prog_data_size;
1613 variant->prog_data.base = prog_data;
1614
1615 variant->assembly_offset = assembly_offset;
1616 variant->qpu_insts_size = qpu_insts_size;
1617 variant->qpu_insts = qpu_insts;
1618
1619 *out_vk_result = VK_SUCCESS;
1620
1621 return variant;
1622 }
1623
1624 /* For a given key, returns the compiled version of the shader as a new
1625 * reference to a shader_variant for the caller, or NULL.
1626 *
1627 * If the method returns NULL it means that something went wrong:
1628 * * Not enough memory: this is one of the possible outcomes defined by
1629 * vkCreateXXXPipelines. out_vk_result will return the proper OOM error.
1630 * * Compilation error: hypothetically this shouldn't happen, as the spec
1631 * states that vkShaderModule needs to be created with valid SPIR-V, so
1632 * any compilation failure is a driver bug. In practice, something as
1633 * common as failing to register allocate can lead to a compilation
1634 * failure. In that case the only option (for any driver) is
1635 * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1636 * error.
1637 */
1638 static struct v3dv_shader_variant *
1639 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1640 struct v3d_key *key,
1641 size_t key_size,
1642 const VkAllocationCallbacks *pAllocator,
1643 VkResult *out_vk_result)
1644 {
1645 int64_t stage_start = os_time_get_nano();
1646
1647 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1648 struct v3dv_physical_device *physical_device =
1649 &pipeline->device->instance->physicalDevice;
1650 const struct v3d_compiler *compiler = physical_device->compiler;
1651
1652 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1653 v3d_debug_flag_for_shader_stage
1654 (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1655 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1656 broadcom_shader_stage_name(p_stage->stage),
1657 p_stage->program_id);
1658 nir_print_shader(p_stage->nir, stderr);
1659 fprintf(stderr, "\n");
1660 }
1661
1662 uint64_t *qpu_insts;
1663 uint32_t qpu_insts_size;
1664 struct v3d_prog_data *prog_data;
1665 uint32_t prog_data_size =
1666 v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1667
1668 qpu_insts = v3d_compile(compiler,
1669 key, &prog_data,
1670 p_stage->nir,
1671 shader_debug_output, NULL,
1672 p_stage->program_id, 0,
1673 &qpu_insts_size);
1674
1675 struct v3dv_shader_variant *variant = NULL;
1676
1677 if (!qpu_insts) {
1678 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1679 broadcom_shader_stage_name(p_stage->stage),
1680 p_stage->program_id);
1681 *out_vk_result = VK_ERROR_UNKNOWN;
1682 } else {
1683 variant =
1684 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1685 prog_data, prog_data_size,
1686 0, /* assembly_offset, no final value yet */
1687 qpu_insts, qpu_insts_size,
1688 out_vk_result);
1689 }
1690 /* At this point we no longer need the NIR shader, but we free all the
1691 * temporary p_stage structs used during pipeline creation when we finish
1692 * it, so let's not worry about freeing the NIR here.
1693 */
1694
1695 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1696
1697 return variant;
1698 }
1699
1700 static void
1701 link_shaders(nir_shader *producer, nir_shader *consumer)
1702 {
1703 assert(producer);
1704 assert(consumer);
1705
1706 if (producer->options->lower_to_scalar) {
1707 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1708 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1709 }
1710
1711 nir_lower_io_arrays_to_elements(producer, consumer);
1712
1713 nir_optimize(producer, false);
1714 nir_optimize(consumer, false);
1715
1716 if (nir_link_opt_varyings(producer, consumer))
1717 nir_optimize(consumer, false);
1718
1719 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1720 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1721
1722 if (nir_remove_unused_varyings(producer, consumer)) {
1723 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1724 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1725
1726 nir_optimize(producer, false);
1727 nir_optimize(consumer, false);
1728
1729 /* Optimizations can cause varyings to become unused.
1730 * nir_compact_varyings() depends on all dead varyings being removed so
1731 * we need to call nir_remove_dead_variables() again here.
1732 */
1733 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1734 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1735 }
1736 }
1737
1738 static void
1739 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1740 struct v3dv_pipeline_stage *p_stage,
1741 struct v3dv_pipeline_layout *layout)
1742 {
1743 int64_t stage_start = os_time_get_nano();
1744
1745 assert(pipeline->shared_data &&
1746 pipeline->shared_data->maps[p_stage->stage]);
1747
1748 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1749
1750 /* We add this because we need a valid sampler for nir_lower_tex to do
1751 * unpacking of the texture operation result, even for the case where there
1752 * is no sampler state.
1753 *
1754 * We add two of those, one for the case where we need a 16-bit return size
1755 * and another for the case where we need a 32-bit return size.
1756 */
1757 struct v3dv_descriptor_maps *maps =
1758 pipeline->shared_data->maps[p_stage->stage];
1759
1760 UNUSED unsigned index;
1761 index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16);
1762 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1763
1764 index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32);
1765 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1766
1767 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1768 bool needs_default_sampler_state = false;
1769 NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1770 &needs_default_sampler_state);
1771
1772 /* If in the end we didn't need to use the default sampler states and the
1773 * shader doesn't need any other samplers, get rid of them so we can
1774 * recognize that this program doesn't use any samplers at all.
1775 */
1776 if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1777 maps->sampler_map.num_desc = 0;
1778
1779 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1780 }
1781
1782 /**
1783 * The SPIR-V compiler will insert a sized compact array for
1784 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1785 * where the size of the array determines the number of active clip planes.
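 *
 * For example, a shader that declares gl_ClipDistance[3] produces a compact
 * array of length 3, so the returned mask is (1 << 3) - 1 = 0x7, enabling
 * user clip planes 0, 1 and 2.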
1786 */
1787 static uint32_t
1788 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1789 {
1790 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1791 const nir_shader *shader = p_stage->nir;
1792 assert(shader);
1793
1794 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1795 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1796 assert(var->data.compact);
1797 return (1 << glsl_get_length(var->type)) - 1;
1798 }
1799 }
1800 return 0;
1801 }
1802
1803 static nir_shader *
1804 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1805 struct v3dv_pipeline *pipeline,
1806 struct v3dv_pipeline_cache *cache)
1807 {
1808 int64_t stage_start = os_time_get_nano();
1809
1810 nir_shader *nir = NULL;
1811
1812 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1813 &v3dv_nir_options,
1814 p_stage->shader_sha1);
1815
1816 if (nir) {
1817 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1818
1819 /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1820 * creation work, so the cache hit is not recorded in the pipeline
1821 * feedback flags.
1822 */
1823
1824 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1825
1826 return nir;
1827 }
1828
1829 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1830
1831 if (nir) {
1832 struct v3dv_pipeline_cache *default_cache =
1833 &pipeline->device->default_pipeline_cache;
1834
1835 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1836 p_stage->shader_sha1);
1837
1838 /* Ensure that the variant is in the default cache, as cmd_buffer could
1839 * need to change the current variant.
1840 */
1841 if (default_cache != cache) {
1842 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1843 p_stage->shader_sha1);
1844 }
1845
1846 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1847
1848 return nir;
1849 }
1850
1851 /* FIXME: this shouldn't happen, raise error? */
1852 return NULL;
1853 }
1854
1855 static VkResult
1856 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1857 const VkAllocationCallbacks *pAllocator,
1858 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1859 {
1860 assert(pipeline->vs_bin != NULL);
1861 if (pipeline->vs_bin->nir == NULL) {
1862 assert(pipeline->vs->nir);
1863 pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1864 }
1865
1866 VkResult vk_result;
1867 struct v3d_vs_key key;
1868 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1869 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1870 pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1871 pAllocator, &vk_result);
1872 if (vk_result != VK_SUCCESS)
1873 return vk_result;
1874
1875 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1876 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1877 pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1878 pAllocator, &vk_result);
1879
1880 return vk_result;
1881 }
1882
1883 static VkResult
1884 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1885 const VkAllocationCallbacks *pAllocator,
1886 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1887 {
1888 assert(pipeline->gs);
1889
1890 assert(pipeline->gs_bin != NULL);
1891 if (pipeline->gs_bin->nir == NULL) {
1892 assert(pipeline->gs->nir);
1893 pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1894 }
1895
1896 VkResult vk_result;
1897 struct v3d_gs_key key;
1898 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1899 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1900 pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1901 pAllocator, &vk_result);
1902 if (vk_result != VK_SUCCESS)
1903 return vk_result;
1904
1905 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1906 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1907 pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1908 pAllocator, &vk_result);
1909
1910 return vk_result;
1911 }
1912
1913 static VkResult
1914 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1915 const VkAllocationCallbacks *pAllocator,
1916 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1917 {
1918 struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1919
1920
1921
1922 struct v3d_fs_key key;
1923
1924 pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1925 pipeline->gs != NULL,
1926 get_ucp_enable_mask(pipeline->vs));
1927
1928 VkResult vk_result;
1929 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1930 pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1931 pAllocator, &vk_result);
1932
1933 return vk_result;
1934 }
1935
1936 static void
1937 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1938 struct v3dv_pipeline_key *key,
1939 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1940 {
1941 memset(key, 0, sizeof(*key));
1942 key->robust_buffer_access =
1943 pipeline->device->features.robustBufferAccess;
1944
1945 const bool raster_enabled =
1946 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1947
1948 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1949 pCreateInfo->pInputAssemblyState;
1950 key->topology = vk_to_pipe_prim_type[ia_info->topology];
1951
1952 const VkPipelineColorBlendStateCreateInfo *cb_info =
1953 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1954
1955 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1956 vk_to_pipe_logicop[cb_info->logicOp] :
1957 PIPE_LOGICOP_COPY;
1958
1959 /* Multisample rasterization state must be ignored if rasterization
1960 * is disabled.
1961 */
1962 const VkPipelineMultisampleStateCreateInfo *ms_info =
1963 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1964 if (ms_info) {
1965 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1966 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1967 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1968
1969 if (key->msaa) {
1970 key->sample_coverage =
1971 pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1972 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1973 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1974 }
1975 }
1976
1977 const struct v3dv_render_pass *pass =
1978 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1979 const struct v3dv_subpass *subpass = pipeline->subpass;
1980 for (uint32_t i = 0; i < subpass->color_count; i++) {
1981 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1982 if (att_idx == VK_ATTACHMENT_UNUSED)
1983 continue;
1984
1985 key->cbufs |= 1 << i;
1986
1987 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1988 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1989
1990 /* If logic operations are enabled then we might emit color reads and we
1991 * need to know the color buffer format and swizzle for that
1992 */
1993 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1994 key->color_fmt[i].format = fb_pipe_format;
1995 memcpy(key->color_fmt[i].swizzle,
1996 v3dv_get_format_swizzle(pipeline->device, fb_format),
1997 sizeof(key->color_fmt[i].swizzle));
1998 }
1999
2000 const struct util_format_description *desc =
2001 vk_format_description(fb_format);
2002
2003 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2004 desc->channel[0].size == 32) {
2005 key->f32_color_rb |= 1 << i;
2006 }
2007 }
2008
2009 const VkPipelineVertexInputStateCreateInfo *vi_info =
2010 pCreateInfo->pVertexInputState;
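/* Flag vertex attributes with a BGRA format so the compiler swaps the R/B
 * channels when fetching them.
 */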
2011 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2012 const VkVertexInputAttributeDescription *desc =
2013 &vi_info->pVertexAttributeDescriptions[i];
2014 assert(desc->location < MAX_VERTEX_ATTRIBS);
2015 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2016 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2017 }
2018
2019 assert(pipeline->subpass);
2020 key->has_multiview = pipeline->subpass->view_mask != 0;
2021 }
2022
2023 static void
2024 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2025 struct v3dv_pipeline_key *key,
2026 const VkComputePipelineCreateInfo *pCreateInfo)
2027 {
2028 /* We use the same pipeline key for graphics and compute, but we don't need
2029 * to add a field to flag compute keys, because this key is not used alone
2030 * to search in the cache: we also use, for example, the SPIR-V or the
2031 * serialized NIR, which already identifies compute shaders.
2032 */
2033 memset(key, 0, sizeof(*key));
2034 key->robust_buffer_access =
2035 pipeline->device->features.robustBufferAccess;
2036 }
2037
2038 static struct v3dv_pipeline_shared_data *
2039 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2040 struct v3dv_pipeline *pipeline,
2041 bool is_graphics_pipeline)
2042 {
2043 /* We create new_entry using the device alloc. Right now shared_data is
2044 * referenced and unreferenced by both the pipeline and the pipeline cache,
2045 * so we can't ensure that the cache or pipeline alloc will still be
2046 * available on the last unref.
2047 */
2048 struct v3dv_pipeline_shared_data *new_entry =
2049 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2050 sizeof(struct v3dv_pipeline_shared_data), 8,
2051 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2052
2053 if (new_entry == NULL)
2054 return NULL;
2055
2056 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2057 /* We don't need specific descriptor maps for binning stages; we use the
2058 * map for the corresponding render stage.
2059 */
2060 if (broadcom_shader_stage_is_binning(stage))
2061 continue;
2062
2063 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2064 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2065 continue;
2066 }
2067
2068 if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2069 /* We always inject a custom GS if we have multiview */
2070 if (!pipeline->subpass->view_mask)
2071 continue;
2072 }
2073
2074 struct v3dv_descriptor_maps *new_maps =
2075 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2076 sizeof(struct v3dv_descriptor_maps), 8,
2077 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2078
2079 if (new_maps == NULL)
2080 goto fail;
2081
2082 new_entry->maps[stage] = new_maps;
2083 }
2084
2085 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2086 new_entry->maps[BROADCOM_SHADER_VERTEX];
2087
2088 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2089 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2090
2091 new_entry->ref_cnt = 1;
2092 memcpy(new_entry->sha1_key, sha1_key, 20);
2093
2094 return new_entry;
2095
2096 fail:
2097 if (new_entry != NULL) {
2098 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2099 if (new_entry->maps[stage] != NULL)
2100 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2101 }
2102 }
2103
2104 vk_free(&pipeline->device->vk.alloc, new_entry);
2105
2106 return NULL;
2107 }
2108
2109 static void
2110 write_creation_feedback(struct v3dv_pipeline *pipeline,
2111 const void *next,
2112 const VkPipelineCreationFeedback *pipeline_feedback,
2113 uint32_t stage_count,
2114 const VkPipelineShaderStageCreateInfo *stages)
2115 {
2116 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2117 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2118
2119 if (create_feedback) {
2120 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2121 pipeline_feedback,
2122 1);
2123
2124 assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2125
2126 for (uint32_t i = 0; i < stage_count; i++) {
2127 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2128 switch (s) {
2129 case MESA_SHADER_VERTEX:
2130 create_feedback->pPipelineStageCreationFeedbacks[i] =
2131 pipeline->vs->feedback;
2132
2133 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2134 pipeline->vs_bin->feedback.duration;
2135 break;
2136
2137 case MESA_SHADER_GEOMETRY:
2138 create_feedback->pPipelineStageCreationFeedbacks[i] =
2139 pipeline->gs->feedback;
2140
2141 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2142 pipeline->gs_bin->feedback.duration;
2143 break;
2144
2145 case MESA_SHADER_FRAGMENT:
2146 create_feedback->pPipelineStageCreationFeedbacks[i] =
2147 pipeline->fs->feedback;
2148 break;
2149
2150 case MESA_SHADER_COMPUTE:
2151 create_feedback->pPipelineStageCreationFeedbacks[i] =
2152 pipeline->cs->feedback;
2153 break;
2154
2155 default:
2156 unreachable("not supported shader stage");
2157 }
2158 }
2159 }
2160 }
2161
2162 static enum shader_prim
2163 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2164 {
2165 switch (pipeline->topology) {
2166 case PIPE_PRIM_POINTS:
2167 return SHADER_PRIM_POINTS;
2168 case PIPE_PRIM_LINES:
2169 case PIPE_PRIM_LINE_STRIP:
2170 return SHADER_PRIM_LINES;
2171 case PIPE_PRIM_TRIANGLES:
2172 case PIPE_PRIM_TRIANGLE_STRIP:
2173 case PIPE_PRIM_TRIANGLE_FAN:
2174 return SHADER_PRIM_TRIANGLES;
2175 default:
2176 /* Since we don't allow GS with multiview, we can only see non-adjacency
2177 * primitives.
2178 */
2179 unreachable("Unexpected pipeline primitive type");
2180 }
2181 }
2182
2183 static enum shader_prim
2184 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2185 {
2186 switch (pipeline->topology) {
2187 case PIPE_PRIM_POINTS:
2188 return SHADER_PRIM_POINTS;
2189 case PIPE_PRIM_LINES:
2190 case PIPE_PRIM_LINE_STRIP:
2191 return SHADER_PRIM_LINE_STRIP;
2192 case PIPE_PRIM_TRIANGLES:
2193 case PIPE_PRIM_TRIANGLE_STRIP:
2194 case PIPE_PRIM_TRIANGLE_FAN:
2195 return SHADER_PRIM_TRIANGLE_STRIP;
2196 default:
2197 /* Since we don't allow GS with multiview, we can only see non-adjacency
2198 * primitives.
2199 */
2200 unreachable("Unexpected pipeline primitive type");
2201 }
2202 }
2203
2204 static bool
2205 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2206 struct v3dv_pipeline_cache *cache,
2207 const VkAllocationCallbacks *pAllocator)
2208 {
2209 /* Create the passthrough GS from the VS output interface */
2210 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2211 nir_shader *vs_nir = pipeline->vs->nir;
2212
2213 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2214 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2215 "multiview broadcast gs");
2216 nir_shader *nir = b.shader;
2217 nir->info.inputs_read = vs_nir->info.outputs_written;
2218 nir->info.outputs_written = vs_nir->info.outputs_written |
2219 (1ull << VARYING_SLOT_LAYER);
2220
2221 uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2222 nir->info.gs.input_primitive =
2223 multiview_gs_input_primitive_from_pipeline(pipeline);
2224 nir->info.gs.output_primitive =
2225 multiview_gs_output_primitive_from_pipeline(pipeline);
2226 nir->info.gs.vertices_in = vertex_count;
2227 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2228 nir->info.gs.invocations = 1;
2229 nir->info.gs.active_stream_mask = 0x1;
2230
2231 /* Make a list of GS input/output variables from the VS outputs */
2232 nir_variable *in_vars[100];
2233 nir_variable *out_vars[100];
2234 uint32_t var_count = 0;
2235 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2236 char name[8];
2237 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2238
2239 in_vars[var_count] =
2240 nir_variable_create(nir, nir_var_shader_in,
2241 glsl_array_type(out_vs_var->type, vertex_count, 0),
2242 name);
2243 in_vars[var_count]->data.location = out_vs_var->data.location;
2244 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2245 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2246
2247 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2248 out_vars[var_count] =
2249 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2250 out_vars[var_count]->data.location = out_vs_var->data.location;
2251 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2252
2253 var_count++;
2254 }
2255
2256 /* Add the gl_Layer output variable */
2257 nir_variable *out_layer =
2258 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2259 "out_Layer");
2260 out_layer->data.location = VARYING_SLOT_LAYER;
2261
2262 /* Get the view index value that we will write to gl_Layer */
2263 nir_ssa_def *layer =
2264 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2265
2266 /* Emit all output vertices */
2267 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2268 /* Emit all output varyings */
2269 for (uint32_t i = 0; i < var_count; i++) {
2270 nir_deref_instr *in_value =
2271 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2272 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2273 }
2274
2275 /* Emit gl_Layer write */
2276 nir_store_var(&b, out_layer, layer, 0x1);
2277
2278 nir_emit_vertex(&b, 0);
2279 }
2280 nir_end_primitive(&b, 0);
2281
2282 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2283 * with what we expect from SPIR-V modules.
2284 */
2285 preprocess_nir(nir);
2286
2287 /* Attach the geometry shader to the pipeline */
2288 struct v3dv_device *device = pipeline->device;
2289 struct v3dv_physical_device *physical_device =
2290 &device->instance->physicalDevice;
2291
2292 struct v3dv_pipeline_stage *p_stage =
2293 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2294 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2295
2296 if (p_stage == NULL) {
2297 ralloc_free(nir);
2298 return false;
2299 }
2300
2301 p_stage->pipeline = pipeline;
2302 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2303 p_stage->entrypoint = "main";
2304 p_stage->module = 0;
2305 p_stage->nir = nir;
2306 pipeline_compute_sha1_from_nir(p_stage);
2307 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2308
2309 pipeline->has_gs = true;
2310 pipeline->gs = p_stage;
2311 pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2312
2313 pipeline->gs_bin =
2314 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2315 if (pipeline->gs_bin == NULL)
2316 return false;
2317
2318 return true;
2319 }
2320
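/* Record whether any of the compiled variants in the pipeline accesses
 * memory through a buffer device address (has_global_address), so later
 * command buffer recording can take that into account.
 */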
2321 static void
2322 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2323 {
2324 for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2325 struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2326 if (variant && variant->prog_data.base->has_global_address) {
2327 pipeline->uses_buffer_device_address = true;
2328 return;
2329 }
2330 }
2331
2332 pipeline->uses_buffer_device_address = false;
2333 }
2334
2335 /*
2336 * Compiles a pipeline. Note that it also allocates internal objects, but if
2337 * some allocations succeed and others fail, this method does not free the
2338 * successful ones.
2339 *
2340 * This is done to simplify the code: in that case we just call the pipeline
2341 * destroy method, which handles freeing the internal objects that were
2342 * allocated. We just need to be careful to set the objects that were not
2343 * allocated to NULL.
2344 */
2345 static VkResult
2346 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2347 struct v3dv_pipeline_cache *cache,
2348 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2349 const VkAllocationCallbacks *pAllocator)
2350 {
2351 VkPipelineCreationFeedback pipeline_feedback = {
2352 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2353 };
2354 int64_t pipeline_start = os_time_get_nano();
2355
2356 struct v3dv_device *device = pipeline->device;
2357 struct v3dv_physical_device *physical_device =
2358 &device->instance->physicalDevice;
2359
2360 /* First pass to get some common info from the shader, and create the
2361 * individual pipeline_stage objects
2362 */
2363 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2364 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2365 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2366
2367 struct v3dv_pipeline_stage *p_stage =
2368 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2369 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2370
2371 if (p_stage == NULL)
2372 return VK_ERROR_OUT_OF_HOST_MEMORY;
2373
2374 /* Note that we are assigning program_id slightly differently than
2375 * v3d. Here we assign one per pipeline stage, so vs and vs_bin
2376 * have different program_ids, while v3d would use the same for
2377 * both. For v3dv it is more natural to have an id this way,
2378 * as right now we are using it for debugging, not for shader-db.
2379 */
2380 p_stage->program_id =
2381 p_atomic_inc_return(&physical_device->next_program_id);
2382
2383 p_stage->pipeline = pipeline;
2384 p_stage->stage = gl_shader_stage_to_broadcom(stage);
2385 p_stage->entrypoint = sinfo->pName;
2386 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2387 p_stage->spec_info = sinfo->pSpecializationInfo;
2388
2389 vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i], p_stage->shader_sha1);
2390
2391 pipeline->active_stages |= sinfo->stage;
2392
2393 /* We will try to get directly the compiled shader variant, so let's not
2394 * worry about getting the nir shader for now.
2395 */
2396 p_stage->nir = NULL;
2397
2398 switch(stage) {
2399 case MESA_SHADER_VERTEX:
2400 pipeline->vs = p_stage;
2401 pipeline->vs_bin =
2402 pipeline_stage_create_binning(pipeline->vs, pAllocator);
2403 if (pipeline->vs_bin == NULL)
2404 return VK_ERROR_OUT_OF_HOST_MEMORY;
2405 break;
2406
2407 case MESA_SHADER_GEOMETRY:
2408 pipeline->has_gs = true;
2409 pipeline->gs = p_stage;
2410 pipeline->gs_bin =
2411 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2412 if (pipeline->gs_bin == NULL)
2413 return VK_ERROR_OUT_OF_HOST_MEMORY;
2414 break;
2415
2416 case MESA_SHADER_FRAGMENT:
2417 pipeline->fs = p_stage;
2418 break;
2419
2420 default:
2421 unreachable("not supported shader stage");
2422 }
2423 }
2424
2425 /* Add a no-op fragment shader if needed */
2426 if (!pipeline->fs) {
2427 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2428 &v3dv_nir_options,
2429 "noop_fs");
2430
2431 struct v3dv_pipeline_stage *p_stage =
2432 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2433 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2434
2435 if (p_stage == NULL)
2436 return VK_ERROR_OUT_OF_HOST_MEMORY;
2437
2438 p_stage->pipeline = pipeline;
2439 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2440 p_stage->entrypoint = "main";
2441 p_stage->module = 0;
2442 p_stage->nir = b.shader;
2443 pipeline_compute_sha1_from_nir(p_stage);
2444 p_stage->program_id =
2445 p_atomic_inc_return(&physical_device->next_program_id);
2446
2447 pipeline->fs = p_stage;
2448 pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2449 }
2450
2451 /* If multiview is enabled, we inject a custom passthrough geometry shader
2452 * to broadcast draw calls to the appropriate views.
2453 */
2454 assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2455 if (pipeline->subpass->view_mask) {
2456 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2457 return VK_ERROR_OUT_OF_HOST_MEMORY;
2458 }
2459
2460 /* First we try to get the variants from the pipeline cache (unless we are
2461 * required to capture internal representations, since in that case we
2462 * need to compile).
2463 */
2464 bool needs_executable_info =
2465 pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2466 if (!needs_executable_info) {
2467 struct v3dv_pipeline_key pipeline_key;
2468 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2469 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2470
2471 bool cache_hit = false;
2472
2473 pipeline->shared_data =
2474 v3dv_pipeline_cache_search_for_pipeline(cache,
2475 pipeline->sha1,
2476 &cache_hit);
2477
2478 if (pipeline->shared_data != NULL) {
2479 /* A correct pipeline must have at least a VS and FS */
2480 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2481 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2482 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2483 assert(!pipeline->gs ||
2484 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2485 assert(!pipeline->gs ||
2486 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2487
2488 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2489 pipeline_feedback.flags |=
2490 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2491
2492 goto success;
2493 }
2494 }
2495
2496 if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2497 return VK_PIPELINE_COMPILE_REQUIRED;
2498
2499 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2500 * shader or the pipeline cache) and compile.
2501 */
2502 pipeline->shared_data =
2503 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2504 if (!pipeline->shared_data)
2505 return VK_ERROR_OUT_OF_HOST_MEMORY;
2506
2507 pipeline->vs->feedback.flags |=
2508 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2509 if (pipeline->gs)
2510 pipeline->gs->feedback.flags |=
2511 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2512 pipeline->fs->feedback.flags |=
2513 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2514
2515 if (!pipeline->vs->nir)
2516 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2517 if (pipeline->gs && !pipeline->gs->nir)
2518 pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2519 if (!pipeline->fs->nir)
2520 pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2521
2522 /* Linking + pipeline lowerings */
2523 if (pipeline->gs) {
2524 link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2525 link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2526 } else {
2527 link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2528 }
2529
2530 pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2531 lower_fs_io(pipeline->fs->nir);
2532
2533 if (pipeline->gs) {
2534 pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2535 lower_gs_io(pipeline->gs->nir);
2536 }
2537
2538 pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2539 lower_vs_io(pipeline->vs->nir);
2540
2541 /* Compiling to vir */
2542 VkResult vk_result;
2543
2544 /* We should have got all the variants or no variants from the cache */
2545 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2546 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2547 if (vk_result != VK_SUCCESS)
2548 return vk_result;
2549
2550 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2551 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2552
2553 if (pipeline->gs) {
2554 vk_result =
2555 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2556 if (vk_result != VK_SUCCESS)
2557 return vk_result;
2558 }
2559
2560 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2561 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2562
2563 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2564 if (vk_result != VK_SUCCESS)
2565 return vk_result;
2566
2567 if (!upload_assembly(pipeline))
2568 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2569
2570 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2571
2572 success:
2573
2574 pipeline_check_buffer_device_address(pipeline);
2575
2576 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2577 write_creation_feedback(pipeline,
2578 pCreateInfo->pNext,
2579 &pipeline_feedback,
2580 pCreateInfo->stageCount,
2581 pCreateInfo->pStages);
2582
2583 /* Since we have the variants in the pipeline shared data we can now free
2584 * the pipeline stages.
2585 */
2586 if (!needs_executable_info)
2587 pipeline_free_stages(device, pipeline, pAllocator);
2588
2589 pipeline_check_spill_size(pipeline);
2590
2591 return compute_vpm_config(pipeline);
2592 }
2593
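/* Computes the VPM (Vertex Pipeline Memory) configuration for the binning
 * and rendering passes from the compiled VS/GS program data.
 */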
2594 static VkResult
2595 compute_vpm_config(struct v3dv_pipeline *pipeline)
2596 {
2597 struct v3dv_shader_variant *vs_variant =
2598 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2599 struct v3dv_shader_variant *vs_bin_variant =
2600 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2601 struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2602 struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2603
2604 struct v3d_gs_prog_data *gs = NULL;
2605 struct v3d_gs_prog_data *gs_bin = NULL;
2606 if (pipeline->has_gs) {
2607 struct v3dv_shader_variant *gs_variant =
2608 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2609 struct v3dv_shader_variant *gs_bin_variant =
2610 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2611 gs = gs_variant->prog_data.gs;
2612 gs_bin = gs_bin_variant->prog_data.gs;
2613 }
2614
2615 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2616 vs_bin, vs, gs_bin, gs,
2617 &pipeline->vpm_cfg_bin,
2618 &pipeline->vpm_cfg)) {
2619 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2620 }
2621
2622 return VK_SUCCESS;
2623 }
2624
2625 static unsigned
2626 v3dv_dynamic_state_mask(VkDynamicState state)
2627 {
2628 switch(state) {
2629 case VK_DYNAMIC_STATE_VIEWPORT:
2630 return V3DV_DYNAMIC_VIEWPORT;
2631 case VK_DYNAMIC_STATE_SCISSOR:
2632 return V3DV_DYNAMIC_SCISSOR;
2633 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2634 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2635 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2636 return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2637 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2638 return V3DV_DYNAMIC_STENCIL_REFERENCE;
2639 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2640 return V3DV_DYNAMIC_BLEND_CONSTANTS;
2641 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2642 return V3DV_DYNAMIC_DEPTH_BIAS;
2643 case VK_DYNAMIC_STATE_LINE_WIDTH:
2644 return V3DV_DYNAMIC_LINE_WIDTH;
2645 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2646 return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2647
2648 /* Depth bounds testing is not available in V3D 4.2, so here we just
2649 * ignore this dynamic state. We already assert at pipeline creation
2650 * time that depth bounds testing is not enabled.
2651 */
2652 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2653 return 0;
2654
2655 default:
2656 unreachable("Unhandled dynamic state");
2657 }
2658 }
2659
2660 static void
2661 pipeline_init_dynamic_state(
2662 struct v3dv_pipeline *pipeline,
2663 const VkPipelineDynamicStateCreateInfo *pDynamicState,
2664 const VkPipelineViewportStateCreateInfo *pViewportState,
2665 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2666 const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2667 const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2668 const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2669 {
2670 /* Initialize to default values */
2671 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2672 memset(dynamic, 0, sizeof(*dynamic));
2673 dynamic->stencil_compare_mask.front = ~0;
2674 dynamic->stencil_compare_mask.back = ~0;
2675 dynamic->stencil_write_mask.front = ~0;
2676 dynamic->stencil_write_mask.back = ~0;
2677 dynamic->line_width = 1.0f;
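/* 4 color-write bits (RGBA) per render target, all enabled by default */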
2678 dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
2679
2680 /* Create a mask of enabled dynamic states */
2681 uint32_t dynamic_states = 0;
2682 if (pDynamicState) {
2683 uint32_t count = pDynamicState->dynamicStateCount;
2684 for (uint32_t s = 0; s < count; s++) {
2685 dynamic_states |=
2686 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2687 }
2688 }
2689
2690 /* For any pipeline states that are not dynamic, set the dynamic state
2691 * from the static pipeline state.
2692 */
2693 if (pViewportState) {
2694 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2695 dynamic->viewport.count = pViewportState->viewportCount;
2696 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2697 pViewportState->viewportCount);
2698
2699 for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2700 v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2701 dynamic->viewport.scale[i],
2702 dynamic->viewport.translate[i]);
2703 }
2704 }
2705
2706 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2707 dynamic->scissor.count = pViewportState->scissorCount;
2708 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2709 pViewportState->scissorCount);
2710 }
2711 }
2712
2713 if (pDepthStencilState) {
2714 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2715 dynamic->stencil_compare_mask.front =
2716 pDepthStencilState->front.compareMask;
2717 dynamic->stencil_compare_mask.back =
2718 pDepthStencilState->back.compareMask;
2719 }
2720
2721 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2722 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2723 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2724 }
2725
2726 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2727 dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2728 dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2729 }
2730 }
2731
2732 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2733 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2734 sizeof(dynamic->blend_constants));
2735 }
2736
2737 if (pRasterizationState) {
2738 if (pRasterizationState->depthBiasEnable &&
2739 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2740 dynamic->depth_bias.constant_factor =
2741 pRasterizationState->depthBiasConstantFactor;
2742 dynamic->depth_bias.depth_bias_clamp =
2743 pRasterizationState->depthBiasClamp;
2744 dynamic->depth_bias.slope_factor =
2745 pRasterizationState->depthBiasSlopeFactor;
2746 }
2747 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2748 dynamic->line_width = pRasterizationState->lineWidth;
2749 }
2750
2751 if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2752 dynamic->color_write_enable = 0;
2753 for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2754 dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2755 }
2756
2757 pipeline->dynamic_state.mask = dynamic_states;
2758 }
2759
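/* For early-Z purposes, a stencil configuration is a no-op when the stencil
 * test always passes and a depth-test failure doesn't modify the stencil
 * buffer: in that case discarding fragments early cannot change the final
 * stencil contents.
 */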
2760 static bool
2761 stencil_op_is_no_op(const VkStencilOpState *stencil)
2762 {
2763 return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2764 stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2765 }
2766
2767 static void
2768 enable_depth_bias(struct v3dv_pipeline *pipeline,
2769 const VkPipelineRasterizationStateCreateInfo *rs_info)
2770 {
2771 pipeline->depth_bias.enabled = false;
2772 pipeline->depth_bias.is_z16 = false;
2773
2774 if (!rs_info || !rs_info->depthBiasEnable)
2775 return;
2776
2777 /* Check the depth/stencil attachment description for the subpass used with
2778 * this pipeline.
2779 */
2780 assert(pipeline->pass && pipeline->subpass);
2781 struct v3dv_render_pass *pass = pipeline->pass;
2782 struct v3dv_subpass *subpass = pipeline->subpass;
2783
2784 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2785 return;
2786
2787 assert(subpass->ds_attachment.attachment < pass->attachment_count);
2788 struct v3dv_render_pass_attachment *att =
2789 &pass->attachments[subpass->ds_attachment.attachment];
2790
2791 if (att->desc.format == VK_FORMAT_D16_UNORM)
2792 pipeline->depth_bias.is_z16 = true;
2793
2794 pipeline->depth_bias.enabled = true;
2795 }
2796
2797 static void
2798 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2799 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2800 {
2801 if (!ds_info || !ds_info->depthTestEnable) {
2802 pipeline->ez_state = V3D_EZ_DISABLED;
2803 return;
2804 }
2805
2806 switch (ds_info->depthCompareOp) {
2807 case VK_COMPARE_OP_LESS:
2808 case VK_COMPARE_OP_LESS_OR_EQUAL:
2809 pipeline->ez_state = V3D_EZ_LT_LE;
2810 break;
2811 case VK_COMPARE_OP_GREATER:
2812 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2813 pipeline->ez_state = V3D_EZ_GT_GE;
2814 break;
2815 case VK_COMPARE_OP_NEVER:
2816 case VK_COMPARE_OP_EQUAL:
2817 pipeline->ez_state = V3D_EZ_UNDECIDED;
2818 break;
2819 default:
2820 pipeline->ez_state = V3D_EZ_DISABLED;
2821 pipeline->incompatible_ez_test = true;
2822 break;
2823 }
2824
2825 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2826 if (ds_info->stencilTestEnable &&
2827 (!stencil_op_is_no_op(&ds_info->front) ||
2828 !stencil_op_is_no_op(&ds_info->back))) {
2829 pipeline->ez_state = V3D_EZ_DISABLED;
2830 }
2831
2832 /* If the FS writes Z, then it may update against the chosen EZ direction */
2833 struct v3dv_shader_variant *fs_variant =
2834 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2835 if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2836 !fs_variant->prog_data.fs->writes_z_from_fep) {
2837 pipeline->ez_state = V3D_EZ_DISABLED;
2838 }
2839 }
2840
2841 static bool
2842 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2843 {
2844 for (uint8_t i = 0; i < pipeline->va_count; i++) {
2845 if (vk_format_is_int(pipeline->va[i].vk_format))
2846 return true;
2847 }
2848 return false;
2849 }
2850
2851 /* @pipeline can be NULL. We assume in that case that all the attributes have
2852 * a float format (we only create an all-float BO once and we reuse it with
2853 * all float pipelines), otherwise we look at the actual type of each
2854 * attribute used with the specific pipeline passed in.
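 *
 * The default value written for every attribute is (0, 0, 0, 1), where the
 * w component is the integer 1 for integer formats and the bit pattern of
 * the float 1.0 (fui(1.0) == 0x3f800000) otherwise.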
2855 */
2856 struct v3dv_bo *
2857 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2858 struct v3dv_pipeline *pipeline)
2859 {
2860 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2861 struct v3dv_bo *bo;
2862
2863 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2864
2865 if (!bo) {
2866 fprintf(stderr, "failed to allocate memory for the default "
2867 "attribute values\n");
2868 return NULL;
2869 }
2870
2871 bool ok = v3dv_bo_map(device, bo, size);
2872 if (!ok) {
2873 fprintf(stderr, "failed to map default attribute values buffer\n");
2874 return NULL;
2875 }
2876
2877 uint32_t *attrs = bo->map;
2878 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2879 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2880 attrs[i * 4 + 0] = 0;
2881 attrs[i * 4 + 1] = 0;
2882 attrs[i * 4 + 2] = 0;
2883 VkFormat attr_format =
2884 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2885 if (i < va_count && vk_format_is_int(attr_format)) {
2886 attrs[i * 4 + 3] = 1;
2887 } else {
2888 attrs[i * 4 + 3] = fui(1.0);
2889 }
2890 }
2891
2892 v3dv_bo_unmap(device, bo);
2893
2894 return bo;
2895 }
2896
2897 static void
2898 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2899 const VkPipelineMultisampleStateCreateInfo *ms_info)
2900 {
2901 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2902
2903 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2904 * requires this to be 0xf or 0x0 if using a single sample.
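 *
 * For example, with 4x MSAA and an application sample mask of 0x5, the
 * resulting pipeline sample_mask is 0xf & 0x5 = 0x5 (samples 0 and 2
 * enabled).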
2905 */
2906 if (ms_info && ms_info->pSampleMask &&
2907 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2908 pipeline->sample_mask &= ms_info->pSampleMask[0];
2909 }
2910 }
2911
2912 static void
2913 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2914 const VkPipelineMultisampleStateCreateInfo *ms_info)
2915 {
2916 pipeline->sample_rate_shading =
2917 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2918 ms_info->sampleShadingEnable;
2919 }
2920
2921 static VkResult
2922 pipeline_init(struct v3dv_pipeline *pipeline,
2923 struct v3dv_device *device,
2924 struct v3dv_pipeline_cache *cache,
2925 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2926 const VkAllocationCallbacks *pAllocator)
2927 {
2928 VkResult result = VK_SUCCESS;
2929
2930 pipeline->device = device;
2931
2932 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2933 pipeline->layout = layout;
2934
2935 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2936 assert(pCreateInfo->subpass < render_pass->subpass_count);
2937 pipeline->pass = render_pass;
2938 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2939
2940 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2941 pCreateInfo->pInputAssemblyState;
2942 pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2943
2944 /* If rasterization is not enabled, various CreateInfo structs must be
2945 * ignored.
2946 */
2947 const bool raster_enabled =
2948 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2949
2950 const VkPipelineViewportStateCreateInfo *vp_info =
2951 raster_enabled ? pCreateInfo->pViewportState : NULL;
2952
2953 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2954 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2955
2956 const VkPipelineRasterizationStateCreateInfo *rs_info =
2957 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2958
2959 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2960 rs_info ? vk_find_struct_const(
2961 rs_info->pNext,
2962 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2963 NULL;
2964
2965 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2966 rs_info ? vk_find_struct_const(
2967 rs_info->pNext,
2968 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2969 NULL;
2970
2971 const VkPipelineColorBlendStateCreateInfo *cb_info =
2972 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2973
2974 const VkPipelineMultisampleStateCreateInfo *ms_info =
2975 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2976
2977 const VkPipelineColorWriteCreateInfoEXT *cw_info =
2978 cb_info ? vk_find_struct_const(cb_info->pNext,
2979 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2980 NULL;
2981
2982 pipeline_init_dynamic_state(pipeline,
2983 pCreateInfo->pDynamicState,
2984 vp_info, ds_info, cb_info, rs_info, cw_info);
2985
2986 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2987 * feature and it shouldn't be used by any pipeline.
2988 */
2989 assert(!ds_info || !ds_info->depthBoundsTestEnable);
2990
2991 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2992 rs_info, pv_info, ls_info,
2993 ms_info);
2994
2995 enable_depth_bias(pipeline, rs_info);
2996 pipeline_set_sample_mask(pipeline, ms_info);
2997 pipeline_set_sample_rate_shading(pipeline, ms_info);
2998
2999 pipeline->primitive_restart =
3000 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3001
3002 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3003
3004 if (result != VK_SUCCESS) {
3005 /* The caller will destroy the pipeline, and we didn't allocate any
3006 * extra info, so we don't need to do anything else.
3007 */
3008 return result;
3009 }
3010
3011 const VkPipelineVertexInputStateCreateInfo *vi_info =
3012 pCreateInfo->pVertexInputState;
3013
3014 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3015 vk_find_struct_const(vi_info->pNext,
3016 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3017
3018 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3019
3020 if (pipeline_has_integer_vertex_attrib(pipeline)) {
3021 pipeline->default_attribute_values =
3022 v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3023 if (!pipeline->default_attribute_values)
3024 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3025 } else {
3026 pipeline->default_attribute_values = NULL;
3027 }
3028
3029 /* This must be done after the pipeline has been compiled */
3030 pipeline_set_ez_state(pipeline, ds_info);
3031
3032 return result;
3033 }
3034
3035 static VkResult
3036 graphics_pipeline_create(VkDevice _device,
3037 VkPipelineCache _cache,
3038 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3039 const VkAllocationCallbacks *pAllocator,
3040 VkPipeline *pPipeline)
3041 {
3042 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3043 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3044
3045 struct v3dv_pipeline *pipeline;
3046 VkResult result;
3047
3048 /* Use the default pipeline cache if none is specified */
3049 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3050 cache = &device->default_pipeline_cache;
3051
3052 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3053 VK_OBJECT_TYPE_PIPELINE);
3054
3055 if (pipeline == NULL)
3056 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3057
3058 result = pipeline_init(pipeline, device, cache,
3059 pCreateInfo,
3060 pAllocator);
3061
3062 if (result != VK_SUCCESS) {
3063 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3064 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3065 *pPipeline = VK_NULL_HANDLE;
3066 return result;
3067 }
3068
3069 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3070
3071 return VK_SUCCESS;
3072 }
3073
3074 VKAPI_ATTR VkResult VKAPI_CALL
3075 v3dv_CreateGraphicsPipelines(VkDevice _device,
3076 VkPipelineCache pipelineCache,
3077 uint32_t count,
3078 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3079 const VkAllocationCallbacks *pAllocator,
3080 VkPipeline *pPipelines)
3081 {
3082 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3083 VkResult result = VK_SUCCESS;
3084
3085 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3086 mtx_lock(&device->pdevice->mutex);
3087
3088 uint32_t i = 0;
3089 for (; i < count; i++) {
3090 VkResult local_result;
3091
3092 local_result = graphics_pipeline_create(_device,
3093 pipelineCache,
3094 &pCreateInfos[i],
3095 pAllocator,
3096 &pPipelines[i]);
3097
3098 if (local_result != VK_SUCCESS) {
3099 result = local_result;
3100 pPipelines[i] = VK_NULL_HANDLE;
3101
3102 if (pCreateInfos[i].flags &
3103 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3104 break;
3105 }
3106 }
3107
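   /* If we bailed out early due to a failure, set the remaining entries to
    * VK_NULL_HANDLE as required by the spec.
    */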
3108 for (; i < count; i++)
3109 pPipelines[i] = VK_NULL_HANDLE;
3110
3111 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3112 mtx_unlock(&device->pdevice->mutex);
3113
3114 return result;
3115 }
3116
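/* Returns the size and alignment used for a shared (compute) variable of the
 * given scalar or vector type: booleans take 4 bytes and 3-component vectors
 * are aligned as if they had 4 components.
 */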
3117 static void
3118 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3119 {
3120 assert(glsl_type_is_vector_or_scalar(type));
3121
3122 uint32_t comp_size = glsl_type_is_boolean(type)
3123 ? 4 : glsl_get_bit_size(type) / 8;
3124 unsigned length = glsl_get_vector_elements(type);
3125    *size = comp_size * length;
3126 *align = comp_size * (length == 3 ? 4 : length);
3127 }
3128
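/* Lowers compute shared-memory variables to explicit 32-bit byte offsets so
 * later passes and the backend can address shared memory directly.
 */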
3129 static void
3130 lower_cs_shared(struct nir_shader *nir)
3131 {
3132 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3133 nir_var_mem_shared, shared_type_info);
3134 NIR_PASS(_, nir, nir_lower_explicit_io,
3135 nir_var_mem_shared, nir_address_format_32bit_offset);
3136 }
3137
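/* Compiles the compute shader stage of the pipeline. The compiled variant is
 * looked up in the pipeline cache first and we only compile from NIR on a
 * cache miss (or when internal representations must be captured).
 */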
3138 static VkResult
3139 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3140 struct v3dv_pipeline_cache *cache,
3141 const VkComputePipelineCreateInfo *info,
3142 const VkAllocationCallbacks *alloc)
3143 {
3144 VkPipelineCreationFeedback pipeline_feedback = {
3145 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3146 };
3147 int64_t pipeline_start = os_time_get_nano();
3148
3149 struct v3dv_device *device = pipeline->device;
3150 struct v3dv_physical_device *physical_device =
3151 &device->instance->physicalDevice;
3152
3153 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3154 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3155
3156 struct v3dv_pipeline_stage *p_stage =
3157 vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3158 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3159 if (!p_stage)
3160 return VK_ERROR_OUT_OF_HOST_MEMORY;
3161
3162 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3163 p_stage->pipeline = pipeline;
3164 p_stage->stage = gl_shader_stage_to_broadcom(stage);
3165 p_stage->entrypoint = sinfo->pName;
3166 p_stage->module = vk_shader_module_from_handle(sinfo->module);
3167 p_stage->spec_info = sinfo->pSpecializationInfo;
3168 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3169
3170 vk_pipeline_hash_shader_stage(&info->stage, p_stage->shader_sha1);
3171
3172 p_stage->nir = NULL;
3173
3174 pipeline->cs = p_stage;
3175 pipeline->active_stages |= sinfo->stage;
3176
3177 /* First we try to get the variants from the pipeline cache (unless we are
3178     * required to capture internal representations, since in that case we
3179     * need to compile anyway).
3180 */
3181 bool needs_executable_info =
3182 info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3183 if (!needs_executable_info) {
3184 struct v3dv_pipeline_key pipeline_key;
3185 pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3186 pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3187
3188 bool cache_hit = false;
3189 pipeline->shared_data =
3190 v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3191
3192 if (pipeline->shared_data != NULL) {
3193 assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3194 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3195 pipeline_feedback.flags |=
3196 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3197
3198 goto success;
3199 }
3200 }
3201
3202 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3203 return VK_PIPELINE_COMPILE_REQUIRED;
3204
3205 pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3206 pipeline,
3207 false);
3208 if (!pipeline->shared_data)
3209 return VK_ERROR_OUT_OF_HOST_MEMORY;
3210
3211 p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3212
3213    /* If not found in the cache, compile it */
3214 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3215 assert(p_stage->nir);
3216
3217 nir_optimize(p_stage->nir, false);
3218 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3219 lower_cs_shared(p_stage->nir);
3220
3221 VkResult result = VK_SUCCESS;
3222
3223 struct v3d_key key;
3224 memset(&key, 0, sizeof(key));
3225 pipeline_populate_v3d_key(&key, p_stage, 0,
3226 pipeline->device->features.robustBufferAccess);
3227 pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3228 pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3229 alloc, &result);
3230
3231 if (result != VK_SUCCESS)
3232 return result;
3233
3234 if (!upload_assembly(pipeline))
3235 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3236
3237 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3238
3239 success:
3240
3241 pipeline_check_buffer_device_address(pipeline);
3242
3243 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3244 write_creation_feedback(pipeline,
3245 info->pNext,
3246 &pipeline_feedback,
3247 1,
3248 &info->stage);
3249
3250    /* Now that the variants are owned by pipeline->shared_data, we no longer
3251     * need the pipeline stages once compilation is done.
3252 */
3253 if (!needs_executable_info)
3254 pipeline_free_stages(device, pipeline, alloc);
3255
3256 pipeline_check_spill_size(pipeline);
3257
3258 return VK_SUCCESS;
3259 }
3260
3261 static VkResult
3262 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3263 struct v3dv_device *device,
3264 struct v3dv_pipeline_cache *cache,
3265 const VkComputePipelineCreateInfo *info,
3266 const VkAllocationCallbacks *alloc)
3267 {
3268 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3269
3270 pipeline->device = device;
3271 pipeline->layout = layout;
3272
3273 VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3274
3275 return result;
3276 }
3277
3278 static VkResult
3279 compute_pipeline_create(VkDevice _device,
3280 VkPipelineCache _cache,
3281 const VkComputePipelineCreateInfo *pCreateInfo,
3282 const VkAllocationCallbacks *pAllocator,
3283 VkPipeline *pPipeline)
3284 {
3285 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3286 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3287
3288 struct v3dv_pipeline *pipeline;
3289 VkResult result;
3290
3291 /* Use the default pipeline cache if none is specified */
3292 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3293 cache = &device->default_pipeline_cache;
3294
3295 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3296 VK_OBJECT_TYPE_PIPELINE);
3297 if (pipeline == NULL)
3298 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3299
3300 result = compute_pipeline_init(pipeline, device, cache,
3301 pCreateInfo, pAllocator);
3302 if (result != VK_SUCCESS) {
3303 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3304 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3305 *pPipeline = VK_NULL_HANDLE;
3306 return result;
3307 }
3308
3309 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3310
3311 return VK_SUCCESS;
3312 }
3313
3314 VKAPI_ATTR VkResult VKAPI_CALL
3315 v3dv_CreateComputePipelines(VkDevice _device,
3316 VkPipelineCache pipelineCache,
3317 uint32_t createInfoCount,
3318 const VkComputePipelineCreateInfo *pCreateInfos,
3319 const VkAllocationCallbacks *pAllocator,
3320 VkPipeline *pPipelines)
3321 {
3322 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3323 VkResult result = VK_SUCCESS;
3324
3325 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3326 mtx_lock(&device->pdevice->mutex);
3327
3328 uint32_t i = 0;
3329 for (; i < createInfoCount; i++) {
3330 VkResult local_result;
3331 local_result = compute_pipeline_create(_device,
3332 pipelineCache,
3333 &pCreateInfos[i],
3334 pAllocator,
3335 &pPipelines[i]);
3336
3337 if (local_result != VK_SUCCESS) {
3338 result = local_result;
3339 pPipelines[i] = VK_NULL_HANDLE;
3340
3341 if (pCreateInfos[i].flags &
3342 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3343 break;
3344 }
3345 }
3346
3347 for (; i < createInfoCount; i++)
3348 pPipelines[i] = VK_NULL_HANDLE;
3349
3350 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3351 mtx_unlock(&device->pdevice->mutex);
3352
3353 return result;
3354 }
3355
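/* Returns the NIR shader for the given stage if the pipeline still holds it
 * (the NIR is dropped together with the pipeline stages after compilation
 * unless executable info was requested).
 */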
3356 static nir_shader *
3357 pipeline_get_nir(struct v3dv_pipeline *pipeline,
3358 enum broadcom_shader_stage stage)
3359 {
3360 switch (stage) {
3361 case BROADCOM_SHADER_VERTEX:
3362 if (pipeline->vs)
3363 return pipeline->vs->nir;
3364 break;
3365 case BROADCOM_SHADER_VERTEX_BIN:
3366       if (pipeline->vs_bin)
3367 return pipeline->vs_bin->nir;
3368 break;
3369 case BROADCOM_SHADER_GEOMETRY:
3370       if (pipeline->gs)
3371 return pipeline->gs->nir;
3372 break;
3373 case BROADCOM_SHADER_GEOMETRY_BIN:
3374 if (pipeline->gs_bin)
3375 return pipeline->gs_bin->nir;
3376 break;
3377 case BROADCOM_SHADER_FRAGMENT:
3378 if (pipeline->fs)
3379 return pipeline->fs->nir;
3380 break;
3381 case BROADCOM_SHADER_COMPUTE:
3382       if (pipeline->cs)
3383 return pipeline->cs->nir;
3384 break;
3385 default:
3386 unreachable("Unsupported shader stage");
3387 }
3388
3389 return NULL;
3390 }
3391
3392 static struct v3d_prog_data *
3393 pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3394 enum broadcom_shader_stage stage)
3395 {
3396 if (pipeline->shared_data->variants[stage])
3397 return pipeline->shared_data->variants[stage]->prog_data.base;
3398 return NULL;
3399 }
3400
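/* Returns a pointer to the QPU assembly of the given stage inside the shared
 * assembly BO (which must already be mapped) and its size in bytes.
 */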
3401 static uint64_t *
3402 pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3403 enum broadcom_shader_stage stage,
3404 uint32_t *qpu_size)
3405 {
3406 struct v3dv_shader_variant *variant =
3407 pipeline->shared_data->variants[stage];
3408 if (!variant) {
3409 *qpu_size = 0;
3410 return NULL;
3411 }
3412
3413 /* We expect the QPU BO to have been mapped before calling here */
3414 struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3415 assert(qpu_bo && qpu_bo->map_size >= variant->assembly_offset +
3416 variant->qpu_insts_size);
3417
3418 *qpu_size = variant->qpu_insts_size;
3419 uint64_t *qpu = (uint64_t *)
3420 (((uint8_t *) qpu_bo->map) + variant->assembly_offset);
3421 return qpu;
3422 }
3423
3424 /* FIXME: we use the same macro in various drivers, maybe move it to
3425  * the common vk_util.h?
3426 */
3427 #define WRITE_STR(field, ...) ({ \
3428 memset(field, 0, sizeof(field)); \
3429 UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3430 assert(_i > 0 && _i < sizeof(field)); \
3431 })
3432
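/* Writes a NUL-terminated IR string into a
 * VkPipelineExecutableInternalRepresentationKHR following the usual Vulkan
 * two-call idiom: when pData is NULL only the required size is reported,
 * otherwise as much as fits is copied and false is returned on truncation.
 */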
3433 static bool
3434 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3435 const char *data)
3436 {
3437 ir->isText = VK_TRUE;
3438
3439 size_t data_len = strlen(data) + 1;
3440
3441 if (ir->pData == NULL) {
3442 ir->dataSize = data_len;
3443 return true;
3444 }
3445
3446 strncpy(ir->pData, data, ir->dataSize);
3447 if (ir->dataSize < data_len)
3448 return false;
3449
3450 ir->dataSize = data_len;
3451 return true;
3452 }
3453
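/* Appends printf-style formatted text at *offset of a ralloc'ed string,
 * growing the string as needed.
 */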
3454 static void
3455 append(char **str, size_t *offset, const char *fmt, ...)
3456 {
3457 va_list args;
3458 va_start(args, fmt);
3459 ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3460 va_end(args);
3461 }
3462
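/* Collects, once per pipeline, the final NIR and disassembled QPU code of
 * each active stage so they can be exposed through
 * VK_KHR_pipeline_executable_properties.
 */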
3463 static void
3464 pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3465 {
3466 if (pipeline->executables.mem_ctx)
3467 return;
3468
3469 pipeline->executables.mem_ctx = ralloc_context(NULL);
3470 util_dynarray_init(&pipeline->executables.data,
3471 pipeline->executables.mem_ctx);
3472
3473 /* Don't crash for failed/bogus pipelines */
3474 if (!pipeline->shared_data || !pipeline->shared_data->assembly_bo)
3475 return;
3476
3477 /* Map the assembly BO so we can read the pipeline's QPU code */
3478 struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3479
3480 if (!v3dv_bo_map(pipeline->device, qpu_bo, qpu_bo->size)) {
3481 fprintf(stderr, "failed to map QPU buffer\n");
3482 return;
3483 }
3484
3485 for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3486 VkShaderStageFlags vk_stage =
3487 mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3488 if (!(vk_stage & pipeline->active_stages))
3489 continue;
3490
3491 nir_shader *nir = pipeline_get_nir(pipeline, s);
3492 char *nir_str = nir ?
3493 nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3494
3495 char *qpu_str = NULL;
3496 uint32_t qpu_size;
3497 uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3498 if (qpu) {
3499 uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
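         /* Preallocate ~96 bytes per instruction; append() grows the string
          * as needed.
          */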
3500 qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3501 qpu_inst_count * 96);
3502 size_t offset = 0;
3503 for (int i = 0; i < qpu_inst_count; i++) {
3504 const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3505 append(&qpu_str, &offset, "%s\n", str);
3506 ralloc_free((void *)str);
3507 }
3508 }
3509
3510 struct v3dv_pipeline_executable_data data = {
3511 .stage = s,
3512 .nir_str = nir_str,
3513 .qpu_str = qpu_str,
3514 };
3515 util_dynarray_append(&pipeline->executables.data,
3516 struct v3dv_pipeline_executable_data, data);
3517 }
3518
3519 v3dv_bo_unmap(pipeline->device, qpu_bo);
3520 }
3521
3522 static const struct v3dv_pipeline_executable_data *
3523 pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3524 {
3525 assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3526 struct v3dv_pipeline_executable_data));
3527 return util_dynarray_element(&pipeline->executables.data,
3528 struct v3dv_pipeline_executable_data,
3529 index);
3530 }
3531
3532 VKAPI_ATTR VkResult VKAPI_CALL
3533 v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3534 VkDevice device,
3535 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3536 uint32_t *pInternalRepresentationCount,
3537 VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3538 {
3539 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3540
3541 pipeline_collect_executable_data(pipeline);
3542
3543 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3544 pInternalRepresentations, pInternalRepresentationCount);
3545
3546 bool incomplete = false;
3547 const struct v3dv_pipeline_executable_data *exe =
3548 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3549
3550 if (exe->nir_str) {
3551 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3552 &out, ir) {
3553 WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3554 WRITE_STR(ir->description, "Final NIR form");
3555 if (!write_ir_text(ir, exe->nir_str))
3556 incomplete = true;
3557 }
3558 }
3559
3560 if (exe->qpu_str) {
3561 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3562 &out, ir) {
3563 WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3564 WRITE_STR(ir->description, "Final QPU assembly");
3565 if (!write_ir_text(ir, exe->qpu_str))
3566 incomplete = true;
3567 }
3568 }
3569
3570 return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3571 }
3572
3573 VKAPI_ATTR VkResult VKAPI_CALL
3574 v3dv_GetPipelineExecutablePropertiesKHR(
3575 VkDevice device,
3576 const VkPipelineInfoKHR *pPipelineInfo,
3577 uint32_t *pExecutableCount,
3578 VkPipelineExecutablePropertiesKHR *pProperties)
3579 {
3580 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3581
3582 pipeline_collect_executable_data(pipeline);
3583
3584 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3585 pProperties, pExecutableCount);
3586
3587 util_dynarray_foreach(&pipeline->executables.data,
3588 struct v3dv_pipeline_executable_data, exe) {
3589 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3590 gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3591 props->stages = mesa_to_vk_shader_stage(mesa_stage);
3592
3593 WRITE_STR(props->name, "%s (%s)",
3594 _mesa_shader_stage_to_abbrev(mesa_stage),
3595 broadcom_shader_stage_is_binning(exe->stage) ?
3596 "Binning" : "Render");
3597
3598 WRITE_STR(props->description, "%s",
3599 _mesa_shader_stage_to_string(mesa_stage));
3600
3601 props->subgroupSize = V3D_CHANNELS;
3602 }
3603 }
3604
3605 return vk_outarray_status(&out);
3606 }
3607
3608 VKAPI_ATTR VkResult VKAPI_CALL
3609 v3dv_GetPipelineExecutableStatisticsKHR(
3610 VkDevice device,
3611 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3612 uint32_t *pStatisticCount,
3613 VkPipelineExecutableStatisticKHR *pStatistics)
3614 {
3615 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3616
3617 pipeline_collect_executable_data(pipeline);
3618
3619 const struct v3dv_pipeline_executable_data *exe =
3620 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3621
3622 struct v3d_prog_data *prog_data =
3623 pipeline_get_prog_data(pipeline, exe->stage);
3624
3625 struct v3dv_shader_variant *variant =
3626 pipeline->shared_data->variants[exe->stage];
3627 uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3628
3629 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3630 pStatistics, pStatisticCount);
3631
3632 if (qpu_inst_count > 0) {
3633 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3634 WRITE_STR(stat->name, "Compile Strategy");
3635 WRITE_STR(stat->description, "Chosen compile strategy index");
3636 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3637 stat->value.u64 = prog_data->compile_strategy_idx;
3638 }
3639
3640 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3641 WRITE_STR(stat->name, "Instruction Count");
3642 WRITE_STR(stat->description, "Number of QPU instructions");
3643 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3644 stat->value.u64 = qpu_inst_count;
3645 }
3646
3647 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3648 WRITE_STR(stat->name, "Thread Count");
3649 WRITE_STR(stat->description, "Number of QPU threads dispatched");
3650 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3651 stat->value.u64 = prog_data->threads;
3652 }
3653
3654 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3655 WRITE_STR(stat->name, "Spill Size");
3656 WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3657 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3658 stat->value.u64 = prog_data->spill_size;
3659 }
3660
3661 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3662 WRITE_STR(stat->name, "TMU Spills");
3663 WRITE_STR(stat->description, "Number of times a register was spilled "
3664 "to memory");
3665 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3666          stat->value.u64 = prog_data->tmu_spills;
3667 }
3668
3669 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3670 WRITE_STR(stat->name, "TMU Fills");
3671 WRITE_STR(stat->description, "Number of times a register was filled "
3672 "from memory");
3673 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3674          stat->value.u64 = prog_data->tmu_fills;
3675 }
3676
3677 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3678 WRITE_STR(stat->name, "QPU Read Stalls");
3679 WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3680 "register read dependency");
3681 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3682 stat->value.u64 = prog_data->qpu_read_stalls;
3683 }
3684 }
3685
3686 return vk_outarray_status(&out);
3687 }
3688