1 /*
2 * Copyright © 2019 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_util.h"
25
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28
29 #include "vk_format_info.h"
30
31 #include "common/v3d_debug.h"
32
33 #include "compiler/nir/nir_builder.h"
34 #include "nir/nir_serialize.h"
35
36 #include "util/u_atomic.h"
37
38 #include "vulkan/util/vk_format.h"
39
40 #include "broadcom/cle/v3dx_pack.h"
41
42 void
v3dv_print_v3d_key(struct v3d_key *key,
                   uint32_t v3d_key_size)
45 {
46 struct mesa_sha1 ctx;
47 unsigned char sha1[20];
48 char sha1buf[41];
49
50 _mesa_sha1_init(&ctx);
51
52 _mesa_sha1_update(&ctx, key, v3d_key_size);
53
54 _mesa_sha1_final(&ctx, sha1);
55 _mesa_sha1_format(sha1buf, sha1);
56
57 fprintf(stderr, "key %p: %s\n", key, sha1buf);
58 }
59
60 VkResult
v3dv_CreateShaderModule(VkDevice _device,
                        const VkShaderModuleCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkShaderModule *pShaderModule)
65 {
66 V3DV_FROM_HANDLE(v3dv_device, device, _device);
67 struct v3dv_shader_module *module;
68
69 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
70 assert(pCreateInfo->flags == 0);
71
72 module = vk_alloc2(&device->alloc, pAllocator,
73 sizeof(*module) + pCreateInfo->codeSize, 8,
74 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
75 if (module == NULL)
76 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
77
78 module->nir = NULL;
79
80 module->size = pCreateInfo->codeSize;
81 memcpy(module->data, pCreateInfo->pCode, module->size);
82
83 _mesa_sha1_compute(module->data, module->size, module->sha1);
84
85 *pShaderModule = v3dv_shader_module_to_handle(module);
86
87 return VK_SUCCESS;
88 }
89
90 void
v3dv_shader_module_internal_init(struct v3dv_shader_module *module,
                                 nir_shader *nir)
93 {
94 module->nir = nir;
95 module->size = 0;
96
97 if (nir != NULL) {
98 struct blob blob;
99 blob_init(&blob);
100
101 nir_serialize(&blob, nir, false);
102 if (!blob.out_of_memory)
103 _mesa_sha1_compute(blob.data, blob.size, module->sha1);
104
105 blob_finish(&blob);
106 }
107 }
108
109 void
v3dv_DestroyShaderModule(VkDevice _device,
                         VkShaderModule _module,
                         const VkAllocationCallbacks *pAllocator)
113 {
114 V3DV_FROM_HANDLE(v3dv_device, device, _device);
115 V3DV_FROM_HANDLE(v3dv_shader_module, module, _module);
116
117 if (!module)
118 return;
119
120 /* NIR modules (which are only created internally by the driver) are not
121 * dynamically allocated so we should never call this for them.
122 * Instead the driver is responsible for freeing the NIR code when it is
123 * no longer needed.
124 */
125 assert(module->nir == NULL);
126
127 vk_free2(&device->alloc, pAllocator, module);
128 }
129
130 void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
133 {
134 if (variant->assembly_bo)
135 v3dv_bo_free(device, variant->assembly_bo);
136 ralloc_free(variant->prog_data.base);
137 vk_free(&device->alloc, variant);
138 }
139
140 static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
144 {
145 if (!p_stage)
146 return;
147
148 ralloc_free(p_stage->nir);
149 if (p_stage->current_variant)
150 v3dv_shader_variant_unref(device, p_stage->current_variant);
151 vk_free2(&device->alloc, pAllocator, p_stage);
152 }
153
154 static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
158 {
159 if (!pipeline)
160 return;
161
   /* FIXME: we can't just use a loop over the mesa stages because of the
    * binning (coord) shaders; it would be good to find an alternative.
    */
165 destroy_pipeline_stage(device, pipeline->vs, pAllocator);
166 destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
167 destroy_pipeline_stage(device, pipeline->fs, pAllocator);
168 destroy_pipeline_stage(device, pipeline->cs, pAllocator);
169
170 if (pipeline->spill.bo) {
171 assert(pipeline->spill.size_per_thread > 0);
172 v3dv_bo_free(device, pipeline->spill.bo);
173 }
174
175 if (pipeline->default_attribute_values) {
176 v3dv_bo_free(device, pipeline->default_attribute_values);
177 pipeline->default_attribute_values = NULL;
178 }
179
   if (pipeline->combined_index_map)
      _mesa_hash_table_destroy(pipeline->combined_index_map, NULL);
185
186 vk_free2(&device->alloc, pAllocator, pipeline);
187 }
188
189 void
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
193 {
194 V3DV_FROM_HANDLE(v3dv_device, device, _device);
195 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
196
197 if (!pipeline)
198 return;
199
200 v3dv_destroy_pipeline(pipeline, device, pAllocator);
201 }
202
203 static const struct spirv_to_nir_options default_spirv_options = {
204 .caps = { false },
205 .ubo_addr_format = nir_address_format_32bit_index_offset,
206 .ssbo_addr_format = nir_address_format_32bit_index_offset,
207 .phys_ssbo_addr_format = nir_address_format_64bit_global,
208 .push_const_addr_format = nir_address_format_logical,
209 .shared_addr_format = nir_address_format_32bit_offset,
210 .frag_coord_is_sysval = false,
211 };
212
213 const nir_shader_compiler_options v3dv_nir_options = {
214 .lower_all_io_to_temps = true,
215 .lower_extract_byte = true,
216 .lower_extract_word = true,
217 .lower_bitfield_insert_to_shifts = true,
218 .lower_bitfield_extract_to_shifts = true,
219 .lower_bitfield_reverse = true,
220 .lower_bit_count = true,
221 .lower_cs_local_id_from_index = true,
222 .lower_ffract = true,
223 .lower_fmod = true,
224 .lower_pack_unorm_2x16 = true,
225 .lower_pack_snorm_2x16 = true,
226 .lower_unpack_unorm_2x16 = true,
227 .lower_unpack_snorm_2x16 = true,
228 .lower_pack_unorm_4x8 = true,
229 .lower_pack_snorm_4x8 = true,
230 .lower_unpack_unorm_4x8 = true,
231 .lower_unpack_snorm_4x8 = true,
232 .lower_pack_half_2x16 = true,
233 .lower_unpack_half_2x16 = true,
234 /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
235 * get the tests to pass since it might produce slightly better code.
236 */
237 .lower_uadd_carry = true,
238 .lower_usub_borrow = true,
239 /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
240 * without lowering.
241 */
242 .lower_mul_2x32_64 = true,
243 .lower_fdiv = true,
244 .lower_find_lsb = true,
245 .lower_ffma16 = true,
246 .lower_ffma32 = true,
247 .lower_ffma64 = true,
248 .lower_flrp32 = true,
249 .lower_fpow = true,
250 .lower_fsat = true,
251 .lower_fsqrt = true,
252 .lower_ifind_msb = true,
253 .lower_isign = true,
254 .lower_ldexp = true,
255 .lower_mul_high = true,
256 .lower_wpos_pntc = true,
257 .lower_rotate = true,
258 .lower_to_scalar = true,
259 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
260 * needs to be supported */
261 .lower_interpolate_at = true,
262 };
263
264 const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)
266 {
267 return &v3dv_nir_options;
268 }
269
270 #define OPT(pass, ...) ({ \
271 bool this_progress = false; \
272 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
273 if (this_progress) \
274 progress = true; \
275 this_progress; \
276 })
277
278 static void
nir_optimize(nir_shader *nir,
             struct v3dv_pipeline_stage *stage,
             bool allow_copies)
282 {
283 bool progress;
284
285 do {
286 progress = false;
287 OPT(nir_split_array_vars, nir_var_function_temp);
288 OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
289 OPT(nir_opt_deref);
290 OPT(nir_lower_vars_to_ssa);
291 if (allow_copies) {
292 /* Only run this pass in the first call to nir_optimize. Later calls
293 * assume that we've lowered away any copy_deref instructions and we
294 * don't want to introduce any more.
295 */
296 OPT(nir_opt_find_array_copies);
297 }
298 OPT(nir_opt_copy_prop_vars);
299 OPT(nir_opt_dead_write_vars);
300 OPT(nir_opt_combine_stores, nir_var_all);
301
302 OPT(nir_lower_alu_to_scalar, NULL, NULL);
303
304 OPT(nir_copy_prop);
305 OPT(nir_lower_phis_to_scalar);
306
307 OPT(nir_copy_prop);
308 OPT(nir_opt_dce);
309 OPT(nir_opt_cse);
310 OPT(nir_opt_combine_stores, nir_var_all);
311
312 /* Passing 0 to the peephole select pass causes it to convert
313 * if-statements that contain only move instructions in the branches
314 * regardless of the count.
315 *
316 * Passing 1 to the peephole select pass causes it to convert
317 * if-statements that contain at most a single ALU instruction (total)
318 * in both branches.
319 */
320 OPT(nir_opt_peephole_select, 0, false, false);
321 OPT(nir_opt_peephole_select, 8, false, true);
322
323 OPT(nir_opt_intrinsics);
324 OPT(nir_opt_idiv_const, 32);
325 OPT(nir_opt_algebraic);
326 OPT(nir_opt_constant_folding);
327
328 OPT(nir_opt_dead_cf);
329
330 OPT(nir_opt_if, false);
331 OPT(nir_opt_conditional_discard);
332
333 OPT(nir_opt_remove_phis);
334 OPT(nir_opt_undef);
335 OPT(nir_lower_pack);
336 } while (progress);
337
338 OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
339 }
340
341 static void
preprocess_nir(nir_shader *nir,
               struct v3dv_pipeline_stage *stage)
344 {
345 /* Make sure we lower variable initializers on output variables so that
346 * nir_remove_dead_variables below sees the corresponding stores
347 */
348 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
349
350 /* Now that we've deleted all but the main function, we can go ahead and
351 * lower the rest of the variable initializers.
352 */
353 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
354
355 /* Split member structs. We do this before lower_io_to_temporaries so that
356 * it doesn't lower system values to temporaries by accident.
357 */
358 NIR_PASS_V(nir, nir_split_var_copies);
359 NIR_PASS_V(nir, nir_split_per_member_structs);
360
361 if (nir->info.stage == MESA_SHADER_FRAGMENT)
362 NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
363 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
364 NIR_PASS_V(nir, nir_lower_input_attachments,
365 &(nir_input_attachment_options) {
366 .use_fragcoord_sysval = false,
367 });
368 }
369
370 NIR_PASS_V(nir, nir_lower_explicit_io,
371 nir_var_mem_push_const,
372 nir_address_format_32bit_offset);
373
374 NIR_PASS_V(nir, nir_lower_explicit_io,
375 nir_var_mem_ubo | nir_var_mem_ssbo,
376 nir_address_format_32bit_index_offset);
377
378 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
379 nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
380 NULL);
381
382 NIR_PASS_V(nir, nir_propagate_invariant);
383 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
384 nir_shader_get_entrypoint(nir), true, false);
385
386 NIR_PASS_V(nir, nir_lower_system_values);
387 NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
388
389 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
390
391 NIR_PASS_V(nir, nir_normalize_cubemap_coords);
392
393 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
394
395 NIR_PASS_V(nir, nir_split_var_copies);
396 NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
397
398 nir_optimize(nir, stage, true);
399
400 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
401
402 /* Lower a bunch of stuff */
403 NIR_PASS_V(nir, nir_lower_var_copies);
404
405 NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in |
406 nir_var_shader_out |
407 nir_var_function_temp, UINT32_MAX);
408
409 NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
410 nir_var_mem_ubo | nir_var_mem_ssbo,
411 nir_lower_direct_array_deref_of_vec_load);
412
413 NIR_PASS_V(nir, nir_lower_frexp);
414
415 /* Get rid of split copies */
416 nir_optimize(nir, stage, false);
417 }
418
/* FIXME: This is basically the same code as in anv, tu and radv. Move to a
 * common place?
 */
static struct nir_spirv_specialization*
vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
                          uint32_t *out_num_spec_entries)
425 {
426 if (spec_info == NULL || spec_info->mapEntryCount == 0)
427 return NULL;
428
429 uint32_t num_spec_entries = spec_info->mapEntryCount;
430 struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
431
432 for (uint32_t i = 0; i < num_spec_entries; i++) {
433 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
434 const void *data = spec_info->pData + entry.offset;
435 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
436
437 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
438 switch (entry.size) {
439 case 8:
440 spec_entries[i].value.u64 = *(const uint64_t *)data;
441 break;
442 case 4:
443 spec_entries[i].value.u32 = *(const uint32_t *)data;
444 break;
445 case 2:
446 spec_entries[i].value.u16 = *(const uint16_t *)data;
447 break;
448 case 1:
449 spec_entries[i].value.u8 = *(const uint8_t *)data;
450 break;
451 default:
452 assert(!"Invalid spec constant size");
453 break;
454 }
455 }
456
457 *out_num_spec_entries = num_spec_entries;
458 return spec_entries;
459 }
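
/* Illustrative sketch (not driver code, made-up values): an application that
 * overrides specialization constant 3 with a 32-bit value passes something
 * like
 *
 *    const uint32_t value = 8;
 *    const VkSpecializationMapEntry entry = {
 *       .constantID = 3, .offset = 0, .size = sizeof(value),
 *    };
 *    const VkSpecializationInfo info = {
 *       .mapEntryCount = 1, .pMapEntries = &entry,
 *       .dataSize = sizeof(value), .pData = &value,
 *    };
 *
 * which the loop above turns into spec_entries[0].id == 3 and
 * spec_entries[0].value.u32 == 8.
 */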
460
461 static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
464 {
465 nir_shader *nir;
466 const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
467
468 if (!stage->module->nir) {
469 uint32_t *spirv = (uint32_t *) stage->module->data;
470 assert(stage->module->size % 4 == 0);
471
472 if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
473 v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
474
475 uint32_t num_spec_entries = 0;
476 struct nir_spirv_specialization *spec_entries =
477 vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
478 const struct spirv_to_nir_options spirv_options = default_spirv_options;
479 nir = spirv_to_nir(spirv, stage->module->size / 4,
480 spec_entries, num_spec_entries,
481 stage->stage, stage->entrypoint,
482 &spirv_options, nir_options);
483 nir_validate_shader(nir, "after spirv_to_nir");
484 free(spec_entries);
485 } else {
      /* For NIR modules created by the driver we can't consume the NIR
       * directly, we need to clone it first, since ownership of the NIR code
       * (as with SPIR-V code for SPIR-V shaders) belongs to the creator of
       * the module, and modules can be destroyed immediately after being
       * used to create pipelines.
       */
492 nir = nir_shader_clone(NULL, stage->module->nir);
493 nir_validate_shader(nir, "nir module");
494 }
495 assert(nir->info.stage == stage->stage);
496
497 if (V3D_DEBUG & (V3D_DEBUG_NIR |
498 v3d_debug_flag_for_shader_stage(stage->stage))) {
499 fprintf(stderr, "Initial form: %s prog %d NIR:\n",
500 gl_shader_stage_name(stage->stage),
501 stage->program_id);
502 nir_print_shader(nir, stderr);
503 fprintf(stderr, "\n");
504 }
505
506 /* We have to lower away local variable initializers right before we
507 * inline functions. That way they get properly initialized at the top
508 * of the function and not at the top of its caller.
509 */
510 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
511 NIR_PASS_V(nir, nir_lower_returns);
512 NIR_PASS_V(nir, nir_inline_functions);
513 NIR_PASS_V(nir, nir_opt_deref);
514
515 /* Pick off the single entrypoint that we want */
516 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
517 if (func->is_entrypoint)
518 func->name = ralloc_strdup(func, "main");
519 else
520 exec_node_remove(&func->node);
521 }
522 assert(exec_list_length(&nir->functions) == 1);
523
524 /* Vulkan uses the separate-shader linking model */
525 nir->info.separate_shader = true;
526
527 preprocess_nir(nir, stage);
528
529 return nir;
530 }
531
532 static int
type_size_vec4(const struct glsl_type *type, bool bindless)
534 {
535 return glsl_count_attribute_slots(type, false);
536 }
537
538 static unsigned
descriptor_map_add(struct v3dv_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   bool is_shadow)
545 {
546 assert(array_index < array_size);
547
548 unsigned index = 0;
549 for (unsigned i = 0; i < map->num_desc; i++) {
550 if (set == map->set[i] &&
551 binding == map->binding[i] &&
552 array_index == map->array_index[i]) {
553 assert(array_size == map->array_size[i]);
554 return index;
555 }
556 index++;
557 }
558
559 assert(index == map->num_desc);
560
561 map->set[map->num_desc] = set;
562 map->binding[map->num_desc] = binding;
563 map->array_index[map->num_desc] = array_index;
564 map->array_size[map->num_desc] = array_size;
565 map->is_shadow[map->num_desc] = is_shadow;
566 map->num_desc++;
567
568 return index;
569 }
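
/* A minimal usage sketch (made-up set/binding numbers, not driver code):
 * descriptor_map_add() deduplicates identical (set, binding, array_index)
 * tuples, so repeating an addition returns the slot assigned the first time,
 * while a new tuple takes the next free slot:
 *
 *    unsigned a = descriptor_map_add(&map, 0, 1, 0, 1, false);
 *    unsigned b = descriptor_map_add(&map, 0, 1, 0, 1, false);  // b == a
 *    unsigned c = descriptor_map_add(&map, 0, 2, 0, 1, false);  // c == a + 1
 */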
570
571
572 static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct v3dv_pipeline *pipeline)
575 {
576 assert(instr->intrinsic == nir_intrinsic_load_push_constant);
577 instr->intrinsic = nir_intrinsic_load_uniform;
578 }
579
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * can be used by the v3d_compiler */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3dv_pipeline *pipeline,
                            const struct v3dv_pipeline_layout *layout)
587 {
588 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
589
590 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
591
592 unsigned set = nir_intrinsic_desc_set(instr);
593 unsigned binding = nir_intrinsic_binding(instr);
594 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
595 struct v3dv_descriptor_set_binding_layout *binding_layout =
596 &set_layout->binding[binding];
597 unsigned index = 0;
598
599 switch (nir_intrinsic_desc_type(instr)) {
600 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
601 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
602 struct v3dv_descriptor_map *descriptor_map =
603 nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ?
604 &pipeline->ubo_map : &pipeline->ssbo_map;
605
606 if (!const_val)
607 unreachable("non-constant vulkan_resource_index array index");
608
609 index = descriptor_map_add(descriptor_map, set, binding,
610 const_val->u32,
611 binding_layout->array_size,
612 false /* is_shadow: Doesn't really matter in this case */);
613
614 if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
615 /* skip index 0 which is used for push constants */
616 index++;
617 }
618 break;
619 }
620
621 default:
622 unreachable("unsupported desc_type for vulkan_resource_index");
623 break;
624 }
625
   /* Since we use the deref pass, both vulkan_resource_index and
    * vulkan_load_descriptor return a vec2. But for the index the backend
    * expects just one scalar (like with get_ssbo_size), so here we return
    * only that. Then on load_descriptor we recreate the vec2, keeping the
    * second component (unused right now) at zero.
    */
632 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
633 nir_src_for_ssa(nir_imm_int(b, index)));
634 nir_instr_remove(&instr->instr);
635 }
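
/* Hedged illustration of the numbering the lowering above produces (made-up
 * descriptor layout): UBO descriptors are shifted up by one because backend
 * UBO index 0 is reserved for push constants, while SSBO descriptors are not:
 *
 *    first UBO in ubo_map   -> index 1 handed to the backend
 *    second UBO in ubo_map  -> index 2
 *    first SSBO in ssbo_map -> index 0
 */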
636
637 static struct hash_table *
pipeline_ensure_combined_index_map(struct v3dv_pipeline *pipeline)
639 {
640 if (pipeline->combined_index_map == NULL) {
641 pipeline->combined_index_map =
642 _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);
643 pipeline->next_combined_index = 0;
644 }
645
646 assert(pipeline->combined_index_map);
647
648 return pipeline->combined_index_map;
649 }
650
651 static uint32_t
get_combined_index(struct v3dv_pipeline *pipeline,
                   uint32_t texture_index,
                   uint32_t sampler_index)
655 {
656 struct hash_table *ht = pipeline_ensure_combined_index_map(pipeline);
657 uint32_t key = v3dv_pipeline_combined_index_key_create(texture_index, sampler_index);
658 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
659
660 if (entry)
661 return (uint32_t)(uintptr_t) (entry->data);
662
663 uint32_t new_index = pipeline->next_combined_index;
664 pipeline->next_combined_index++;
665
666 pipeline->combined_index_to_key_map[new_index] = key;
667 _mesa_hash_table_insert(ht, &pipeline->combined_index_to_key_map[new_index],
668 (void *)(uintptr_t) (new_index));
669
670 return new_index;
671 }
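
/* Illustrative sketch (hypothetical indices, not driver code): lookups for a
 * (texture, sampler) pair are memoized in combined_index_map, so repeating a
 * pair returns the same combined index and a new pair takes the next one:
 *
 *    uint32_t i0 = get_combined_index(pipeline, 0, 0);
 *    uint32_t i1 = get_combined_index(pipeline, 0, 0);                   // i1 == i0
 *    uint32_t i2 = get_combined_index(pipeline, 1, V3DV_NO_SAMPLER_IDX); // i2 == i0 + 1
 */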
672
673 static void
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
                        struct v3dv_pipeline *pipeline,
                        const struct v3dv_pipeline_layout *layout)
677 {
678 nir_ssa_def *index = NULL;
679 unsigned base_index = 0;
680 unsigned array_elements = 1;
681 nir_tex_src *src = &instr->src[src_idx];
682 bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
683
684 /* We compute first the offsets */
685 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
686 while (deref->deref_type != nir_deref_type_var) {
687 assert(deref->parent.is_ssa);
688 nir_deref_instr *parent =
689 nir_instr_as_deref(deref->parent.ssa->parent_instr);
690
691 assert(deref->deref_type == nir_deref_type_array);
692
693 if (nir_src_is_const(deref->arr.index) && index == NULL) {
694 /* We're still building a direct index */
695 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
696 } else {
697 if (index == NULL) {
698 /* We used to be direct but not anymore */
699 index = nir_imm_int(b, base_index);
700 base_index = 0;
701 }
702
703 index = nir_iadd(b, index,
704 nir_imul(b, nir_imm_int(b, array_elements),
705 nir_ssa_for_src(b, deref->arr.index, 1)));
706 }
707
708 array_elements *= glsl_get_length(parent->type);
709
710 deref = parent;
711 }
712
713 if (index)
714 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
715
716 /* We have the offsets, we apply them, rewriting the source or removing
717 * instr if needed
718 */
719 if (index) {
720 nir_instr_rewrite_src(&instr->instr, &src->src,
721 nir_src_for_ssa(index));
722
723 src->src_type = is_sampler ?
724 nir_tex_src_sampler_offset :
725 nir_tex_src_texture_offset;
726 } else {
727 nir_tex_instr_remove_src(instr, src_idx);
728 }
729
730 uint32_t set = deref->var->data.descriptor_set;
731 uint32_t binding = deref->var->data.binding;
732 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
733 struct v3dv_descriptor_set_binding_layout *binding_layout =
734 &set_layout->binding[binding];
735
736 /* For input attachments, the shader includes the attachment_idx. As we are
737 * treating them as a texture, we only want the base_index
738 */
739 uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
740 deref->var->data.index + base_index :
741 base_index;
742
743 int desc_index =
744 descriptor_map_add(is_sampler ?
745 &pipeline->sampler_map : &pipeline->texture_map,
746 deref->var->data.descriptor_set,
747 deref->var->data.binding,
748 array_index,
749 binding_layout->array_size,
750 instr->is_shadow);
751
752 if (is_sampler)
753 instr->sampler_index = desc_index;
754 else
755 instr->texture_index = desc_index;
756 }
757
758 static bool
lower_sampler(nir_builder *b, nir_tex_instr *instr,
              struct v3dv_pipeline *pipeline,
              const struct v3dv_pipeline_layout *layout)
762 {
763 int texture_idx =
764 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
765
766 if (texture_idx >= 0)
767 lower_tex_src_to_offset(b, instr, texture_idx, pipeline, layout);
768
769 int sampler_idx =
770 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
771
772 if (sampler_idx >= 0)
773 lower_tex_src_to_offset(b, instr, sampler_idx, pipeline, layout);
774
775 if (texture_idx < 0 && sampler_idx < 0)
776 return false;
777
778 int combined_index =
779 get_combined_index(pipeline,
780 instr->texture_index,
781 sampler_idx < 0 ? V3DV_NO_SAMPLER_IDX : instr->sampler_index);
782
783 instr->texture_index = combined_index;
784 instr->sampler_index = combined_index;
785
786 return true;
787 }
788
789 /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
790 static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  struct v3dv_pipeline *pipeline,
                  const struct v3dv_pipeline_layout *layout)
795 {
796 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
797 nir_ssa_def *index = NULL;
798 unsigned array_elements = 1;
799 unsigned base_index = 0;
800
801 while (deref->deref_type != nir_deref_type_var) {
802 assert(deref->parent.is_ssa);
803 nir_deref_instr *parent =
804 nir_instr_as_deref(deref->parent.ssa->parent_instr);
805
806 assert(deref->deref_type == nir_deref_type_array);
807
808 if (nir_src_is_const(deref->arr.index) && index == NULL) {
809 /* We're still building a direct index */
810 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
811 } else {
812 if (index == NULL) {
813 /* We used to be direct but not anymore */
814 index = nir_imm_int(b, base_index);
815 base_index = 0;
816 }
817
818 index = nir_iadd(b, index,
819 nir_imul(b, nir_imm_int(b, array_elements),
820 nir_ssa_for_src(b, deref->arr.index, 1)));
821 }
822
823 array_elements *= glsl_get_length(parent->type);
824
825 deref = parent;
826 }
827
828 if (index)
829 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
830
831 uint32_t set = deref->var->data.descriptor_set;
832 uint32_t binding = deref->var->data.binding;
833 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
834 struct v3dv_descriptor_set_binding_layout *binding_layout =
835 &set_layout->binding[binding];
836
837 uint32_t array_index = deref->var->data.index + base_index;
838
839 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
840 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
841
842 int desc_index =
843 descriptor_map_add(&pipeline->texture_map,
844 deref->var->data.descriptor_set,
845 deref->var->data.binding,
846 array_index,
847 binding_layout->array_size,
848 false /* is_shadow: Doesn't really matter in this case */);
849
850 /* We still need to get a combined_index, as we are integrating images with
851 * the rest of the texture/sampler support
852 */
853 int combined_index =
854 get_combined_index(pipeline, desc_index, V3DV_NO_SAMPLER_IDX);
855
856 index = nir_imm_int(b, combined_index);
857
858 nir_rewrite_image_intrinsic(instr, index, false);
859 }
860
861 static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct v3dv_pipeline *pipeline,
                const struct v3dv_pipeline_layout *layout)
865 {
866 switch (instr->intrinsic) {
867 case nir_intrinsic_load_layer_id:
868 /* FIXME: if layered rendering gets supported, this would need a real
869 * lowering
870 */
871 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
872 nir_src_for_ssa(nir_imm_int(b, 0)));
873 nir_instr_remove(&instr->instr);
874 return true;
875
876 case nir_intrinsic_load_push_constant:
877 lower_load_push_constant(b, instr, pipeline);
878 pipeline->use_push_constants = true;
879 return true;
880
881 case nir_intrinsic_vulkan_resource_index:
882 lower_vulkan_resource_index(b, instr, pipeline, layout);
883 return true;
884
885 case nir_intrinsic_load_vulkan_descriptor: {
      /* We are not using it, as loading the descriptor happens as part of the
       * load/store instruction, so the simplest thing is to make this a
       * no-op. We just lower the desc back to a vec2, as that is what
       * load_ssbo/ubo expects.
       */
890 nir_ssa_def *desc = nir_vec2(b, instr->src[0].ssa, nir_imm_int(b, 0));
891 nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(desc));
892 nir_instr_remove(&instr->instr);
893 return true;
894 }
895
896 case nir_intrinsic_image_deref_load:
897 case nir_intrinsic_image_deref_store:
898 case nir_intrinsic_image_deref_atomic_add:
899 case nir_intrinsic_image_deref_atomic_imin:
900 case nir_intrinsic_image_deref_atomic_umin:
901 case nir_intrinsic_image_deref_atomic_imax:
902 case nir_intrinsic_image_deref_atomic_umax:
903 case nir_intrinsic_image_deref_atomic_and:
904 case nir_intrinsic_image_deref_atomic_or:
905 case nir_intrinsic_image_deref_atomic_xor:
906 case nir_intrinsic_image_deref_atomic_exchange:
907 case nir_intrinsic_image_deref_atomic_comp_swap:
908 case nir_intrinsic_image_deref_size:
909 case nir_intrinsic_image_deref_samples:
910 lower_image_deref(b, instr, pipeline, layout);
911 return true;
912
913 default:
914 return false;
915 }
916 }
917
918 static bool
lower_impl(nir_function_impl *impl,
           struct v3dv_pipeline *pipeline,
           const struct v3dv_pipeline_layout *layout)
922 {
923 nir_builder b;
924 nir_builder_init(&b, impl);
925 bool progress = false;
926
927 nir_foreach_block(block, impl) {
928 nir_foreach_instr_safe(instr, block) {
929 b.cursor = nir_before_instr(instr);
930 switch (instr->type) {
931 case nir_instr_type_tex:
932 progress |=
933 lower_sampler(&b, nir_instr_as_tex(instr), pipeline, layout);
934 break;
935 case nir_instr_type_intrinsic:
936 progress |=
937 lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout);
938 break;
939 default:
940 break;
941 }
942 }
943 }
944
945 return progress;
946 }
947
948 static bool
lower_pipeline_layout_info(nir_shader *shader,
                           struct v3dv_pipeline *pipeline,
                           const struct v3dv_pipeline_layout *layout)
952 {
953 bool progress = false;
954
955 nir_foreach_function(function, shader) {
956 if (function->impl)
957 progress |= lower_impl(function->impl, pipeline, layout);
958 }
959
960 return progress;
961 }
962
963
964 static void
lower_fs_io(nir_shader *nir)
966 {
967 /* Our backend doesn't handle array fragment shader outputs */
968 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
969 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
970
971 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
972 MESA_SHADER_FRAGMENT);
973
974 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
975 MESA_SHADER_FRAGMENT);
976
977 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
978 type_size_vec4, 0);
979 }
980
981 static void
lower_vs_io(struct nir_shader *nir)
983 {
984 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
985
986 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
987 MESA_SHADER_VERTEX);
988
989 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
990 MESA_SHADER_VERTEX);
991
992 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
993 * overlaps with v3d_nir_lower_io. Need further research though.
994 */
995 }
996
997 static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler doesn't use this callback much, only as an alternative way
    * to dump the shaderdb stats, which you can already get using
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler to remove that callback.
    */
1006 }
1007
1008 static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const struct v3dv_pipeline_stage *p_stage,
                          uint32_t ucp_enables,
                          bool robust_buffer_access)
{
   /* The following values are default values used at pipeline create. We use
    * 16 bit as the default return size.
    */

   /* We don't use the nir shader info.num_textures because that doesn't take
    * into account input attachments, even after calling
    * nir_lower_input_attachments. As a general rule that makes sense, but in
    * our case we are handling them mostly as textures. We iterate through the
    * combined_index_map that was filled with the textures used in the shader.
    */
1024 uint32_t tex_idx = 0;
1025 if (p_stage->pipeline->combined_index_map) {
1026 hash_table_foreach(p_stage->pipeline->combined_index_map, entry) {
1027 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1028 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1029 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1030 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1031
1032 key->tex[tex_idx].return_size = 16;
1033 key->tex[tex_idx].return_channels = 2;
1034
1035 tex_idx++;
1036 }
1037 }
1038 key->num_tex_used = tex_idx;
1039 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1040
   /* Default value. It will be overridden in the vs/gs populate methods when
    * GS gets supported.
    */
1044 key->is_last_geometry_stage = true;
1045
1046 /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1047 * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1048 * takes care of adding a single compact array variable at
1049 * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1050 *
1051 * The only lowering we are interested is specific to the fragment shader,
1052 * where we want to emit discards to honor writes to gl_ClipDistance[] in
1053 * previous stages. This is done via nir_lower_clip_fs() so we only set up
1054 * the ucp enable mask for that stage.
1055 */
1056 key->ucp_enables = ucp_enables;
1057
1058 key->robust_buffer_access = robust_buffer_access;
1059
1060 key->environment = V3D_ENVIRONMENT_VULKAN;
1061 }
1062
/* FIXME: anv maps to the hw primitive type. Perhaps eventually we will do the
 * same. For now we keep using pipe_prim_type, which is the one already used
 * on v3d.
 */
1066 static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1067 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1068 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1069 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1070 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1071 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1072 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1073 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1074 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1075 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1076 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1077 };
1078
1079 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1080 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1081 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1082 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1083 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1084 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1085 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1086 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1087 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1088 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1089 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1090 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1091 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1092 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1093 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1094 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1095 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1096 };
1097
1098 static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage,
                             uint32_t ucp_enables)
1103 {
1104 memset(key, 0, sizeof(*key));
1105
1106 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1107 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1108
1109 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1110 pCreateInfo->pInputAssemblyState;
1111 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1112
1113 key->is_points = (topology == PIPE_PRIM_POINTS);
1114 key->is_lines = (topology >= PIPE_PRIM_LINES &&
1115 topology <= PIPE_PRIM_LINE_STRIP);
1116
1117 /* Vulkan doesn't appear to specify (anv does the same) */
1118 key->clamp_color = false;
1119
1120 const VkPipelineColorBlendStateCreateInfo *cb_info =
1121 pCreateInfo->pColorBlendState;
1122
1123 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1124 vk_to_pipe_logicop[cb_info->logicOp] :
1125 PIPE_LOGICOP_COPY;
1126
1127 const bool raster_enabled =
1128 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1129
1130 /* Multisample rasterization state must be ignored if rasterization
1131 * is disabled.
1132 */
1133 const VkPipelineMultisampleStateCreateInfo *ms_info =
1134 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1135 if (ms_info) {
1136 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1137 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1138 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1139
1140 if (key->msaa) {
1141 key->sample_coverage =
1142 p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1143 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1144 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1145 }
1146 }
1147
1148 /* Vulkan doesn't support alpha test */
1149 key->alpha_test = false;
1150 key->alpha_test_func = COMPARE_FUNC_NEVER;
1151
1152 /* This is intended for V3D versions before 4.1, otherwise we just use the
1153 * tile buffer load/store swap R/B bit.
1154 */
1155 key->swap_color_rb = 0;
1156
1157 const struct v3dv_render_pass *pass =
1158 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1159 const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1160 for (uint32_t i = 0; i < subpass->color_count; i++) {
1161 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1162 if (att_idx == VK_ATTACHMENT_UNUSED)
1163 continue;
1164
1165 key->cbufs |= 1 << i;
1166
1167 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1168 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1169
1170 /* If logic operations are enabled then we might emit color reads and we
1171 * need to know the color buffer format and swizzle for that
1172 */
1173 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1174 key->color_fmt[i].format = fb_pipe_format;
1175 key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format);
1176 }
1177
1178 const struct util_format_description *desc =
1179 vk_format_description(fb_format);
1180
1181 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1182 desc->channel[0].size == 32) {
1183 key->f32_color_rb |= 1 << i;
1184 }
1185
1186 if (p_stage->nir->info.fs.untyped_color_outputs) {
1187 if (util_format_is_pure_uint(fb_pipe_format))
1188 key->uint_color_rb |= 1 << i;
1189 else if (util_format_is_pure_sint(fb_pipe_format))
1190 key->int_color_rb |= 1 << i;
1191 }
1192
1193 if (key->is_points) {
1194 /* FIXME: The mask would need to be computed based on the shader
1195 * inputs. On gallium it is done at st_atom_rasterizer
1196 * (sprite_coord_enable). anv seems (need to confirm) to do that on
1197 * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
1198 * better to have tests to guide filling the mask.
1199 */
1200 key->point_sprite_mask = 0;
1201
1202 /* Vulkan mandates upper left. */
1203 key->point_coord_upper_left = true;
1204 }
1205 }
1206
   /* FIXME: we understand that this is used on GL to configure fixed-function
    * two side lighting support, and doesn't make sense for Vulkan. Need to
    * confirm though.
    */
   key->light_twoside = false;

   /* FIXME: ditto, although for flat lighting. Again, need to confirm. */
   key->shade_model_flat = false;
1215 }
1216
1217 static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
1221 {
1222 memset(key, 0, sizeof(*key));
1223
1224 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1225 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1226
1227 /* Vulkan doesn't appear to specify (anv does the same) */
1228 key->clamp_color = false;
1229
   /* Vulkan specifies a point size per vertex, so this is true if the
    * primitives are points, like on ES2.
    */
1233 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1234 pCreateInfo->pInputAssemblyState;
1235 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1236
   /* FIXME: being PIPE_PRIM_POINTS is not enough; on gallium the full check
    * is PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1239 key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1240
1241 key->is_coord = p_stage->is_coord;
1242 if (p_stage->is_coord) {
1243 /* The only output varying on coord shaders are for transform
1244 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1245 */
1246 key->num_used_outputs = 0;
1247 } else {
1248 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1249 struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant;
1250
1251 key->num_used_outputs = fs_variant->prog_data.fs->num_inputs;
1252
1253 STATIC_ASSERT(sizeof(key->used_outputs) ==
1254 sizeof(fs_variant->prog_data.fs->input_slots));
1255 memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots,
1256 sizeof(key->used_outputs));
1257 }
1258
1259 const VkPipelineVertexInputStateCreateInfo *vi_info =
1260 pCreateInfo->pVertexInputState;
1261 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1262 const VkVertexInputAttributeDescription *desc =
1263 &vi_info->pVertexAttributeDescriptions[i];
1264 assert(desc->location < MAX_VERTEX_ATTRIBS);
1265 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1266 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1267 }
1268 }
1269
/*
 * Creates the pipeline_stage for the coordinate shader. Initially a clone of
 * the vs pipeline_stage, with is_coord set to true.
 *
 * Returns NULL if it was not able to allocate the object, so it should be
 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
 */
static struct v3dv_pipeline_stage*
pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
                             const VkAllocationCallbacks *pAllocator)
1280 {
1281 struct v3dv_device *device = src->pipeline->device;
1282
1283 struct v3dv_pipeline_stage *p_stage =
1284 vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
1285 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1286
1287 if (p_stage == NULL)
1288 return NULL;
1289
1290 p_stage->pipeline = src->pipeline;
1291 assert(src->stage == MESA_SHADER_VERTEX);
1292 p_stage->stage = src->stage;
1293 p_stage->entrypoint = src->entrypoint;
1294 p_stage->module = src->module;
1295 p_stage->nir = nir_shader_clone(NULL, src->nir);
1296 p_stage->spec_info = src->spec_info;
1297 memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1298
1299 p_stage->is_coord = true;
1300
1301 return p_stage;
1302 }
1303
/* FIXME: right now this just asks for a bo with the exact size of the qpu
 * assembly. It would be good to be able to re-use bos to avoid bo
 * fragmentation. This could be tricky though, as right now we are uploading
 * the assembly from two paths, when compiling a shader, or when deserializing
 * from the pipeline cache. This also means that the same variant can be
 * shared by different objects. So with the current approach it is clear who
 * owns the assembly bo, but if shared, who owns the shared bo?
 *
 * For now one bo per assembly will work.
 *
 * Returns false if it was not able to allocate or map the assembly bo memory.
 */
static bool
upload_assembly(struct v3dv_device *device,
                struct v3dv_shader_variant *variant,
                gl_shader_stage stage,
                bool is_coord,
                const void *data,
                uint32_t size)
1323 {
1324 const char *name = NULL;
1325 /* We are uploading the assembly just once, so at this point we shouldn't
1326 * have any bo
1327 */
1328 assert(variant->assembly_bo == NULL);
1329
1330 switch (stage) {
1331 case MESA_SHADER_VERTEX:
1332 name = (is_coord == true) ? "coord_shader_assembly" :
1333 "vertex_shader_assembly";
1334 break;
1335 case MESA_SHADER_FRAGMENT:
1336 name = "fragment_shader_assembly";
1337 break;
1338 case MESA_SHADER_COMPUTE:
1339 name = "compute_shader_assembly";
1340 break;
1341 default:
1342 unreachable("Stage not supported\n");
1343 break;
1344 };
1345
1346 struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true);
1347 if (!bo) {
1348 fprintf(stderr, "failed to allocate memory for shader\n");
1349 return false;
1350 }
1351
1352 bool ok = v3dv_bo_map(device, bo, size);
1353 if (!ok) {
1354 fprintf(stderr, "failed to map source shader buffer\n");
1355 return false;
1356 }
1357
1358 memcpy(bo->map, data, size);
1359
   /* We don't unmap the assembly bo, as we will use it to gather the assembly
    * when serializing the variant.
    */
1363 variant->assembly_bo = bo;
1364
1365 return true;
1366 }
1367
1368 static void
pipeline_hash_variant(const struct v3dv_pipeline_stage *p_stage,
                      struct v3d_key *key,
                      size_t key_size,
                      unsigned char *sha1_out)
1373 {
1374 struct mesa_sha1 ctx;
1375 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1376 _mesa_sha1_init(&ctx);
1377
1378 if (p_stage->stage == MESA_SHADER_COMPUTE) {
1379 _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1380 } else {
      /* We need to include both in the sha1 key as one could affect the other
       * during linking (e.g. if the vertex outputs are constants, then the
       * fragment shader would use load_const instead of load_input). An
       * alternative would be to use the serialized nir, but that seems like
       * overkill.
       */
1387 _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1388 sizeof(pipeline->vs->shader_sha1));
1389 _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1390 sizeof(pipeline->fs->shader_sha1));
1391 }
1392 _mesa_sha1_update(&ctx, key, key_size);
1393
1394 _mesa_sha1_final(&ctx, sha1_out);
1395 }
1396
1397 /* Checks that the pipeline has enough spill size to use a specific variant */
1398 static void
pipeline_check_spill_size(struct v3dv_pipeline *pipeline,
                          struct v3dv_shader_variant *variant)
1401 {
1402 if (variant->prog_data.base->spill_size > pipeline->spill.size_per_thread) {
1403 struct v3dv_device *device = pipeline->device;
1404
      /* The TIDX register we use for choosing the area to access
       * for scratch space is: (core << 6) | (qpu << 2) | thread.
       * Even at minimum threadcount in a particular shader, that
       * means we still multiply the QPU count by 4.
       */
1410 const uint32_t total_spill_size =
1411 4 * device->devinfo.qpu_count * variant->prog_data.base->spill_size;
1412 if (pipeline->spill.bo) {
1413 assert(pipeline->spill.size_per_thread > 0);
1414 v3dv_bo_free(device, pipeline->spill.bo);
1415 }
1416 pipeline->spill.bo =
1417 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1418 pipeline->spill.size_per_thread = variant->prog_data.base->spill_size;
1419 }
1420 }
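
/* Worked example of the sizing above, with hypothetical numbers: for a device
 * with devinfo.qpu_count == 8 and a variant that needs spill_size == 512
 * bytes per thread, total_spill_size = 4 * 8 * 512 = 16384 bytes, i.e. enough
 * scratch for all QPUs even if every one of them runs 4 threads.
 */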
1421
/*
 * Creates a new shader variant. Note that the key is const and is only
 * copied into the variant, while the variant takes ownership of prog_data.
 *
 * Creation includes allocating a shader assembly bo, and filling it up.
 */
struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           gl_shader_stage stage,
                           bool is_coord,
                           const unsigned char *variant_sha1,
                           const struct v3d_key *key,
                           uint32_t key_size,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           const uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result)
1440 {
1441 struct v3dv_shader_variant *variant =
1442 vk_zalloc(&device->alloc, sizeof(*variant), 8,
1443 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1444
1445 if (variant == NULL) {
1446 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1447 return NULL;
1448 }
1449
1450 variant->ref_cnt = 1;
1451 variant->stage = stage;
1452 variant->is_coord = is_coord;
1453 memcpy(&variant->key, key, key_size);
1454 variant->v3d_key_size = key_size;
1455 memcpy(variant->variant_sha1, variant_sha1, sizeof(variant->variant_sha1));
1456 variant->prog_data_size = prog_data_size;
1457 variant->prog_data.base = prog_data;
1458
1459 if (qpu_insts) {
1460 if (!upload_assembly(device, variant, stage, is_coord,
1461 qpu_insts, qpu_insts_size)) {
1462 ralloc_free(variant->prog_data.base);
1463 vk_free(&device->alloc, variant);
1464
1465 *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1466 return NULL;
1467 }
1468 variant->qpu_insts_size = qpu_insts_size;
1469 }
1470
1471 *out_vk_result = VK_SUCCESS;
1472
1473 return variant;
1474 }
1475
/* For a given key, it returns the compiled version of the shader. If it was
 * already compiled, it gets it from the p_stage cache; if not, it compiles it
 * through the v3d compiler.
 *
 * If the method returns NULL it means that it was not able to allocate the
 * resources for the variant. out_vk_result returns which OOM applies.
 *
 * Returns a new reference of the shader_variant to the caller.
 */
struct v3dv_shader_variant*
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result)
1492 {
1493 /* First we check if the current pipeline variant is such variant. For this
1494 * we can just use the v3d_key
1495 */
1496
1497 if (p_stage->current_variant &&
1498 memcmp(key, &p_stage->current_variant->key, key_size) == 0) {
1499 *out_vk_result = VK_SUCCESS;
1500 return p_stage->current_variant;
1501 }
1502
1503 /* We search on the pipeline cache if provided by the user, or the default
1504 * one
1505 */
1506 unsigned char variant_sha1[20];
1507 pipeline_hash_variant(p_stage, key, key_size, variant_sha1);
1508
1509 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1510 struct v3dv_device *device = pipeline->device;
1511 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
1512 cache = &device->default_pipeline_cache;
1513
1514 struct v3dv_shader_variant *variant =
1515 v3dv_pipeline_cache_search_for_variant(pipeline,
1516 cache,
1517 variant_sha1);
1518
1519 if (variant) {
1520 pipeline_check_spill_size(pipeline, variant);
1521 *out_vk_result = VK_SUCCESS;
1522 return variant;
1523 }
1524 /* If we don't find the variant in any cache, we compile one and add the
1525 * variant to the cache
1526 */
1527 struct v3dv_physical_device *physical_device =
1528 &pipeline->device->instance->physicalDevice;
1529 const struct v3d_compiler *compiler = physical_device->compiler;
1530
1531 uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count);
1532
1533 if (V3D_DEBUG & (V3D_DEBUG_NIR |
1534 v3d_debug_flag_for_shader_stage(p_stage->stage))) {
1535 fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n",
1536 gl_shader_stage_name(p_stage->stage),
1537 p_stage->program_id,
1538 variant_id);
1539 nir_print_shader(p_stage->nir, stderr);
1540 fprintf(stderr, "\n");
1541 }
1542
1543 uint64_t *qpu_insts;
1544 uint32_t qpu_insts_size;
1545 struct v3d_prog_data *prog_data;
1546
1547 qpu_insts = v3d_compile(compiler,
1548 key, &prog_data,
1549 p_stage->nir,
1550 shader_debug_output, NULL,
1551 p_stage->program_id,
1552 variant_id,
1553 &qpu_insts_size);
1554
1555 if (!qpu_insts) {
1556 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1557 gl_shader_stage_name(p_stage->stage),
1558 p_stage->program_id);
1559 }
1560
1561 variant = v3dv_shader_variant_create(device, p_stage->stage, p_stage->is_coord,
1562 variant_sha1,
1563 key, key_size,
1564 prog_data, v3d_prog_data_size(p_stage->stage),
1565 qpu_insts, qpu_insts_size,
1566 out_vk_result);
1567 if (qpu_insts)
1568 free(qpu_insts);
1569
1570 if (variant)
1571 pipeline_check_spill_size(pipeline, variant);
1572
1573 if (*out_vk_result == VK_SUCCESS) {
1574 struct v3dv_pipeline_cache *default_cache =
1575 &pipeline->device->default_pipeline_cache;
1576
1577 v3dv_pipeline_cache_upload_variant(pipeline, cache, variant);
1578
1579 /* Ensure that the variant is also in the default cache, as the cmd_buffer
1580 * could need to change the current variant later.
1581 */
1582 if (default_cache != cache) {
1583 v3dv_pipeline_cache_upload_variant(pipeline, default_cache, variant);
1584 }
1585 }
1586
1587 return variant;
1588 }
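
/* Illustrative usage sketch (not driver code, names taken from this file):
 * v3dv_get_shader_variant() hands back a new reference, so a caller owns the
 * result and must either keep it (as the pipeline does with current_variant)
 * or unref it when done:
 *
 *    VkResult result;
 *    struct v3dv_shader_variant *variant =
 *       v3dv_get_shader_variant(p_stage, cache, &key->base, sizeof(*key),
 *                               pAllocator, &result);
 *    if (result != VK_SUCCESS)
 *       return result;                    // host or device OOM
 *    p_stage->current_variant = variant;  // reference kept by the stage
 */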
1589
1590 /* This method updates the return size for a given key. It assumes that the
1591 * key was already properly populated, so for example the value of
1592 * key->num_tex_used should be correct at this point.
1593 *
1594 * Note that even if the @return_size to set is 32-bit, it can be overridden
1595 * to 16-bit for cases like shadow textures, which we know in advance are
1596 * always 16-bit.
1597 */
1598 void
1599 v3d_key_update_return_size(struct v3dv_pipeline *pipeline,
1600 struct v3d_key *key,
1601 uint32_t return_size)
1602 {
1603 assert(return_size == 32 || return_size == 16);
1604 struct v3dv_descriptor_map *texture_map = &pipeline->texture_map;
1605
1606 for (uint32_t tex_idx = 0; tex_idx < key->num_tex_used; tex_idx++) {
1607 key->tex[tex_idx].return_size =
1608 texture_map->is_shadow[tex_idx] ? 16 : return_size;
1609
1610 key->tex[tex_idx].return_channels =
1611 key->tex[tex_idx].return_size == 16 ? 2 : 4;
1612 }
1613 }
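
/* Worked example (illustrative): with key->num_tex_used == 2, where texture 0
 * is a shadow sampler and texture 1 is not, calling
 *
 *    v3d_key_update_return_size(pipeline, key, 32);
 *
 * leaves key->tex[0] with return_size = 16 / return_channels = 2 (shadow
 * lookups are always 16-bit) and key->tex[1] with return_size = 32 /
 * return_channels = 4.
 */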
1614
1615 /*
1616 * To avoid needing too many shader re-compilations after pipeline creation
1617 * time, we pre-generate several variants, so they are available in the
1618 * default cache. The prime example here is the return size for texture
1619 * accesses, as the real value needed depends on the texture format used.
1620 */
1621 static struct v3dv_shader_variant*
1622 pregenerate_shader_variants(struct v3dv_pipeline_stage *p_stage,
1623 struct v3dv_pipeline_cache *cache,
1624 struct v3d_key *key,
1625 size_t key_size,
1626 const VkAllocationCallbacks *pAllocator,
1627 VkResult *out_vk_result)
1628 {
1629 /* We assume that we receive the default 16-bit return size */
1630 struct v3dv_shader_variant *variant_16 =
1631 v3dv_get_shader_variant(p_stage, cache, key, key_size,
1632 pAllocator, out_vk_result);
1633
1634 if (*out_vk_result != VK_SUCCESS)
1635 return variant_16;
1636
1637 if (!p_stage->pipeline->device->instance->default_pipeline_cache_enabled) {
1638 /* If the pipeline cache is disabled it doesn't make sense to pre-generate,
1639 * as we are relying on the default pipeline cache to save the different
1640 * pre-compiled variants
1641 */
1642 return variant_16;
1643 }
1644
1645 v3d_key_update_return_size(p_stage->pipeline, key, 32);
1646
1647 struct v3dv_shader_variant *variant_32 =
1648 v3dv_get_shader_variant(p_stage, cache, key, key_size,
1649 pAllocator, out_vk_result);
1650
1651 /* v3dv_get_shader_variant returns a new reference; since we are going to
1652 * use variant_16, we need to unref variant_32.
1653 */
1654 v3dv_shader_variant_unref(p_stage->pipeline->device, variant_32);
1655
1656 return variant_16;
1657 }
1658
1659 /* FIXME: copied from the gallium state tracker (st); move to a common place? */
1660 static void
1661 st_nir_opts(nir_shader *nir)
1662 {
1663 bool progress;
1664
1665 do {
1666 progress = false;
1667
1668 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1669
1670 /* Linking deals with unused inputs/outputs, but here we can remove
1671 * things local to the shader in the hopes that we can cleanup other
1672 * things. This pass will also remove variables with only stores, so we
1673 * might be able to make progress after it.
1674 */
1675 NIR_PASS(progress, nir, nir_remove_dead_variables,
1676 (nir_variable_mode)(nir_var_function_temp |
1677 nir_var_shader_temp |
1678 nir_var_mem_shared),
1679 NULL);
1680
1681 NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1682 NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1683
1684 if (nir->options->lower_to_scalar) {
1685 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1686 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1687 }
1688
1689 NIR_PASS_V(nir, nir_lower_alu);
1690 NIR_PASS_V(nir, nir_lower_pack);
1691 NIR_PASS(progress, nir, nir_copy_prop);
1692 NIR_PASS(progress, nir, nir_opt_remove_phis);
1693 NIR_PASS(progress, nir, nir_opt_dce);
1694 if (nir_opt_trivial_continues(nir)) {
1695 progress = true;
1696 NIR_PASS(progress, nir, nir_copy_prop);
1697 NIR_PASS(progress, nir, nir_opt_dce);
1698 }
1699 NIR_PASS(progress, nir, nir_opt_if, false);
1700 NIR_PASS(progress, nir, nir_opt_dead_cf);
1701 NIR_PASS(progress, nir, nir_opt_cse);
1702 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1703
1704 NIR_PASS(progress, nir, nir_opt_algebraic);
1705 NIR_PASS(progress, nir, nir_opt_constant_folding);
1706
1707 NIR_PASS(progress, nir, nir_opt_undef);
1708 NIR_PASS(progress, nir, nir_opt_conditional_discard);
1709 } while (progress);
1710 }
1711
1712 static void
1713 link_shaders(nir_shader *producer, nir_shader *consumer)
1714 {
1715 assert(producer);
1716 assert(consumer);
1717
1718 if (producer->options->lower_to_scalar) {
1719 NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1720 NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1721 }
1722
1723 nir_lower_io_arrays_to_elements(producer, consumer);
1724
1725 st_nir_opts(producer);
1726 st_nir_opts(consumer);
1727
1728 if (nir_link_opt_varyings(producer, consumer))
1729 st_nir_opts(consumer);
1730
1731 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1732 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1733
1734 if (nir_remove_unused_varyings(producer, consumer)) {
1735 NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1736 NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1737
1738 st_nir_opts(producer);
1739 st_nir_opts(consumer);
1740
1741 /* Optimizations can cause varyings to become unused.
1742 * nir_compact_varyings() depends on all dead varyings being removed so
1743 * we need to call nir_remove_dead_variables() again here.
1744 */
1745 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1746 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1747 }
1748 }
1749
1750 static void
1751 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1752 struct v3dv_pipeline_stage *p_stage,
1753 struct v3dv_pipeline_layout *layout)
1754 {
1755 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1756
1757 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1758 NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1759 }
1760
1761 /**
1762 * The SPIR-V compiler will insert a sized compact array for
1763 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1764 * where the size of the array determines the number of active clip planes.
1765 */
1766 static uint32_t
1767 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1768 {
1769 assert(p_stage->stage == MESA_SHADER_VERTEX);
1770 const nir_shader *shader = p_stage->nir;
1771 assert(shader);
1772
1773 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1774 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1775 assert(var->data.compact);
1776 return (1 << glsl_get_length(var->type)) - 1;
1777 }
1778 }
1779 return 0;
1780 }
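
/* Worked example (illustrative): a vertex shader declaring
 * "out float gl_ClipDistance[3]" gets a compact VARYING_SLOT_CLIP_DIST0
 * array of length 3, so this returns (1 << 3) - 1 = 0x7, enabling user clip
 * planes 0..2 in the fragment shader key.
 */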
1781
1782 static nir_shader*
1783 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1784 struct v3dv_pipeline *pipeline,
1785 struct v3dv_pipeline_cache *cache)
1786 {
1787 nir_shader *nir = NULL;
1788
1789 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1790 &v3dv_nir_options,
1791 p_stage->shader_sha1);
1792
1793 if (nir) {
1794 assert(nir->info.stage == p_stage->stage);
1795 return nir;
1796 }
1797
1798 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1799
1800 if (nir) {
1801 struct v3dv_pipeline_cache *default_cache =
1802 &pipeline->device->default_pipeline_cache;
1803
1804 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1805 p_stage->shader_sha1);
1806
1807 /* Ensure that the NIR shader is also in the default cache, as the
1808 * cmd_buffer could need it to build a new variant later.
1809 */
1810 if (default_cache != cache) {
1811 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1812 p_stage->shader_sha1);
1813 }
1814 return nir;
1815 }
1816
1817 /* FIXME: this shouldn't happen, raise error? */
1818 return NULL;
1819 }
1820
1821 static void
1822 pipeline_hash_shader(const struct v3dv_shader_module *module,
1823 const char *entrypoint,
1824 gl_shader_stage stage,
1825 const VkSpecializationInfo *spec_info,
1826 unsigned char *sha1_out)
1827 {
1828 struct mesa_sha1 ctx;
1829 _mesa_sha1_init(&ctx);
1830
1831 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1832 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1833 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1834 if (spec_info) {
1835 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1836 spec_info->mapEntryCount *
1837 sizeof(*spec_info->pMapEntries));
1838 _mesa_sha1_update(&ctx, spec_info->pData,
1839 spec_info->dataSize);
1840 }
1841
1842 _mesa_sha1_final(&ctx, sha1_out);
1843 }
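
/* Illustrative note: two stages that only differ in their specialization
 * constants hash to different shader_sha1 values. For example (hypothetical
 * values):
 *
 *    const uint32_t kernel_size = 5;
 *    const VkSpecializationMapEntry entry = { 0, 0, sizeof(kernel_size) };
 *    const VkSpecializationInfo spec = { 1, &entry,
 *                                        sizeof(kernel_size), &kernel_size };
 *
 * Changing kernel_size changes spec.pData and therefore the resulting sha1,
 * so the NIR and variant caches treat it as a different shader.
 */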
1844
1845
1846 static VkResult
1847 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1848 struct v3dv_pipeline_cache *cache,
1849 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1850 const VkAllocationCallbacks *pAllocator)
1851 {
1852 struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1853
1854 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
1855 /* Right now we only support pipelines with both vertex and fragment
1856 * shaders.
1857 */
1858 assert(pipeline->fs);
1859
1860 /* Make sure we do all our common lowering *before* we create the vs
1861 * and vs_bin pipeline stages, since from that point forward we need to
1862 * run lowerings for both of them separately, since each stage will
1863 * own its NIR code.
1864 */
1865 lower_vs_io(p_stage->nir);
1866
1867 pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, pAllocator);
1868 if (pipeline->vs_bin == NULL)
1869 return VK_ERROR_OUT_OF_HOST_MEMORY;
1870
1871 /* FIXME: this should likely be moved to a gather-info method that fills
1872 * a full struct inside pipeline_stage.
1873 */
1874 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1875 pCreateInfo->pInputAssemblyState;
1876 pipeline->vs->topology = vk_to_pipe_prim_type[ia_info->topology];
1877
1878 struct v3d_vs_key *key = &pipeline->vs->key.vs;
1879 pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs);
1880 VkResult vk_result;
1881 pipeline->vs->current_variant =
1882 pregenerate_shader_variants(pipeline->vs, cache, &key->base, sizeof(*key),
1883 pAllocator, &vk_result);
1884 if (vk_result != VK_SUCCESS)
1885 return vk_result;
1886
1887 key = &pipeline->vs_bin->key.vs;
1888 pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin);
1889 pipeline->vs_bin->current_variant =
1890 pregenerate_shader_variants(pipeline->vs_bin, cache, &key->base, sizeof(*key),
1891 pAllocator, &vk_result);
1892
1893 return vk_result;
1894 }
1895
1896 static VkResult
1897 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1898 struct v3dv_pipeline_cache *cache,
1899 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1900 const VkAllocationCallbacks *pAllocator)
1901 {
1902 struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1903
1905 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
1906
1907 struct v3d_fs_key *key = &p_stage->key.fs;
1908
1909 pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage,
1910 get_ucp_enable_mask(pipeline->vs));
1911
1912 lower_fs_io(p_stage->nir);
1913
1914 VkResult vk_result;
1915 p_stage->current_variant =
1916 pregenerate_shader_variants(p_stage, cache, &key->base, sizeof(*key),
1917 pAllocator, &vk_result);
1918
1919 return vk_result;
1920 }
1921
1922 /*
1923 * Compiles a pipeline. Note that it also allocates internal objects; if some
1924 * allocations succeed but others fail, this method does not free the
1925 * successful ones.
1926 *
1927 * This is done to simplify the code: in that case we just call the pipeline
1928 * destroy method, which handles freeing the allocated internal objects. We
1929 * just need to be careful to set to NULL the objects that were not
1930 * allocated.
1931 */
1932 static VkResult
1933 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
1934 struct v3dv_pipeline_cache *cache,
1935 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1936 const VkAllocationCallbacks *pAllocator)
1937 {
1938 struct v3dv_device *device = pipeline->device;
1939 struct v3dv_physical_device *physical_device =
1940 &device->instance->physicalDevice;
1941
1942 /* First pass to get the common info from the shader modules and the nir
1943 * shaders. We don't care about the coord shader for now.
1944 */
1945 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
1946 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
1947 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1948
1949 struct v3dv_pipeline_stage *p_stage =
1950 vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
1951 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1952
1953 if (p_stage == NULL)
1954 return VK_ERROR_OUT_OF_HOST_MEMORY;
1955
1956 /* Note that we are assigning program_id slightly differently than
1957 * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
1958 * get different program_ids, while v3d would have the same for
1959 * both. For the case of v3dv, it is more natural to have an id this way,
1960 * as right now we are using it for debugging, not for shader-db.
1961 */
1962 p_stage->program_id =
1963 p_atomic_inc_return(&physical_device->next_program_id);
1964 p_stage->compiled_variant_count = 0;
1965
1966 p_stage->pipeline = pipeline;
1967 p_stage->stage = stage;
1968 if (stage == MESA_SHADER_VERTEX)
1969 p_stage->is_coord = false;
1970 p_stage->entrypoint = sinfo->pName;
1971 p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
1972 p_stage->spec_info = sinfo->pSpecializationInfo;
1973
1974 pipeline_hash_shader(p_stage->module,
1975 p_stage->entrypoint,
1976 stage,
1977 p_stage->spec_info,
1978 p_stage->shader_sha1);
1979
1980 pipeline->active_stages |= sinfo->stage;
1981
1982 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
1983
1984 switch(stage) {
1985 case MESA_SHADER_VERTEX:
1986 pipeline->vs = p_stage;
1987 break;
1988 case MESA_SHADER_FRAGMENT:
1989 pipeline->fs = p_stage;
1990 break;
1991 default:
1992 unreachable("not supported shader stage");
1993 }
1994 }
1995
1996 /* Add a no-op fragment shader if needed */
1997 if (!pipeline->fs) {
1998 nir_builder b;
1999 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT,
2000 &v3dv_nir_options);
2001 b.shader->info.name = ralloc_strdup(b.shader, "noop_fs");
2002
2003 struct v3dv_pipeline_stage *p_stage =
2004 vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
2005 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2006
2007 if (p_stage == NULL)
2008 return VK_ERROR_OUT_OF_HOST_MEMORY;
2009
2010 p_stage->pipeline = pipeline;
2011 p_stage->stage = MESA_SHADER_FRAGMENT;
2012 p_stage->entrypoint = "main";
2013 p_stage->module = 0;
2014 p_stage->nir = b.shader;
2015 /* The no-op shader is always the same, so we can just create the sha1
2016 * using the name
2017 */
2018 _mesa_sha1_compute(b.shader->info.name, strlen(b.shader->info.name),
2019 p_stage->shader_sha1);
2020
2021 p_stage->program_id =
2022 p_atomic_inc_return(&physical_device->next_program_id);
2023 p_stage->compiled_variant_count = 0;
2024
2025 pipeline->fs = p_stage;
2026 pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2027 }
2028
2029 /* Linking */
2030 link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2031
2032 /* Compile to VIR (or get it from the cache).
2033 */
2034 VkResult vk_result;
2035 vk_result = pipeline_compile_fragment_shader(pipeline, cache,
2036 pCreateInfo, pAllocator);
2037 if (vk_result != VK_SUCCESS)
2038 return vk_result;
2039
2040 vk_result = pipeline_compile_vertex_shader(pipeline, cache,
2041 pCreateInfo, pAllocator);
2042 if (vk_result != VK_SUCCESS)
2043 return vk_result;
2044
2045 /* FIXME: the values below are the defaults when no GS is present. We would
2046 * need to provide real values if GS gets supported.
2047 */
2048 pipeline->vpm_cfg_bin.As = 1;
2049 pipeline->vpm_cfg_bin.Ve = 0;
2050 pipeline->vpm_cfg_bin.Vc =
2051 pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size;
2052
2053 pipeline->vpm_cfg.As = 1;
2054 pipeline->vpm_cfg.Ve = 0;
2055 pipeline->vpm_cfg.Vc =
2056 pipeline->vs->current_variant->prog_data.vs->vcm_cache_size;
2057
2058 return VK_SUCCESS;
2059 }
2060
2061 static unsigned
2062 v3dv_dynamic_state_mask(VkDynamicState state)
2063 {
2064 switch(state) {
2065 case VK_DYNAMIC_STATE_VIEWPORT:
2066 return V3DV_DYNAMIC_VIEWPORT;
2067 case VK_DYNAMIC_STATE_SCISSOR:
2068 return V3DV_DYNAMIC_SCISSOR;
2069 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2070 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2071 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2072 return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2073 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2074 return V3DV_DYNAMIC_STENCIL_REFERENCE;
2075 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2076 return V3DV_DYNAMIC_BLEND_CONSTANTS;
2077 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2078 return V3DV_DYNAMIC_DEPTH_BIAS;
2079 case VK_DYNAMIC_STATE_LINE_WIDTH:
2080 return V3DV_DYNAMIC_LINE_WIDTH;
2081
2082 /* Depth bounds testing is not available in V3D 4.2 so here we are just
2083 * ignoring this dynamic state. We are already asserting at pipeline creation
2084 * time that depth bounds testing is not enabled.
2085 */
2086 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2087 return 0;
2088
2089 default:
2090 unreachable("Unhandled dynamic state");
2091 }
2092 }
2093
2094 static void
2095 pipeline_init_dynamic_state(
2096 struct v3dv_pipeline *pipeline,
2097 const VkPipelineDynamicStateCreateInfo *pDynamicState,
2098 const VkPipelineViewportStateCreateInfo *pViewportState,
2099 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2100 const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2101 const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
2102 {
2103 pipeline->dynamic_state = default_dynamic_state;
2104 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2105
2106 /* Create a mask of enabled dynamic states */
2107 uint32_t dynamic_states = 0;
2108 if (pDynamicState) {
2109 uint32_t count = pDynamicState->dynamicStateCount;
2110 for (uint32_t s = 0; s < count; s++) {
2111 dynamic_states |=
2112 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2113 }
2114 }
2115
2116 /* For any pipeline states that are not dynamic, set the dynamic state
2117 * from the static pipeline state.
2118 */
2119 if (pViewportState) {
2120 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2121 dynamic->viewport.count = pViewportState->viewportCount;
2122 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2123 pViewportState->viewportCount);
2124
2125 for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2126 v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2127 dynamic->viewport.scale[i],
2128 dynamic->viewport.translate[i]);
2129 }
2130 }
2131
2132 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2133 dynamic->scissor.count = pViewportState->scissorCount;
2134 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2135 pViewportState->scissorCount);
2136 }
2137 }
2138
2139 if (pDepthStencilState) {
2140 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2141 dynamic->stencil_compare_mask.front =
2142 pDepthStencilState->front.compareMask;
2143 dynamic->stencil_compare_mask.back =
2144 pDepthStencilState->back.compareMask;
2145 }
2146
2147 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2148 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2149 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2150 }
2151
2152 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2153 dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2154 dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2155 }
2156 }
2157
2158 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2159 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2160 sizeof(dynamic->blend_constants));
2161 }
2162
2163 if (pRasterizationState) {
2164 if (pRasterizationState->depthBiasEnable &&
2165 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2166 dynamic->depth_bias.constant_factor =
2167 pRasterizationState->depthBiasConstantFactor;
2168 dynamic->depth_bias.slope_factor =
2169 pRasterizationState->depthBiasSlopeFactor;
2170 }
2171 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2172 dynamic->line_width = pRasterizationState->lineWidth;
2173 }
2174
2175 pipeline->dynamic_state.mask = dynamic_states;
2176 }
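
/* Illustrative example (hypothetical create info): a pipeline created with
 *
 *    VkDynamicState states[] = { VK_DYNAMIC_STATE_VIEWPORT,
 *                                VK_DYNAMIC_STATE_STENCIL_REFERENCE };
 *
 * ends up with V3DV_DYNAMIC_VIEWPORT | V3DV_DYNAMIC_STENCIL_REFERENCE in
 * dynamic_state.mask, so neither value is snapshotted from the static create
 * info above and both must be set at record time with vkCmdSetViewport() /
 * vkCmdSetStencilReference() before drawing.
 */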
2177
2178 static uint8_t
2179 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
2180 {
2181 switch (factor) {
2182 case VK_BLEND_FACTOR_ZERO:
2183 case VK_BLEND_FACTOR_ONE:
2184 case VK_BLEND_FACTOR_SRC_COLOR:
2185 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
2186 case VK_BLEND_FACTOR_DST_COLOR:
2187 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
2188 case VK_BLEND_FACTOR_SRC_ALPHA:
2189 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
2190 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
2191 return factor;
2192 case VK_BLEND_FACTOR_CONSTANT_COLOR:
2193 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
2194 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
2195 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
2196 *needs_constants = true;
2197 return factor;
2198 case VK_BLEND_FACTOR_DST_ALPHA:
2199 return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
2200 V3D_BLEND_FACTOR_DST_ALPHA;
2201 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
2202 return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
2203 V3D_BLEND_FACTOR_INV_DST_ALPHA;
2204 case VK_BLEND_FACTOR_SRC1_COLOR:
2205 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
2206 case VK_BLEND_FACTOR_SRC1_ALPHA:
2207 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
2208 assert(!"Invalid blend factor: dual source blending not supported.");
2209 default:
2210 assert(!"Unknown blend factor.");
2211 }
2212
2213 /* Should be handled by the switch; added to avoid an "end of non-void
2214 * function" error.
2215 */
2216 unreachable("Unknown blend factor.");
2217 }
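
/* Worked example (illustrative): for an attachment format without an alpha
 * channel (one the driver exposes with swizzle[3] == PIPE_SWIZZLE_1, so
 * dst_alpha_one is true), VK_BLEND_FACTOR_DST_ALPHA maps to
 * V3D_BLEND_FACTOR_ONE and VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA maps to
 * V3D_BLEND_FACTOR_ZERO, matching the implicit dstAlpha == 1.0 of such
 * formats. Any constant-color/alpha factor simply sets *needs_constants so
 * the blend constants get emitted along with the pipeline.
 */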
2218
2219 static void
2220 pack_blend(struct v3dv_pipeline *pipeline,
2221 const VkPipelineColorBlendStateCreateInfo *cb_info)
2222 {
2223 /* By default, we are not enabling blending and all color channel writes are
2224 * enabled. Color write enables are independent of whether blending is
2225 * enabled or not.
2226 *
2227 * Vulkan specifies color write masks so that bits set correspond to
2228 * enabled channels. Our hardware does it the other way around.
2229 */
2230 pipeline->blend.enables = 0;
2231 pipeline->blend.color_write_masks = 0; /* All channels enabled */
2232
2233 if (!cb_info)
2234 return;
2235
2236 assert(pipeline->subpass);
2237 if (pipeline->subpass->color_count == 0)
2238 return;
2239
2240 assert(pipeline->subpass->color_count == cb_info->attachmentCount);
2241
2242 pipeline->blend.needs_color_constants = false;
2243 uint32_t color_write_masks = 0;
2244 for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
2245 const VkPipelineColorBlendAttachmentState *b_state =
2246 &cb_info->pAttachments[i];
2247
2248 uint32_t attachment_idx =
2249 pipeline->subpass->color_attachments[i].attachment;
2250 if (attachment_idx == VK_ATTACHMENT_UNUSED)
2251 continue;
2252
2253 color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
2254
2255 if (!b_state->blendEnable)
2256 continue;
2257
2258 VkAttachmentDescription *desc =
2259 &pipeline->pass->attachments[attachment_idx].desc;
2260 const struct v3dv_format *format = v3dv_get_format(desc->format);
2261 bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
2262
2263 uint8_t rt_mask = 1 << i;
2264 pipeline->blend.enables |= rt_mask;
2265
2266 v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
2267 config.render_target_mask = rt_mask;
2268
2269 config.color_blend_mode = b_state->colorBlendOp;
2270 config.color_blend_dst_factor =
2271 blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
2272 &pipeline->blend.needs_color_constants);
2273 config.color_blend_src_factor =
2274 blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
2275 &pipeline->blend.needs_color_constants);
2276
2277 config.alpha_blend_mode = b_state->alphaBlendOp;
2278 config.alpha_blend_dst_factor =
2279 blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
2280 &pipeline->blend.needs_color_constants);
2281 config.alpha_blend_src_factor =
2282 blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
2283 &pipeline->blend.needs_color_constants);
2284 }
2285 }
2286
2287 pipeline->blend.color_write_masks = color_write_masks;
2288 }
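
/* Worked example (illustrative): an attachment i with
 * colorWriteMask = R | G | B (0x7) contributes (~0x7 & 0xf) << (4 * i),
 * i.e. only the alpha bit set in its nibble, because a set bit in the
 * hardware mask *disables* a channel rather than enabling it.
 */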
2289
2290 /* This requires that pack_blend() has been called before so we can set
2291 * the overall blend enable bit in the CFG_BITS packet.
2292 */
2293 static void
2294 pack_cfg_bits(struct v3dv_pipeline *pipeline,
2295 const VkPipelineDepthStencilStateCreateInfo *ds_info,
2296 const VkPipelineRasterizationStateCreateInfo *rs_info,
2297 const VkPipelineMultisampleStateCreateInfo *ms_info)
2298 {
2299 assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
2300
2301 pipeline->msaa =
2302 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2303
2304 v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) {
2305 config.enable_forward_facing_primitive =
2306 rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
2307
2308 config.enable_reverse_facing_primitive =
2309 rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
2310
2311 /* Seems like the hardware is backwards regarding this setting... */
2312 config.clockwise_primitives =
2313 rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
2314
2315 config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
2316
2317 /* This is required to pass line rasterization tests in CTS while
2318 * exposing, at least, a minimum of 4-bits of subpixel precision
2319 * (the minimum requirement).
2320 */
2321 config.line_rasterization = 1; /* perp end caps */
2322
2323 if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
2324 config.direct3d_wireframe_triangles_mode = true;
2325 config.direct3d_point_fill_mode =
2326 rs_info->polygonMode == VK_POLYGON_MODE_POINT;
2327 }
2328
2329 config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
2330
2331 /* From the Vulkan spec:
2332 *
2333 * "Provoking Vertex:
2334 *
2335 * The vertex in a primitive from which flat shaded attribute
2336 * values are taken. This is generally the “first” vertex in the
2337 * primitive, and depends on the primitive topology."
2338 *
2339 * First vertex is the Direct3D style for provoking vertex. OpenGL uses
2340 * the last vertex by default.
2341 */
2342 config.direct3d_provoking_vertex = true;
2343
2344 config.blend_enable = pipeline->blend.enables != 0;
2345
2346 /* Disable depth/stencil if we don't have a D/S attachment */
2347 bool has_ds_attachment =
2348 pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
2349
2350 if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
2351 config.z_updates_enable = ds_info->depthWriteEnable;
2352 config.depth_test_function = ds_info->depthCompareOp;
2353 } else {
2354 config.depth_test_function = VK_COMPARE_OP_ALWAYS;
2355 }
2356
2357 /* EZ state will be updated at draw time based on bound pipeline state */
2358 config.early_z_updates_enable = false;
2359 config.early_z_enable = false;
2360
2361 config.stencil_enable =
2362 ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;
2363 };
2364 }
2365
2366 static uint32_t
2367 translate_stencil_op(VkStencilOp op)
2368 {
2369 switch (op) {
2370 case VK_STENCIL_OP_KEEP:
2371 return V3D_STENCIL_OP_KEEP;
2372 case VK_STENCIL_OP_ZERO:
2373 return V3D_STENCIL_OP_ZERO;
2374 case VK_STENCIL_OP_REPLACE:
2375 return V3D_STENCIL_OP_REPLACE;
2376 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
2377 return V3D_STENCIL_OP_INCR;
2378 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
2379 return V3D_STENCIL_OP_DECR;
2380 case VK_STENCIL_OP_INVERT:
2381 return V3D_STENCIL_OP_INVERT;
2382 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
2383 return V3D_STENCIL_OP_INCWRAP;
2384 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
2385 return V3D_STENCIL_OP_DECWRAP;
2386 default:
2387 unreachable("bad stencil op");
2388 }
2389 }
2390
2391 static void
2392 pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
2393 uint8_t *stencil_cfg,
2394 bool is_front,
2395 bool is_back,
2396 const VkStencilOpState *stencil_state)
2397 {
2398 /* From the Vulkan spec:
2399 *
2400 * "Reference is an integer reference value that is used in the unsigned
2401 * stencil comparison. The reference value used by stencil comparison
2402 * must be within the range [0,2^s-1] , where s is the number of bits in
2403 * the stencil framebuffer attachment, otherwise the reference value is
2404 * considered undefined."
2405 *
2406 * In our case, 's' is always 8, so we clamp to that to prevent our packing
2407 * functions from asserting in debug mode if they see larger values.
2408 *
2409 * If we have dynamic state we need to make sure we set the corresponding
2410 * state bits to 0, since cl_emit_with_prepacked ORs the new value with
2411 * the old.
2412 */
2413 const uint8_t write_mask =
2414 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
2415 0 : stencil_state->writeMask & 0xff;
2416
2417 const uint8_t compare_mask =
2418 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
2419 0 : stencil_state->compareMask & 0xff;
2420
2421 const uint8_t reference =
2422 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
2423 0 : stencil_state->reference & 0xff;
2424
2425 v3dv_pack(stencil_cfg, STENCIL_CFG, config) {
2426 config.front_config = is_front;
2427 config.back_config = is_back;
2428 config.stencil_write_mask = write_mask;
2429 config.stencil_test_mask = compare_mask;
2430 config.stencil_test_function = stencil_state->compareOp;
2431 config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
2432 config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
2433 config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
2434 config.stencil_ref_value = reference;
2435 }
2436 }
2437
2438 static void
2439 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
2440 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2441 {
2442 assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
2443
2444 if (!ds_info || !ds_info->stencilTestEnable)
2445 return;
2446
2447 if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2448 return;
2449
2450 const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
2451 V3DV_DYNAMIC_STENCIL_WRITE_MASK |
2452 V3DV_DYNAMIC_STENCIL_REFERENCE;
2453
2454
2455 /* If front != back or we have dynamic stencil state we can't emit a single
2456 * packet for both faces.
2457 */
2458 bool needs_front_and_back = false;
2459 if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
2460 memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
2461 needs_front_and_back = true;
2462
2463 /* If the front and back configurations are the same we can emit both with
2464 * a single packet.
2465 */
2466 pipeline->emit_stencil_cfg[0] = true;
2467 if (!needs_front_and_back) {
2468 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
2469 true, true, &ds_info->front);
2470 } else {
2471 pipeline->emit_stencil_cfg[1] = true;
2472 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
2473 true, false, &ds_info->front);
2474 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
2475 false, true, &ds_info->back);
2476 }
2477 }
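
/* Illustrative example: a pipeline whose front face uses
 * VK_STENCIL_OP_REPLACE as passOp while the back face uses
 * VK_STENCIL_OP_KEEP (so memcmp(&front, &back, ...) != 0) takes the
 * two-packet path: stencil_cfg[0] is packed with front_config = true /
 * back_config = false and stencil_cfg[1] with the opposite. Identical faces
 * with no dynamic stencil state only emit stencil_cfg[0] covering both.
 */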
2478
2479 static bool
2480 stencil_op_is_no_op(const VkStencilOpState *stencil)
2481 {
2482 return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2483 stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2484 }
2485
2486 static void
2487 enable_depth_bias(struct v3dv_pipeline *pipeline,
2488 const VkPipelineRasterizationStateCreateInfo *rs_info)
2489 {
2490 pipeline->depth_bias.enabled = false;
2491 pipeline->depth_bias.is_z16 = false;
2492
2493 if (!rs_info || !rs_info->depthBiasEnable)
2494 return;
2495
2496 /* Check the depth/stencil attachment description for the subpass used with
2497 * this pipeline.
2498 */
2499 assert(pipeline->pass && pipeline->subpass);
2500 struct v3dv_render_pass *pass = pipeline->pass;
2501 struct v3dv_subpass *subpass = pipeline->subpass;
2502
2503 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2504 return;
2505
2506 assert(subpass->ds_attachment.attachment < pass->attachment_count);
2507 struct v3dv_render_pass_attachment *att =
2508 &pass->attachments[subpass->ds_attachment.attachment];
2509
2510 if (att->desc.format == VK_FORMAT_D16_UNORM)
2511 pipeline->depth_bias.is_z16 = true;
2512
2513 pipeline->depth_bias.enabled = true;
2514 }
2515
2516 static void
2517 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2518 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2519 {
2520 if (!ds_info || !ds_info->depthTestEnable) {
2521 pipeline->ez_state = VC5_EZ_DISABLED;
2522 return;
2523 }
2524
2525 switch (ds_info->depthCompareOp) {
2526 case VK_COMPARE_OP_LESS:
2527 case VK_COMPARE_OP_LESS_OR_EQUAL:
2528 pipeline->ez_state = VC5_EZ_LT_LE;
2529 break;
2530 case VK_COMPARE_OP_GREATER:
2531 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2532 pipeline->ez_state = VC5_EZ_GT_GE;
2533 break;
2534 case VK_COMPARE_OP_NEVER:
2535 case VK_COMPARE_OP_EQUAL:
2536 pipeline->ez_state = VC5_EZ_UNDECIDED;
2537 break;
2538 default:
2539 pipeline->ez_state = VC5_EZ_DISABLED;
2540 break;
2541 }
2542
2543 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2544 if (ds_info->stencilTestEnable &&
2545 (!stencil_op_is_no_op(&ds_info->front) ||
2546 !stencil_op_is_no_op(&ds_info->back))) {
2547 pipeline->ez_state = VC5_EZ_DISABLED;
2548 }
2549 }
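
/* Worked examples (illustrative): depthCompareOp = VK_COMPARE_OP_LESS gives
 * VC5_EZ_LT_LE and VK_COMPARE_OP_GREATER_OR_EQUAL gives VC5_EZ_GT_GE, while
 * VK_COMPARE_OP_EQUAL leaves EZ undecided. If the same pipeline also enables
 * stencil with, say, compareOp = VK_COMPARE_OP_LESS on the front face (not a
 * no-op), EZ ends up disabled regardless of the depth function.
 */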
2550
2551 static void
2552 pack_shader_state_record(struct v3dv_pipeline *pipeline)
2553 {
2554 assert(sizeof(pipeline->shader_state_record) ==
2555 cl_packet_length(GL_SHADER_STATE_RECORD));
2556
2557 struct v3d_fs_prog_data *prog_data_fs =
2558 pipeline->fs->current_variant->prog_data.fs;
2559
2560 struct v3d_vs_prog_data *prog_data_vs =
2561 pipeline->vs->current_variant->prog_data.vs;
2562
2563 struct v3d_vs_prog_data *prog_data_vs_bin =
2564 pipeline->vs_bin->current_variant->prog_data.vs;
2565
2566
2567 /* Note: we are not packing addresses, as we need the job (see
2568 * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
2569 * point, as they depend on dynamic info that can be set after pipeline
2570 * creation (like the viewport). They will need to be filled in later, so
2571 * we are doing a partial prepacking.
2572 */
2573 v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
2574 shader.enable_clipping = true;
2575
2576 shader.point_size_in_shaded_vertex_data =
2577 pipeline->vs->topology == PIPE_PRIM_POINTS;
2578
2579 /* Must be set if the shader modifies Z, discards, or modifies
2580 * the sample mask. For any of these cases, the fragment
2581 * shader needs to write the Z value (even if it just discards).
2582 */
2583 shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
2584 /* Set if the EZ test must be disabled (due to shader side
2585 * effects and the early_z flag not being present in the
2586 * shader).
2587 */
2588 shader.turn_off_early_z_test = prog_data_fs->disable_ez;
2589
2590 shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
2591 prog_data_fs->uses_center_w;
2592
2593 /* The description for gl_SampleID states that if a fragment shader reads
2594 * it, then we should automatically activate per-sample shading. However,
2595 * the Vulkan spec also states that if a framebuffer has no attachments:
2596 *
2597 * "The subpass continues to use the width, height, and layers of the
2598 * framebuffer to define the dimensions of the rendering area, and the
2599 * rasterizationSamples from each pipeline’s
2600 * VkPipelineMultisampleStateCreateInfo to define the number of
2601 * samples used in rasterization."
2602 *
2603 * So in this scenario, if the pipeline doesn't enable multiple samples
2604 * but the fragment shader accesses gl_SampleID we would be requested
2605 * to do per-sample shading in single sample rasterization mode, which
2606 * is pointless, so just disable it in that case.
2607 */
2608 shader.enable_sample_rate_shading =
2609 pipeline->sample_rate_shading ||
2610 (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
2611
2612 shader.any_shader_reads_hardware_written_primitive_id = false;
2613
2614 shader.do_scoreboard_wait_on_first_thread_switch =
2615 prog_data_fs->lock_scoreboard_on_first_thrsw;
2616 shader.disable_implicit_point_line_varyings =
2617 !prog_data_fs->uses_implicit_point_line_varyings;
2618
2619 shader.number_of_varyings_in_fragment_shader =
2620 prog_data_fs->num_inputs;
2621
2622 shader.coordinate_shader_propagate_nans = true;
2623 shader.vertex_shader_propagate_nans = true;
2624 shader.fragment_shader_propagate_nans = true;
2625
2626 /* Note: see previous note about addresses */
2627 /* shader.coordinate_shader_code_address */
2628 /* shader.vertex_shader_code_address */
2629 /* shader.fragment_shader_code_address */
2630
2631 /* FIXME: Use combined input/output size flag in the common case (also
2632 * on v3d, see v3dx_draw).
2633 */
2634 shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
2635 prog_data_vs_bin->separate_segments;
2636 shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
2637 prog_data_vs->separate_segments;
2638
2639 shader.coordinate_shader_input_vpm_segment_size =
2640 prog_data_vs_bin->separate_segments ?
2641 prog_data_vs_bin->vpm_input_size : 1;
2642 shader.vertex_shader_input_vpm_segment_size =
2643 prog_data_vs->separate_segments ?
2644 prog_data_vs->vpm_input_size : 1;
2645
2646 shader.coordinate_shader_output_vpm_segment_size =
2647 prog_data_vs_bin->vpm_output_size;
2648 shader.vertex_shader_output_vpm_segment_size =
2649 prog_data_vs->vpm_output_size;
2650
2651 /* Note: see previous note about addresses */
2652 /* shader.coordinate_shader_uniforms_address */
2653 /* shader.vertex_shader_uniforms_address */
2654 /* shader.fragment_shader_uniforms_address */
2655
2656 shader.min_coord_shader_input_segments_required_in_play =
2657 pipeline->vpm_cfg_bin.As;
2658 shader.min_vertex_shader_input_segments_required_in_play =
2659 pipeline->vpm_cfg.As;
2660
2661 shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
2662 pipeline->vpm_cfg_bin.Ve;
2663 shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
2664 pipeline->vpm_cfg.Ve;
2665
2666 shader.coordinate_shader_4_way_threadable =
2667 prog_data_vs_bin->base.threads == 4;
2668 shader.vertex_shader_4_way_threadable =
2669 prog_data_vs->base.threads == 4;
2670 shader.fragment_shader_4_way_threadable =
2671 prog_data_fs->base.threads == 4;
2672
2673 shader.coordinate_shader_start_in_final_thread_section =
2674 prog_data_vs_bin->base.single_seg;
2675 shader.vertex_shader_start_in_final_thread_section =
2676 prog_data_vs->base.single_seg;
2677 shader.fragment_shader_start_in_final_thread_section =
2678 prog_data_fs->base.single_seg;
2679
2680 shader.vertex_id_read_by_coordinate_shader =
2681 prog_data_vs_bin->uses_vid;
2682 shader.base_instance_id_read_by_coordinate_shader =
2683 prog_data_vs_bin->uses_biid;
2684 shader.instance_id_read_by_coordinate_shader =
2685 prog_data_vs_bin->uses_iid;
2686 shader.vertex_id_read_by_vertex_shader =
2687 prog_data_vs->uses_vid;
2688 shader.base_instance_id_read_by_vertex_shader =
2689 prog_data_vs->uses_biid;
2690 shader.instance_id_read_by_vertex_shader =
2691 prog_data_vs->uses_iid;
2692
2693 /* Note: see previous note about addresses */
2694 /* shader.address_of_default_attribute_values */
2695 }
2696 }
2697
2698 static void
2699 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
2700 {
2701 assert(sizeof(pipeline->vcm_cache_size) ==
2702 cl_packet_length(VCM_CACHE_SIZE));
2703
2704 v3dv_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
2705 vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
2706 vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
2707 }
2708 }
2709
2710 /* As defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD */
2711 static uint8_t
2712 get_attr_type(const struct util_format_description *desc)
2713 {
2714 uint32_t r_size = desc->channel[0].size;
2715 uint8_t attr_type = ATTRIBUTE_FLOAT;
2716
2717 switch (desc->channel[0].type) {
2718 case UTIL_FORMAT_TYPE_FLOAT:
2719 if (r_size == 32) {
2720 attr_type = ATTRIBUTE_FLOAT;
2721 } else {
2722 assert(r_size == 16);
2723 attr_type = ATTRIBUTE_HALF_FLOAT;
2724 }
2725 break;
2726
2727 case UTIL_FORMAT_TYPE_SIGNED:
2728 case UTIL_FORMAT_TYPE_UNSIGNED:
2729 switch (r_size) {
2730 case 32:
2731 attr_type = ATTRIBUTE_INT;
2732 break;
2733 case 16:
2734 attr_type = ATTRIBUTE_SHORT;
2735 break;
2736 case 10:
2737 attr_type = ATTRIBUTE_INT2_10_10_10;
2738 break;
2739 case 8:
2740 attr_type = ATTRIBUTE_BYTE;
2741 break;
2742 default:
2743 fprintf(stderr,
2744 "format %s unsupported\n",
2745 desc->name);
2746 attr_type = ATTRIBUTE_BYTE;
2747 abort();
2748 }
2749 break;
2750
2751 default:
2752 fprintf(stderr,
2753 "format %s unsupported\n",
2754 desc->name);
2755 abort();
2756 }
2757
2758 return attr_type;
2759 }
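
/* Worked examples (illustrative): VK_FORMAT_R32G32B32A32_SFLOAT has 32-bit
 * float channels and maps to ATTRIBUTE_FLOAT, VK_FORMAT_R16G16_SFLOAT to
 * ATTRIBUTE_HALF_FLOAT, VK_FORMAT_R16G16_SINT to ATTRIBUTE_SHORT and
 * VK_FORMAT_R8G8B8A8_UNORM to ATTRIBUTE_BYTE, all keyed off channel[0] of
 * the util_format description.
 */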
2760
2761 static bool
2762 create_default_attribute_values(struct v3dv_pipeline *pipeline,
2763 const VkPipelineVertexInputStateCreateInfo *vi_info)
2764 {
2765 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2766
2767 if (pipeline->default_attribute_values == NULL) {
2768 pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size,
2769 "default_vi_attributes",
2770 true);
2771
2772 if (!pipeline->default_attribute_values) {
2773 fprintf(stderr, "failed to allocate memory for the default "
2774 "attribute values\n");
2775 return false;
2776 }
2777 }
2778
2779 bool ok = v3dv_bo_map(pipeline->device,
2780 pipeline->default_attribute_values, size);
2781 if (!ok) {
2782 fprintf(stderr, "failed to map default attribute values buffer\n");
2783 return false;
2784 }
2785
2786 uint32_t *attrs = pipeline->default_attribute_values->map;
2787
2788 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2789 attrs[i * 4 + 0] = 0;
2790 attrs[i * 4 + 1] = 0;
2791 attrs[i * 4 + 2] = 0;
2792 if (i < pipeline->va_count && vk_format_is_int(pipeline->va[i].vk_format)) {
2793 attrs[i * 4 + 3] = 1;
2794 } else {
2795 attrs[i * 4 + 3] = fui(1.0);
2796 }
2797 }
2798
2799 v3dv_bo_unmap(pipeline->device, pipeline->default_attribute_values);
2800
2801 return true;
2802 }
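
/* Illustrative result: after this runs, every vertex attribute gets the
 * default value (0, 0, 0, 1): for attributes with an integer vk_format the
 * last component is the integer 1, for everything else it is fui(1.0)
 * (0x3f800000), so components missing from the vertex buffer read back with
 * the expected defaults.
 */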
2803
2804 static void
2805 pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
2806 uint32_t index,
2807 const VkVertexInputAttributeDescription *vi_desc)
2808 {
2809 const uint32_t packet_length =
2810 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
2811
2812 const struct util_format_description *desc =
2813 vk_format_description(vi_desc->format);
2814
2815 uint32_t binding = vi_desc->binding;
2816
2817 v3dv_pack(&pipeline->vertex_attrs[index * packet_length],
2818 GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
2819
2820 /* vec_size == 0 means 4 */
2821 attr.vec_size = desc->nr_channels & 3;
2822 attr.signed_int_type = (desc->channel[0].type ==
2823 UTIL_FORMAT_TYPE_SIGNED);
2824 attr.normalized_int_type = desc->channel[0].normalized;
2825 attr.read_as_int_uint = desc->channel[0].pure_integer;
2826
2827 attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
2828 0xffff);
2829 attr.stride = pipeline->vb[binding].stride;
2830 attr.type = get_attr_type(desc);
2831 }
2832 }
2833
2834 static void
2835 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2836 const VkPipelineMultisampleStateCreateInfo *ms_info)
2837 {
2838 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2839
2840 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2841 * requires this to be 0xf or 0x0 if using a single sample.
2842 */
2843 if (ms_info && ms_info->pSampleMask &&
2844 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2845 pipeline->sample_mask &= ms_info->pSampleMask[0];
2846 }
2847 }
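
/* Worked example (illustrative): a 4x multisampled pipeline created with
 * pSampleMask[0] = 0x5 ends up with sample_mask = 0xf & 0x5 = 0x5, while a
 * single-sampled pipeline keeps the full 0xf mask no matter what pSampleMask
 * contains, as required by the hardware restriction noted above.
 */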
2848
2849 static void
2850 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2851 const VkPipelineMultisampleStateCreateInfo *ms_info)
2852 {
2853 pipeline->sample_rate_shading =
2854 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2855 ms_info->sampleShadingEnable;
2856 }
2857
2858 static VkResult
2859 pipeline_init(struct v3dv_pipeline *pipeline,
2860 struct v3dv_device *device,
2861 struct v3dv_pipeline_cache *cache,
2862 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2863 const VkAllocationCallbacks *pAllocator)
2864 {
2865 VkResult result = VK_SUCCESS;
2866
2867 pipeline->device = device;
2868
2869 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2870 pipeline->layout = layout;
2871
2872 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2873 assert(pCreateInfo->subpass < render_pass->subpass_count);
2874 pipeline->pass = render_pass;
2875 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2876
2877 /* If rasterization is not enabled, various CreateInfo structs must be
2878 * ignored.
2879 */
2880 const bool raster_enabled =
2881 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2882
2883 const VkPipelineViewportStateCreateInfo *vp_info =
2884 raster_enabled ? pCreateInfo->pViewportState : NULL;
2885
2886 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2887 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2888
2889 const VkPipelineRasterizationStateCreateInfo *rs_info =
2890 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2891
2892 const VkPipelineColorBlendStateCreateInfo *cb_info =
2893 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2894
2895 const VkPipelineMultisampleStateCreateInfo *ms_info =
2896 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2897
2898 pipeline_init_dynamic_state(pipeline,
2899 pCreateInfo->pDynamicState,
2900 vp_info, ds_info, cb_info, rs_info);
2901
2902 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2903 * feature and it shouldn't be used by any pipeline.
2904 */
2905 assert(!ds_info || !ds_info->depthBoundsTestEnable);
2906
2907 pack_blend(pipeline, cb_info);
2908 pack_cfg_bits(pipeline, ds_info, rs_info, ms_info);
2909 pack_stencil_cfg(pipeline, ds_info);
2910 pipeline_set_ez_state(pipeline, ds_info);
2911 enable_depth_bias(pipeline, rs_info);
2912 pipeline_set_sample_mask(pipeline, ms_info);
2913 pipeline_set_sample_rate_shading(pipeline, ms_info);
2914
2915 pipeline->primitive_restart =
2916 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2917
2918 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2919
2920 if (result != VK_SUCCESS) {
2921 /* The caller will destroy the pipeline and we didn't allocate any
2922 * extra info, so we don't need to do anything else.
2923 */
2924 return result;
2925 }
2926
2927 pack_shader_state_record(pipeline);
2928 pack_vcm_cache_size(pipeline);
2929
2930 const VkPipelineVertexInputStateCreateInfo *vi_info =
2931 pCreateInfo->pVertexInputState;
2932
2933 pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
2934 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2935 const VkVertexInputBindingDescription *desc =
2936 &vi_info->pVertexBindingDescriptions[i];
2937
2938 pipeline->vb[desc->binding].stride = desc->stride;
2939 pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
2940 }
2941
2942 pipeline->va_count = 0;
2943 nir_shader *shader = pipeline->vs->nir;
2944
2945 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2946 const VkVertexInputAttributeDescription *desc =
2947 &vi_info->pVertexAttributeDescriptions[i];
2948 uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
2949
2950 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_in, location);
2951
2952 if (var != NULL) {
2953 unsigned driver_location = var->data.driver_location;
2954
2955 assert(driver_location < MAX_VERTEX_ATTRIBS);
2956 pipeline->va[driver_location].offset = desc->offset;
2957 pipeline->va[driver_location].binding = desc->binding;
2958 pipeline->va[driver_location].vk_format = desc->format;
2959
2960 pack_shader_state_attribute_record(pipeline, driver_location, desc);
2961
2962 pipeline->va_count++;
2963 }
2964 }
2965
2966 if (!create_default_attribute_values(pipeline, vi_info))
2967 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2968
2969 return result;
2970 }
2971
2972 static VkResult
2973 graphics_pipeline_create(VkDevice _device,
2974 VkPipelineCache _cache,
2975 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2976 const VkAllocationCallbacks *pAllocator,
2977 VkPipeline *pPipeline)
2978 {
2979 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2980 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2981
2982 struct v3dv_pipeline *pipeline;
2983 VkResult result;
2984
2985 /* Use the default pipeline cache if none is specified */
2986 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2987 cache = &device->default_pipeline_cache;
2988
2989 pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
2990 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2991 if (pipeline == NULL)
2992 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2993
2994 result = pipeline_init(pipeline, device, cache,
2995 pCreateInfo,
2996 pAllocator);
2997
2998 if (result != VK_SUCCESS) {
2999 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3000 return result;
3001 }
3002
3003 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3004
3005 return VK_SUCCESS;
3006 }
3007
3008 VkResult
3009 v3dv_CreateGraphicsPipelines(VkDevice _device,
3010 VkPipelineCache pipelineCache,
3011 uint32_t count,
3012 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3013 const VkAllocationCallbacks *pAllocator,
3014 VkPipeline *pPipelines)
3015 {
3016 VkResult result = VK_SUCCESS;
3017
3018 for (uint32_t i = 0; i < count; i++) {
3019 VkResult local_result;
3020
3021 local_result = graphics_pipeline_create(_device,
3022 pipelineCache,
3023 &pCreateInfos[i],
3024 pAllocator,
3025 &pPipelines[i]);
3026
3027 if (local_result != VK_SUCCESS) {
3028 result = local_result;
3029 pPipelines[i] = VK_NULL_HANDLE;
3030 }
3031 }
3032
3033 return result;
3034 }
3035
3036 static void
3037 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3038 {
3039 assert(glsl_type_is_vector_or_scalar(type));
3040
3041 uint32_t comp_size = glsl_type_is_boolean(type)
3042 ? 4 : glsl_get_bit_size(type) / 8;
3043 unsigned length = glsl_get_vector_elements(type);
3044 *size = comp_size * length;
3045 *align = comp_size * (length == 3 ? 4 : length);
3046 }
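
/* Worked example (illustrative): a vec3 of 32-bit floats gets size
 * 4 * 3 = 12 and align 4 * 4 = 16 (vec3 is padded to a vec4 slot), while a
 * single bool gets comp_size 4, so size = align = 4.
 */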
3047
3048 static void
3049 lower_cs_shared(struct nir_shader *nir)
3050 {
3051 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
3052 nir_var_mem_shared, shared_type_info);
3053 NIR_PASS_V(nir, nir_lower_explicit_io,
3054 nir_var_mem_shared, nir_address_format_32bit_offset);
3055 }
3056
3057 static VkResult
3058 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3059 struct v3dv_pipeline_cache *cache,
3060 const VkComputePipelineCreateInfo *info,
3061 const VkAllocationCallbacks *alloc)
3062 {
3063 struct v3dv_device *device = pipeline->device;
3064 struct v3dv_physical_device *physical_device =
3065 &device->instance->physicalDevice;
3066
3067 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3068 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3069
3070 struct v3dv_pipeline_stage *p_stage =
3071 vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8,
3072 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3073 if (!p_stage)
3074 return VK_ERROR_OUT_OF_HOST_MEMORY;
3075
3076 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3077 p_stage->compiled_variant_count = 0;
3078 p_stage->pipeline = pipeline;
3079 p_stage->stage = stage;
3080 p_stage->entrypoint = sinfo->pName;
3081 p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
3082 p_stage->spec_info = sinfo->pSpecializationInfo;
3083
3084 pipeline_hash_shader(p_stage->module,
3085 p_stage->entrypoint,
3086 stage,
3087 p_stage->spec_info,
3088 p_stage->shader_sha1);
3089
3090 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3091
3092 pipeline->active_stages |= sinfo->stage;
3093 st_nir_opts(p_stage->nir);
3094 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3095 lower_cs_shared(p_stage->nir);
3096
3097 pipeline->cs = p_stage;
3098
3099 struct v3d_key *key = &p_stage->key.base;
3100 memset(key, 0, sizeof(*key));
3101 pipeline_populate_v3d_key(key, p_stage, 0,
3102 pipeline->device->features.robustBufferAccess);
3103
3104 VkResult result;
3105 p_stage->current_variant =
3106 pregenerate_shader_variants(p_stage, cache, key, sizeof(*key), alloc, &result);
3107 return result;
3108 }
3109
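/* Initializes a compute pipeline from its create info and compiles its
 * single compute stage.
 */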
static VkResult
compute_pipeline_init(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      struct v3dv_pipeline_cache *cache,
                      const VkComputePipelineCreateInfo *info,
                      const VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);

   pipeline->device = device;
   pipeline->layout = layout;

   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);

   return result;
}

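/* Allocates and initializes a single compute pipeline, destroying it again
 * if initialization fails.
 */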
static VkResult
compute_pipeline_create(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkComputePipelineCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkPipeline *pPipeline)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct v3dv_pipeline *pipeline;
   VkResult result;

   /* Use the default pipeline cache if none is specified */
   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
      cache = &device->default_pipeline_cache;

   pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = compute_pipeline_init(pipeline, device, cache,
                                  pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      v3dv_destroy_pipeline(pipeline, device, pAllocator);
      return result;
   }

   *pPipeline = v3dv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

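/* Creates each compute pipeline in pCreateInfos independently; failing
 * entries are set to VK_NULL_HANDLE and the error from the last failure is
 * returned.
 */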
VkResult
v3dv_CreateComputePipelines(VkDevice _device,
                            VkPipelineCache pipelineCache,
                            uint32_t createInfoCount,
                            const VkComputePipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   for (uint32_t i = 0; i < createInfoCount; i++) {
      VkResult local_result;
      local_result = compute_pipeline_create(_device,
                                             pipelineCache,
                                             &pCreateInfos[i],
                                             pAllocator,
                                             &pPipelines[i]);

      if (local_result != VK_SUCCESS) {
         result = local_result;
         pPipelines[i] = VK_NULL_HANDLE;
      }
   }

   return result;
}