1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_util.h"
25
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28
29 #include "common/v3d_debug.h"
30 #include "qpu/qpu_disasm.h"
31
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34
35 #include "util/u_atomic.h"
36 #include "util/os_time.h"
37
38 #include "vk_nir_convert_ycbcr.h"
39 #include "vk_pipeline.h"
40 #include "vulkan/util/vk_format.h"
41
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44
45 void
v3dv_print_v3d_key(struct v3d_key *key,
                   uint32_t v3d_key_size)
{
49 struct mesa_sha1 ctx;
50 unsigned char sha1[20];
51 char sha1buf[41];
52
53 _mesa_sha1_init(&ctx);
54
55 _mesa_sha1_update(&ctx, key, v3d_key_size);
56
57 _mesa_sha1_final(&ctx, sha1);
58 _mesa_sha1_format(sha1buf, sha1);
59
60 fprintf(stderr, "key %p: %s\n", key, sha1buf);
61 }
62
63 static void
pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
{
66 VkPipelineShaderStageCreateInfo info = {
67 .module = vk_shader_module_handle_from_nir(p_stage->nir),
68 .pName = p_stage->entrypoint,
69 .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70 };
71
72 vk_pipeline_hash_shader_stage(&info, NULL, p_stage->shader_sha1);
73 }
74
75 void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
{
79 /* The assembly BO is shared by all variants in the pipeline, so it can't
80 * be freed here and should be freed with the pipeline
81 */
82 if (variant->qpu_insts) {
83 free(variant->qpu_insts);
84 variant->qpu_insts = NULL;
85 }
86 ralloc_free(variant->prog_data.base);
87 vk_free(&device->vk.alloc, variant);
88 }
89
90 static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
{
95 if (!p_stage)
96 return;
97
98 ralloc_free(p_stage->nir);
99 vk_free2(&device->vk.alloc, pAllocator, p_stage);
100 }
101
102 static void
pipeline_free_stages(struct v3dv_device *device,
                     struct v3dv_pipeline *pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
107 assert(pipeline);
108
109 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
110 destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator);
111 pipeline->stages[stage] = NULL;
112 }
113 }
114
115 static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
120 if (!pipeline)
121 return;
122
123 pipeline_free_stages(device, pipeline, pAllocator);
124
125 if (pipeline->shared_data) {
126 v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
127 pipeline->shared_data = NULL;
128 }
129
130 if (pipeline->spill.bo) {
131 assert(pipeline->spill.size_per_thread > 0);
132 v3dv_bo_free(device, pipeline->spill.bo);
133 }
134
135 if (pipeline->default_attribute_values) {
136 v3dv_bo_free(device, pipeline->default_attribute_values);
137 pipeline->default_attribute_values = NULL;
138 }
139
140 if (pipeline->executables.mem_ctx)
141 ralloc_free(pipeline->executables.mem_ctx);
142
143 if (pipeline->layout)
144 v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator);
145
146 vk_object_free(&device->vk, pAllocator, pipeline);
147 }
148
149 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
154 V3DV_FROM_HANDLE(v3dv_device, device, _device);
155 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
156
157 if (!pipeline)
158 return;
159
160 v3dv_destroy_pipeline(pipeline, device, pAllocator);
161 }
162
163 static const struct spirv_to_nir_options default_spirv_options = {
164 .caps = {
165 .device_group = true,
166 .float_controls = true,
167 .multiview = true,
168 .storage_8bit = true,
169 .storage_16bit = true,
170 .subgroup_ballot = true,
171 .subgroup_basic = true,
172 .subgroup_quad = true,
173 .subgroup_shuffle = true,
174 .subgroup_vote = true,
175 .variable_pointers = true,
176 .vk_memory_model = true,
177 .vk_memory_model_device_scope = true,
178 .physical_storage_buffer_address = true,
179 .workgroup_memory_explicit_layout = true,
180 .image_read_without_format = true,
181 .demote_to_helper_invocation = true,
182 },
183 .ubo_addr_format = nir_address_format_32bit_index_offset,
184 .ssbo_addr_format = nir_address_format_32bit_index_offset,
185 .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
186 .push_const_addr_format = nir_address_format_logical,
187 .shared_addr_format = nir_address_format_32bit_offset,
188 };
189
190 const nir_shader_compiler_options v3dv_nir_options = {
191 .lower_uadd_sat = true,
192 .lower_usub_sat = true,
193 .lower_iadd_sat = true,
194 .lower_all_io_to_temps = true,
195 .lower_extract_byte = true,
196 .lower_extract_word = true,
197 .lower_insert_byte = true,
198 .lower_insert_word = true,
199 .lower_bitfield_insert = true,
200 .lower_bitfield_extract = true,
201 .lower_bitfield_reverse = true,
202 .lower_bit_count = true,
203 .lower_cs_local_id_to_index = true,
204 .lower_ffract = true,
205 .lower_fmod = true,
206 .lower_pack_unorm_2x16 = true,
207 .lower_pack_snorm_2x16 = true,
208 .lower_unpack_unorm_2x16 = true,
209 .lower_unpack_snorm_2x16 = true,
210 .lower_pack_unorm_4x8 = true,
211 .lower_pack_snorm_4x8 = true,
212 .lower_unpack_unorm_4x8 = true,
213 .lower_unpack_snorm_4x8 = true,
214 .lower_pack_half_2x16 = true,
215 .lower_unpack_half_2x16 = true,
216 .lower_pack_32_2x16 = true,
217 .lower_pack_32_2x16_split = true,
218 .lower_unpack_32_2x16_split = true,
219 .lower_mul_2x32_64 = true,
220 .lower_fdiv = true,
221 .lower_find_lsb = true,
222 .lower_ffma16 = true,
223 .lower_ffma32 = true,
224 .lower_ffma64 = true,
225 .lower_flrp32 = true,
226 .lower_fpow = true,
227 .lower_fsat = true,
228 .lower_fsqrt = true,
229 .lower_ifind_msb = true,
230 .lower_isign = true,
231 .lower_ldexp = true,
232 .lower_mul_high = true,
233 .lower_wpos_pntc = false,
234 .lower_to_scalar = true,
235 .lower_device_index_to_zero = true,
236 .lower_fquantize2f16 = true,
237 .has_fsub = true,
238 .has_isub = true,
239 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
240 * needs to be supported */
241 .lower_interpolate_at = true,
242 .max_unroll_iterations = 16,
243 .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
244 .divergence_analysis_options =
245 nir_divergence_multiple_workgroup_per_compute_subgroup,
246 };
247
248 const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)
{
251 return &v3dv_nir_options;
252 }
253
254 static const struct vk_ycbcr_conversion_state *
lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set,
                        uint32_t binding, uint32_t array_index)
{
258 struct v3dv_pipeline_layout *pipeline_layout =
259 (struct v3dv_pipeline_layout *) _pipeline_layout;
260
261 assert(set < pipeline_layout->num_sets);
262 struct v3dv_descriptor_set_layout *set_layout =
263 pipeline_layout->set[set].layout;
264
265 assert(binding < set_layout->binding_count);
266 struct v3dv_descriptor_set_binding_layout *bind_layout =
267 &set_layout->binding[binding];
268
269 if (bind_layout->immutable_samplers_offset) {
270 const struct v3dv_sampler *immutable_samplers =
271 v3dv_immutable_samplers(set_layout, bind_layout);
272 const struct v3dv_sampler *sampler = &immutable_samplers[array_index];
273 return sampler->conversion ? &sampler->conversion->state : NULL;
274 } else {
275 return NULL;
276 }
277 }
278
279 static void
preprocess_nir(nir_shader *nir)
{
282 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
283 .frag_coord = true,
284 .point_coord = true,
285 };
286 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
287
288 /* Vulkan uses the separate-shader linking model */
289 nir->info.separate_shader = true;
290
291 /* Make sure we lower variable initializers on output variables so that
292 * nir_remove_dead_variables below sees the corresponding stores
293 */
294 NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
295
296 if (nir->info.stage == MESA_SHADER_FRAGMENT)
297 NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
298 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
299 NIR_PASS(_, nir, nir_lower_input_attachments,
300 &(nir_input_attachment_options) {
301 .use_fragcoord_sysval = false,
302 });
303 }
304
305 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
306 nir_shader_get_entrypoint(nir), true, false);
307
308 NIR_PASS(_, nir, nir_lower_system_values);
309
310 NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
311
312 NIR_PASS(_, nir, nir_normalize_cubemap_coords);
313
314 NIR_PASS(_, nir, nir_lower_global_vars_to_local);
315
316 NIR_PASS(_, nir, nir_split_var_copies);
317 NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
318
319 v3d_optimize_nir(NULL, nir);
320
321 NIR_PASS(_, nir, nir_lower_explicit_io,
322 nir_var_mem_push_const,
323 nir_address_format_32bit_offset);
324
325 NIR_PASS(_, nir, nir_lower_explicit_io,
326 nir_var_mem_ubo | nir_var_mem_ssbo,
327 nir_address_format_32bit_index_offset);
328
329 NIR_PASS(_, nir, nir_lower_explicit_io,
330 nir_var_mem_global,
331 nir_address_format_2x32bit_global);
332
333 NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
334
335 /* Lower a bunch of stuff */
336 NIR_PASS(_, nir, nir_lower_var_copies);
337
338 NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
339
340 NIR_PASS(_, nir, nir_lower_indirect_derefs,
341 nir_var_function_temp, 2);
342
343 NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
344 nir_var_mem_ubo | nir_var_mem_ssbo,
345 nir_lower_direct_array_deref_of_vec_load);
346
347 NIR_PASS(_, nir, nir_lower_frexp);
348
349 /* Get rid of split copies */
350 v3d_optimize_nir(NULL, nir);
351 }
352
353 static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
357 nir_shader *nir;
358 const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
359 gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage);
360
361
362 if (V3D_DBG(DUMP_SPIRV) && stage->module->nir == NULL)
363 v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
364
   /* vk_shader_module_to_nir also handles internal shaders, when module->nir
    * != NULL. It also calls nir_validate_shader in both cases, so we don't
    * call it again here.
    */
369 VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
370 gl_stage,
371 stage->entrypoint,
372 stage->spec_info,
373 &default_spirv_options,
374 nir_options,
375 NULL, &nir);
376 if (result != VK_SUCCESS)
377 return NULL;
378 assert(nir->info.stage == gl_stage);
379
380 if (V3D_DBG(SHADERDB) && stage->module->nir == NULL) {
381 char sha1buf[41];
382 _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
383 nir->info.name = ralloc_strdup(nir, sha1buf);
384 }
385
386 if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
387 fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
388 broadcom_shader_stage_name(stage->stage),
389 stage->program_id);
390 nir_print_shader(nir, stderr);
391 fprintf(stderr, "\n");
392 }
393
394 preprocess_nir(nir);
395
396 return nir;
397 }
398
399 static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
402 return glsl_count_attribute_slots(type, false);
403 }
404
/* FIXME: the number of parameters for this function is somewhat large.
 * Perhaps rethink.
 */
408 static unsigned
descriptor_map_add(struct v3dv_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   int start_index,
                   uint8_t return_size,
                   uint8_t plane)
{
418 assert(array_index < array_size);
419 assert(return_size == 16 || return_size == 32);
420
421 unsigned index = start_index;
422 for (; index < map->num_desc; index++) {
423 if (map->used[index] &&
424 set == map->set[index] &&
425 binding == map->binding[index] &&
426 array_index == map->array_index[index] &&
427 plane == map->plane[index]) {
428 assert(array_size == map->array_size[index]);
429 if (return_size != map->return_size[index]) {
            /* If the return_size is different it means that the same sampler
             * was used for operations with different precision
             * requirements. In this case we need to ensure that we use the
             * larger one.
             */
435 map->return_size[index] = 32;
436 }
437 return index;
438 } else if (!map->used[index]) {
439 break;
440 }
441 }
442
443 assert(index < DESCRIPTOR_MAP_SIZE);
444 assert(!map->used[index]);
445
446 map->used[index] = true;
447 map->set[index] = set;
448 map->binding[index] = binding;
449 map->array_index[index] = array_index;
450 map->array_size[index] = array_size;
451 map->return_size[index] = return_size;
452 map->plane[index] = plane;
453 map->num_desc = MAX2(map->num_desc, index + 1);
454
455 return index;
456 }
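/* Illustrative usage sketch (hypothetical values, not taken from any actual
 * pipeline): registering array element 1 of a texture at set 0, binding 2,
 * with array_size 4 and full precision would look like
 *
 *    unsigned slot = descriptor_map_add(map, 0, 2, 1, 4, 0, 32, 0);
 *
 * Calling it again with the same set/binding/array_index/plane returns the
 * same slot, and if the two calls disagree on return_size the entry is
 * promoted to 32 bits.
 */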
457
458 struct lower_pipeline_layout_state {
459 struct v3dv_pipeline *pipeline;
460 const struct v3dv_pipeline_layout *layout;
461 bool needs_default_sampler_state;
462 };
463
464
465 static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct lower_pipeline_layout_state *state)
{
469 assert(instr->intrinsic == nir_intrinsic_load_push_constant);
470 instr->intrinsic = nir_intrinsic_load_uniform;
471 }
472
473 static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
                            VkDescriptorType desc_type,
                            gl_shader_stage gl_stage,
                            bool is_sampler)
{
479 enum broadcom_shader_stage broadcom_stage =
480 gl_shader_stage_to_broadcom(gl_stage);
481
482 assert(pipeline->shared_data &&
483 pipeline->shared_data->maps[broadcom_stage]);
484
485 switch(desc_type) {
486 case VK_DESCRIPTOR_TYPE_SAMPLER:
487 return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
488 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
489 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
490 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
491 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
492 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
493 return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
494 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
495 return is_sampler ?
496 &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
497 &pipeline->shared_data->maps[broadcom_stage]->texture_map;
498 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
499 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
500 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
501 return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
502 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
503 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
504 return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
505 default:
506 unreachable("Descriptor type unknown or not having a descriptor map");
507 }
508 }
509
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * can be used by the v3d_compiler */
512 static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct lower_pipeline_layout_state *state)
{
517 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
518
519 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
520
521 unsigned set = nir_intrinsic_desc_set(instr);
522 unsigned binding = nir_intrinsic_binding(instr);
523 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
524 struct v3dv_descriptor_set_binding_layout *binding_layout =
525 &set_layout->binding[binding];
526 unsigned index = 0;
527
528 switch (binding_layout->type) {
529 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
530 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
531 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
532 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
533 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
534 struct v3dv_descriptor_map *descriptor_map =
535 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
536 b->shader->info.stage, false);
537
538 if (!const_val)
539 unreachable("non-constant vulkan_resource_index array index");
540
      /* At compile-time we will need to know if we are processing a UBO load
       * for an inline or a regular UBO so we can handle inline loads like
       * push constants. At the NIR level, however, the inline information is
       * gone, so we rely on the index to make this distinction. Particularly,
       * we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for inline buffers.
       * This means that at the descriptor map level we store inline buffers
       * at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1, and regular UBOs at
       * indices starting from MAX_INLINE_UNIFORM_BUFFERS.
       */
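      /* Illustrative layout sketch (assuming, hypothetically, that
       * MAX_INLINE_UNIFORM_BUFFERS were 4): inline uniform blocks would land
       * in descriptor map slots 0..3, since they keep start_index at 0, while
       * regular and dynamic UBOs would start searching at slot 4 because of
       * the start_index bump below.
       */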
550 uint32_t start_index = 0;
551 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
552 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
553 start_index += MAX_INLINE_UNIFORM_BUFFERS;
554 }
555
556 index = descriptor_map_add(descriptor_map, set, binding,
557 const_val->u32,
558 binding_layout->array_size,
559 start_index,
560 32 /* return_size: doesn't really apply for this case */,
561 0);
562 break;
563 }
564
565 default:
566 unreachable("unsupported descriptor type for vulkan_resource_index");
567 break;
568 }
569
570 /* Since we use the deref pass, both vulkan_resource_index and
571 * vulkan_load_descriptor return a vec2 providing an index and
572 * offset. Our backend compiler only cares about the index part.
573 */
574 nir_def_rewrite_uses(&instr->def,
575 nir_imm_ivec2(b, index, 0));
576 nir_instr_remove(&instr->instr);
577 }
578
579 static uint8_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
582 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
583 if (plane_src_idx < 0)
584 return 0;
585
586 uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src);
587 nir_tex_instr_remove_src(tex, plane_src_idx);
588 return plane;
589 }
590
/* Returns the return_size, so it can be used for the case where we don't
 * have a sampler object
 */
594 static uint8_t
lower_tex_src(nir_builder *b,
              nir_tex_instr *instr,
              unsigned src_idx,
              struct lower_pipeline_layout_state *state)
{
600 nir_def *index = NULL;
601 unsigned base_index = 0;
602 unsigned array_elements = 1;
603 nir_tex_src *src = &instr->src[src_idx];
604 bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
605
606 uint8_t plane = tex_instr_get_and_remove_plane_src(instr);
607
   /* We compute the offsets first */
609 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
610 while (deref->deref_type != nir_deref_type_var) {
611 nir_deref_instr *parent =
612 nir_instr_as_deref(deref->parent.ssa->parent_instr);
613
614 assert(deref->deref_type == nir_deref_type_array);
615
616 if (nir_src_is_const(deref->arr.index) && index == NULL) {
617 /* We're still building a direct index */
618 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
619 } else {
620 if (index == NULL) {
621 /* We used to be direct but not anymore */
622 index = nir_imm_int(b, base_index);
623 base_index = 0;
624 }
625
626 index = nir_iadd(b, index,
627 nir_imul_imm(b, deref->arr.index.ssa,
628 array_elements));
629 }
630
631 array_elements *= glsl_get_length(parent->type);
632
633 deref = parent;
634 }
635
636 if (index)
637 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
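   /* Worked example (hypothetical declaration, not from this source): for a
    * binding declared as "uniform sampler2D tex[4][3]" accessed as tex[i][2],
    * the loop above linearizes the deref chain into 2 + i * 3, and the umin()
    * clamps the result to 11, the last valid array element.
    */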
638
   /* We have the offsets, so now we apply them, rewriting the source or
    * removing the instr if needed
    */
642 if (index) {
643 nir_src_rewrite(&src->src, index);
644
645 src->src_type = is_sampler ?
646 nir_tex_src_sampler_offset :
647 nir_tex_src_texture_offset;
648 } else {
649 nir_tex_instr_remove_src(instr, src_idx);
650 }
651
652 uint32_t set = deref->var->data.descriptor_set;
653 uint32_t binding = deref->var->data.binding;
   /* FIXME: this is a really simplified check for the precision to be used
    * for the sampling. Right now we are only checking the variables used
    * in the operation itself, but there are other cases that we could use to
    * infer the precision requirement.
    */
659 bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
660 deref->var->data.precision == GLSL_PRECISION_LOW;
661 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
662 struct v3dv_descriptor_set_binding_layout *binding_layout =
663 &set_layout->binding[binding];
664
665 /* For input attachments, the shader includes the attachment_idx. As we are
666 * treating them as a texture, we only want the base_index
667 */
668 uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
669 deref->var->data.index + base_index :
670 base_index;
671
672 uint8_t return_size;
673 if (V3D_DBG(TMU_16BIT))
674 return_size = 16;
675 else if (V3D_DBG(TMU_32BIT))
676 return_size = 32;
677 else
678 return_size = relaxed_precision ? 16 : 32;
679
680 struct v3dv_descriptor_map *map =
681 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
682 b->shader->info.stage, is_sampler);
683 int desc_index =
684 descriptor_map_add(map,
685 deref->var->data.descriptor_set,
686 deref->var->data.binding,
687 array_index,
688 binding_layout->array_size,
689 0,
690 return_size,
691 plane);
692
693 if (is_sampler)
694 instr->sampler_index = desc_index;
695 else
696 instr->texture_index = desc_index;
697
698 return return_size;
699 }
700
701 static bool
lower_sampler(nir_builder *b,
              nir_tex_instr *instr,
              struct lower_pipeline_layout_state *state)
{
706 uint8_t return_size = 0;
707
708 int texture_idx =
709 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
710
711 if (texture_idx >= 0)
712 return_size = lower_tex_src(b, instr, texture_idx, state);
713
714 int sampler_idx =
715 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
716
717 if (sampler_idx >= 0) {
718 assert(nir_tex_instr_need_sampler(instr));
719 lower_tex_src(b, instr, sampler_idx, state);
720 }
721
722 if (texture_idx < 0 && sampler_idx < 0)
723 return false;
724
   /* If the instruction doesn't have a sampler (i.e. txf) we use backend_flags
    * to bind a default sampler state to configure precision.
    */
728 if (sampler_idx < 0) {
729 state->needs_default_sampler_state = true;
730 instr->backend_flags = return_size == 16 ?
731 V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
732 }
733
734 return true;
735 }
736
737 /* FIXME: really similar to lower_tex_src, perhaps refactor? */
738 static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  struct lower_pipeline_layout_state *state)
{
743 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
744 nir_def *index = NULL;
745 unsigned array_elements = 1;
746 unsigned base_index = 0;
747
748 while (deref->deref_type != nir_deref_type_var) {
749 nir_deref_instr *parent =
750 nir_instr_as_deref(deref->parent.ssa->parent_instr);
751
752 assert(deref->deref_type == nir_deref_type_array);
753
754 if (nir_src_is_const(deref->arr.index) && index == NULL) {
755 /* We're still building a direct index */
756 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
757 } else {
758 if (index == NULL) {
759 /* We used to be direct but not anymore */
760 index = nir_imm_int(b, base_index);
761 base_index = 0;
762 }
763
764 index = nir_iadd(b, index,
765 nir_imul_imm(b, deref->arr.index.ssa,
766 array_elements));
767 }
768
769 array_elements *= glsl_get_length(parent->type);
770
771 deref = parent;
772 }
773
774 if (index)
775 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
776
777 uint32_t set = deref->var->data.descriptor_set;
778 uint32_t binding = deref->var->data.binding;
779 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
780 struct v3dv_descriptor_set_binding_layout *binding_layout =
781 &set_layout->binding[binding];
782
783 uint32_t array_index = deref->var->data.index + base_index;
784
785 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
786 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
787
788 struct v3dv_descriptor_map *map =
789 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
790 b->shader->info.stage, false);
791
792 int desc_index =
793 descriptor_map_add(map,
794 deref->var->data.descriptor_set,
795 deref->var->data.binding,
796 array_index,
797 binding_layout->array_size,
798 0,
799 32 /* return_size: doesn't apply for textures */,
800 0);
801
   /* Note: we don't need to do anything here in relation to the precision and
    * the output size because for images we can infer that info from the image
    * intrinsic, which includes the image format (see
    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
    */
807
808 index = nir_imm_int(b, desc_index);
809
810 nir_rewrite_image_intrinsic(instr, index, false);
811 }
812
813 static bool
lower_intrinsic(nir_builder *b,
                nir_intrinsic_instr *instr,
                struct lower_pipeline_layout_state *state)
{
818 switch (instr->intrinsic) {
819 case nir_intrinsic_load_push_constant:
820 lower_load_push_constant(b, instr, state);
821 return true;
822
823 case nir_intrinsic_vulkan_resource_index:
824 lower_vulkan_resource_index(b, instr, state);
825 return true;
826
827 case nir_intrinsic_load_vulkan_descriptor: {
828 /* Loading the descriptor happens as part of load/store instructions,
829 * so for us this is a no-op.
830 */
831 nir_def_rewrite_uses(&instr->def, instr->src[0].ssa);
832 nir_instr_remove(&instr->instr);
833 return true;
834 }
835
836 case nir_intrinsic_image_deref_load:
837 case nir_intrinsic_image_deref_store:
838 case nir_intrinsic_image_deref_atomic:
839 case nir_intrinsic_image_deref_atomic_swap:
840 case nir_intrinsic_image_deref_size:
841 case nir_intrinsic_image_deref_samples:
842 lower_image_deref(b, instr, state);
843 return true;
844
845 default:
846 return false;
847 }
848 }
849
850 static bool
lower_pipeline_layout_cb(nir_builder *b,
                         nir_instr *instr,
                         void *_state)
{
855 bool progress = false;
856 struct lower_pipeline_layout_state *state = _state;
857
858 b->cursor = nir_before_instr(instr);
859 switch (instr->type) {
860 case nir_instr_type_tex:
861 progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
862 break;
863 case nir_instr_type_intrinsic:
864 progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
865 break;
866 default:
867 break;
868 }
869
870 return progress;
871 }
872
873 static bool
lower_pipeline_layout_info(nir_shader *shader,
                           struct v3dv_pipeline *pipeline,
                           const struct v3dv_pipeline_layout *layout,
                           bool *needs_default_sampler_state)
{
879 bool progress = false;
880
881 struct lower_pipeline_layout_state state = {
882 .pipeline = pipeline,
883 .layout = layout,
884 .needs_default_sampler_state = false,
885 };
886
887 progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
888 nir_metadata_block_index |
889 nir_metadata_dominance,
890 &state);
891
892 *needs_default_sampler_state = state.needs_default_sampler_state;
893
894 return progress;
895 }
896
897 /* This flips gl_PointCoord.y to match Vulkan requirements */
898 static bool
lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
{
901 if (intr->intrinsic != nir_intrinsic_load_input)
902 return false;
903
904 if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
905 return false;
906
907 b->cursor = nir_after_instr(&intr->instr);
908 nir_def *result = &intr->def;
909 result =
910 nir_vector_insert_imm(b, result,
911 nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
912 nir_def_rewrite_uses_after(&intr->def,
913 result, result->parent_instr);
914 return true;
915 }
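/* Sketch of the effect (illustrative numbers): a loaded point coordinate of
 * (0.25, 0.75) becomes (0.25, 0.25) after the y -> 1.0 - y rewrite above,
 * which is the flip described in the comment before lower_point_coord_cb.
 */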
916
917 static bool
v3d_nir_lower_point_coord(nir_shader *s)
{
920 assert(s->info.stage == MESA_SHADER_FRAGMENT);
921 return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
922 nir_metadata_block_index |
923 nir_metadata_dominance, NULL);
924 }
925
926 static void
lower_fs_io(nir_shader *nir)
{
929 /* Our backend doesn't handle array fragment shader outputs */
930 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
931 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
932
933 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
934 MESA_SHADER_FRAGMENT);
935
936 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
937 MESA_SHADER_FRAGMENT);
938
939 NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
940 type_size_vec4, 0);
941 }
942
943 static void
lower_gs_io(struct nir_shader *nir)
{
946 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
947
948 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
949 MESA_SHADER_GEOMETRY);
950
951 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
952 MESA_SHADER_GEOMETRY);
953 }
954
955 static void
lower_vs_io(struct nir_shader *nir)
{
958 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
959
960 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
961 MESA_SHADER_VERTEX);
962
963 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
964 MESA_SHADER_VERTEX);
965
966 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
967 * overlaps with v3d_nir_lower_io. Need further research though.
968 */
969 }
970
971 static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler doesn't use this callback much, only as an alternative
    * way to dump the shaderdb stats, which you can already get using
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler to remove that callback.
    */
980 }
981
982 static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const struct v3dv_pipeline_stage *p_stage,
                          uint32_t ucp_enables)
{
987 assert(p_stage->pipeline->shared_data &&
988 p_stage->pipeline->shared_data->maps[p_stage->stage]);
989
   /* The following values are default values used at pipeline create time.
    * We use 32 bit as the default return size here.
    */
993 struct v3dv_descriptor_map *sampler_map =
994 &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
995 struct v3dv_descriptor_map *texture_map =
996 &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
997
998 key->num_tex_used = texture_map->num_desc;
999 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1000 for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1001 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1002 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1003 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1004 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1005 }
1006
1007 key->num_samplers_used = sampler_map->num_desc;
1008 assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1009 for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1010 sampler_idx++) {
1011 key->sampler[sampler_idx].return_size =
1012 sampler_map->return_size[sampler_idx];
1013
1014 key->sampler[sampler_idx].return_channels =
1015 key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1016 }
1017
1018 switch (p_stage->stage) {
1019 case BROADCOM_SHADER_VERTEX:
1020 case BROADCOM_SHADER_VERTEX_BIN:
1021 key->is_last_geometry_stage =
1022 p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL;
1023 break;
1024 case BROADCOM_SHADER_GEOMETRY:
1025 case BROADCOM_SHADER_GEOMETRY_BIN:
1026 /* FIXME: while we don't implement tessellation shaders */
1027 key->is_last_geometry_stage = true;
1028 break;
1029 case BROADCOM_SHADER_FRAGMENT:
1030 case BROADCOM_SHADER_COMPUTE:
1031 key->is_last_geometry_stage = false;
1032 break;
1033 default:
1034 unreachable("unsupported shader stage");
1035 }
1036
   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
    * takes care of adding a single compact array variable at
    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
    *
    * The only lowering we are interested in is specific to the fragment
    * shader, where we want to emit discards to honor writes to
    * gl_ClipDistance[] in previous stages. This is done via
    * nir_lower_clip_fs() so we only set up the ucp enable mask for that
    * stage.
    */
1047 key->ucp_enables = ucp_enables;
1048
1049 const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled =
1050 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1051
1052 const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled =
1053 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;
1054
1055 key->robust_uniform_access =
1056 p_stage->robustness.uniform_buffers == robust_buffer_enabled;
1057 key->robust_storage_access =
1058 p_stage->robustness.storage_buffers == robust_buffer_enabled;
1059 key->robust_image_access =
1060 p_stage->robustness.images == robust_image_enabled;
1061 }
1062
/* FIXME: anv maps to the hw primitive type. Perhaps eventually we would do
 * the same. For now we use prim_mode, which is the one already used on v3d.
 */
1066 static const enum mesa_prim vk_to_mesa_prim[] = {
1067 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
1068 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
1069 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
1070 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
1071 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
1072 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
1073 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
1074 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
1075 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
1076 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
1077 };
1078
1079 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1080 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1081 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1082 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1083 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1084 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1085 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1086 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1087 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1088 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1089 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1090 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1091 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1092 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1093 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1094 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1095 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1096 };
1097
1098 static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage,
                             bool has_geometry_shader,
                             uint32_t ucp_enables)
{
1105 assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1106
1107 memset(key, 0, sizeof(*key));
1108
1109 struct v3dv_device *device = p_stage->pipeline->device;
1110 assert(device);
1111
1112 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables);
1113
1114 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1115 pCreateInfo->pInputAssemblyState;
1116 uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1117
1118 key->is_points = (topology == MESA_PRIM_POINTS);
1119 key->is_lines = (topology >= MESA_PRIM_LINES &&
1120 topology <= MESA_PRIM_LINE_STRIP);
1121
1122 if (key->is_points) {
1123 /* This mask represents state for GL_ARB_point_sprite which is not
1124 * relevant to Vulkan.
1125 */
1126 key->point_sprite_mask = 0;
1127
1128 /* Vulkan mandates upper left. */
1129 key->point_coord_upper_left = true;
1130 }
1131
1132 key->has_gs = has_geometry_shader;
1133
1134 const VkPipelineColorBlendStateCreateInfo *cb_info =
1135 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1136 pCreateInfo->pColorBlendState : NULL;
1137
1138 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1139 vk_to_pipe_logicop[cb_info->logicOp] :
1140 PIPE_LOGICOP_COPY;
1141
1142 const bool raster_enabled =
1143 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1144
1145 /* Multisample rasterization state must be ignored if rasterization
1146 * is disabled.
1147 */
1148 const VkPipelineMultisampleStateCreateInfo *ms_info =
1149 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1150 if (ms_info) {
1151 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1152 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1153 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1154
1155 if (key->msaa)
1156 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1157
1158 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1159 }
1160
1161 /* This is intended for V3D versions before 4.1, otherwise we just use the
1162 * tile buffer load/store swap R/B bit.
1163 */
1164 key->swap_color_rb = 0;
1165
1166 const struct v3dv_render_pass *pass =
1167 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1168 const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1169 for (uint32_t i = 0; i < subpass->color_count; i++) {
1170 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1171 if (att_idx == VK_ATTACHMENT_UNUSED)
1172 continue;
1173
1174 key->cbufs |= 1 << i;
1175
1176 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1177 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1178
      /* If logic operations are enabled then we might emit color reads and we
       * need to know the color buffer format and swizzle for that.
       */
1183 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1184 /* Framebuffer formats should be single plane */
1185 assert(vk_format_get_plane_count(fb_format) == 1);
1186 key->color_fmt[i].format = fb_pipe_format;
1187 memcpy(key->color_fmt[i].swizzle,
1188 v3dv_get_format_swizzle(p_stage->pipeline->device,
1189 fb_format,
1190 0),
1191 sizeof(key->color_fmt[i].swizzle));
1192 }
1193
1194 const struct util_format_description *desc =
1195 vk_format_description(fb_format);
1196
1197 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1198 desc->channel[0].size == 32) {
1199 key->f32_color_rb |= 1 << i;
1200 }
1201
1202 if (p_stage->nir->info.fs.untyped_color_outputs) {
1203 if (util_format_is_pure_uint(fb_pipe_format))
1204 key->uint_color_rb |= 1 << i;
1205 else if (util_format_is_pure_sint(fb_pipe_format))
1206 key->int_color_rb |= 1 << i;
1207 }
1208 }
1209 }
1210
1211 static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
1219 *num_used_outputs = next_stage_num_inputs;
1220 memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1221 }
1222
1223 static void
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
1228 assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1229 p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1230
1231 struct v3dv_device *device = p_stage->pipeline->device;
1232 assert(device);
1233
1234 memset(key, 0, sizeof(*key));
1235
1236 pipeline_populate_v3d_key(&key->base, p_stage, 0);
1237
1238 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1239
1240 key->per_vertex_point_size =
1241 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1242
1243 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1244
1245 assert(key->base.is_last_geometry_stage);
1246 if (key->is_coord) {
1247 /* Output varyings in the last binning shader are only used for transform
1248 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1249 */
1250 key->num_used_outputs = 0;
1251 } else {
1252 struct v3dv_shader_variant *fs_variant =
1253 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1254
1255 STATIC_ASSERT(sizeof(key->used_outputs) ==
1256 sizeof(fs_variant->prog_data.fs->input_slots));
1257
1258 setup_stage_outputs_from_next_stage_inputs(
1259 fs_variant->prog_data.fs->num_inputs,
1260 fs_variant->prog_data.fs->input_slots,
1261 &key->num_used_outputs,
1262 key->used_outputs,
1263 sizeof(key->used_outputs));
1264 }
1265 }
1266
1267 static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
1272 assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1273 p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1274
1275 struct v3dv_device *device = p_stage->pipeline->device;
1276 assert(device);
1277
1278 memset(key, 0, sizeof(*key));
1279 pipeline_populate_v3d_key(&key->base, p_stage, 0);
1280
1281 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1282
   /* Vulkan specifies a point size per vertex, so this is true if the
    * primitives are points, like on ES2.
    */
1286 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1287 pCreateInfo->pInputAssemblyState;
1288 uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1289
1290 /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1291 * MESA_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1292 key->per_vertex_point_size = (topology == MESA_PRIM_POINTS);
1293
1294 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1295
   if (key->is_coord) { /* Binning VS */
1297 if (key->base.is_last_geometry_stage) {
1298 /* Output varyings in the last binning shader are only used for
1299 * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1300 * supported.
1301 */
1302 key->num_used_outputs = 0;
1303 } else {
1304 /* Linking against GS binning program */
1305 assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]);
1306 struct v3dv_shader_variant *gs_bin_variant =
1307 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1308
1309 STATIC_ASSERT(sizeof(key->used_outputs) ==
1310 sizeof(gs_bin_variant->prog_data.gs->input_slots));
1311
1312 setup_stage_outputs_from_next_stage_inputs(
1313 gs_bin_variant->prog_data.gs->num_inputs,
1314 gs_bin_variant->prog_data.gs->input_slots,
1315 &key->num_used_outputs,
1316 key->used_outputs,
1317 sizeof(key->used_outputs));
1318 }
1319 } else { /* Render VS */
1320 if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
1321 /* Linking against GS render program */
1322 struct v3dv_shader_variant *gs_variant =
1323 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1324
1325 STATIC_ASSERT(sizeof(key->used_outputs) ==
1326 sizeof(gs_variant->prog_data.gs->input_slots));
1327
1328 setup_stage_outputs_from_next_stage_inputs(
1329 gs_variant->prog_data.gs->num_inputs,
1330 gs_variant->prog_data.gs->input_slots,
1331 &key->num_used_outputs,
1332 key->used_outputs,
1333 sizeof(key->used_outputs));
1334 } else {
1335 /* Linking against FS program */
1336 struct v3dv_shader_variant *fs_variant =
1337 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1338
1339 STATIC_ASSERT(sizeof(key->used_outputs) ==
1340 sizeof(fs_variant->prog_data.fs->input_slots));
1341
1342 setup_stage_outputs_from_next_stage_inputs(
1343 fs_variant->prog_data.fs->num_inputs,
1344 fs_variant->prog_data.fs->input_slots,
1345 &key->num_used_outputs,
1346 key->used_outputs,
1347 sizeof(key->used_outputs));
1348 }
1349 }
1350
1351 const VkPipelineVertexInputStateCreateInfo *vi_info =
1352 pCreateInfo->pVertexInputState;
1353 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1354 const VkVertexInputAttributeDescription *desc =
1355 &vi_info->pVertexAttributeDescriptions[i];
1356 assert(desc->location < MAX_VERTEX_ATTRIBS);
1357 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
1358 desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
1359 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1360 }
1361 }
1362 }
1363
1364 /**
1365 * Creates the initial form of the pipeline stage for a binning shader by
1366 * cloning the render shader and flagging it as a coordinate shader.
1367 *
1368 * Returns NULL if it was not able to allocate the object, so it should be
1369 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1370 */
1371 static struct v3dv_pipeline_stage *
pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
                              const VkAllocationCallbacks *pAllocator)
{
1375 struct v3dv_device *device = src->pipeline->device;
1376
1377 struct v3dv_pipeline_stage *p_stage =
1378 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1379 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1380
1381 if (p_stage == NULL)
1382 return NULL;
1383
1384 assert(src->stage == BROADCOM_SHADER_VERTEX ||
1385 src->stage == BROADCOM_SHADER_GEOMETRY);
1386
1387 enum broadcom_shader_stage bin_stage =
1388 src->stage == BROADCOM_SHADER_VERTEX ?
1389 BROADCOM_SHADER_VERTEX_BIN :
1390 BROADCOM_SHADER_GEOMETRY_BIN;
1391
1392 p_stage->pipeline = src->pipeline;
1393 p_stage->stage = bin_stage;
1394 p_stage->entrypoint = src->entrypoint;
1395 p_stage->module = src->module;
1396 /* For binning shaders we will clone the NIR code from the corresponding
1397 * render shader later, when we call pipeline_compile_xxx_shader. This way
1398 * we only have to run the relevant NIR lowerings once for render shaders
1399 */
1400 p_stage->nir = NULL;
1401 p_stage->program_id = src->program_id;
1402 p_stage->spec_info = src->spec_info;
1403 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1404 p_stage->robustness = src->robustness;
1405 memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1406
1407 return p_stage;
1408 }
1409
/*
 * Based on some creation flags we assume that the QPU assembly would be
 * needed later to gather further info. In that case we just keep the
 * qpu_insts around, instead of mapping/unmapping the BO later.
 */
1415 static bool
pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
{
1418 return pipeline->flags &
1419 (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
1420 VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
1421 }
1422
1423 /**
1424 * Returns false if it was not able to allocate or map the assembly bo memory.
1425 */
1426 static bool
upload_assembly(struct v3dv_pipeline *pipeline)
{
1429 uint32_t total_size = 0;
1430 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1431 struct v3dv_shader_variant *variant =
1432 pipeline->shared_data->variants[stage];
1433
1434 if (variant != NULL)
1435 total_size += variant->qpu_insts_size;
1436 }
1437
1438 struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1439 "pipeline shader assembly", true);
1440 if (!bo) {
1441 fprintf(stderr, "failed to allocate memory for shader\n");
1442 return false;
1443 }
1444
1445 bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1446 if (!ok) {
1447 fprintf(stderr, "failed to map source shader buffer\n");
1448 return false;
1449 }
1450
1451 uint32_t offset = 0;
1452 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1453 struct v3dv_shader_variant *variant =
1454 pipeline->shared_data->variants[stage];
1455
1456 if (variant != NULL) {
1457 variant->assembly_offset = offset;
1458
1459 memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1460 offset += variant->qpu_insts_size;
1461
1462 if (!pipeline_keep_qpu(pipeline)) {
1463 free(variant->qpu_insts);
1464 variant->qpu_insts = NULL;
1465 }
1466 }
1467 }
1468 assert(total_size == offset);
1469
1470 pipeline->shared_data->assembly_bo = bo;
1471
1472 return true;
1473 }
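/* Layout note (descriptive, derived from the loop above): the assembly BO is
 * simply the concatenation, in stage-enum order, of each variant's QPU
 * instructions, with variant->assembly_offset recording where each stage's
 * code starts within the BO.
 */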
1474
1475 static void
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
                       struct v3dv_pipeline_key *key,
                       unsigned char *sha1_out)
{
1480 struct mesa_sha1 ctx;
1481 _mesa_sha1_init(&ctx);
1482
1483 if (pipeline->layout) {
1484 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1485 sizeof(pipeline->layout->sha1));
1486 }
1487
   /* We need to include all shader stages in the sha1 key as linking may
    * modify the shader code in any stage. An alternative would be to use the
    * serialized NIR, but that seems like overkill.
    */
1492 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1493 if (broadcom_shader_stage_is_binning(stage))
1494 continue;
1495
1496 struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
1497 if (p_stage == NULL)
1498 continue;
1499
1500 assert(stage != BROADCOM_SHADER_COMPUTE);
1501
1502 _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1503 }
1504
1505 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1506
1507 _mesa_sha1_final(&ctx, sha1_out);
1508 }
1509
1510 static void
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
                      struct v3dv_pipeline_key *key,
                      unsigned char *sha1_out)
{
1515 struct mesa_sha1 ctx;
1516 _mesa_sha1_init(&ctx);
1517
1518 if (pipeline->layout) {
1519 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1520 sizeof(pipeline->layout->sha1));
1521 }
1522
1523 struct v3dv_pipeline_stage *p_stage =
1524 pipeline->stages[BROADCOM_SHADER_COMPUTE];
1525
1526 _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1527
1528 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1529
1530 _mesa_sha1_final(&ctx, sha1_out);
1531 }
1532
/* Checks that the pipeline has enough spill size to use for any of its
 * variants
 */
1536 static void
pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
{
1539 uint32_t max_spill_size = 0;
1540
1541 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1542 struct v3dv_shader_variant *variant =
1543 pipeline->shared_data->variants[stage];
1544
1545 if (variant != NULL) {
1546 max_spill_size = MAX2(variant->prog_data.base->spill_size,
1547 max_spill_size);
1548 }
1549 }
1550
1551 if (max_spill_size > 0) {
1552 struct v3dv_device *device = pipeline->device;
1553
      /* The TIDX register we use for choosing the area to access
       * for scratch space is: (core << 6) | (qpu << 2) | thread.
       * Even at minimum threadcount in a particular shader, that
       * means we still multiply the number of QPUs by 4.
       */
1559 const uint32_t total_spill_size =
1560 4 * device->devinfo.qpu_count * max_spill_size;
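      /* Worked example with made-up numbers: if the device reported 8 QPUs
       * and the largest variant needed 512 bytes of spill per thread, this
       * would allocate 4 * 8 * 512 = 16384 bytes of scratch space.
       */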
1561 if (pipeline->spill.bo) {
1562 assert(pipeline->spill.size_per_thread > 0);
1563 v3dv_bo_free(device, pipeline->spill.bo);
1564 }
1565 pipeline->spill.bo =
1566 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1567 pipeline->spill.size_per_thread = max_spill_size;
1568 }
1569 }
1570
/**
 * Creates a new shader variant. Note that prog_data is not const, so it is
 * assumed that the caller provides a pointer that the shader_variant will
 * own.
 *
 * Creation doesn't include allocating a BO to store the contents of
 * qpu_insts, as we will try to share the same BO for several shader
 * variants. Also note that qpu_insts being NULL is valid, for example if we
 * are creating the shader_variants from the cache, so we can just upload the
 * assembly of all the shader stages at once.
 */
1582 struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result)
{
1592 struct v3dv_shader_variant *variant =
1593 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1594 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1595
1596 if (variant == NULL) {
1597 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1598 return NULL;
1599 }
1600
1601 variant->stage = stage;
1602 variant->prog_data_size = prog_data_size;
1603 variant->prog_data.base = prog_data;
1604
1605 variant->assembly_offset = assembly_offset;
1606 variant->qpu_insts_size = qpu_insts_size;
1607 variant->qpu_insts = qpu_insts;
1608
1609 *out_vk_result = VK_SUCCESS;
1610
1611 return variant;
1612 }
1613
1614 /* For a given key, returns the compiled version of the shader, handing a
1615 * new reference to the shader_variant to the caller, or NULL.
1616 *
1617 * If the method returns NULL it means that something went wrong:
1618 *   * Not enough memory: this is one of the possible outcomes defined by
1619 *     vkCreateXXXPipelines. out_vk_result will return the proper OOM error.
1620 *   * Compilation error: hypothetically this shouldn't happen, as the spec
1621 *     states that a VkShaderModule needs to be created with valid SPIR-V,
1622 *     so any compilation failure is a driver bug. In practice, something as
1623 *     common as failing to register allocate can lead to a compilation
1624 *     failure. In that case the only option (for any driver) is
1625 *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1626 *     error.
1627 */
1628 static struct v3dv_shader_variant *
1629 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1630 struct v3d_key *key,
1631 size_t key_size,
1632 const VkAllocationCallbacks *pAllocator,
1633 VkResult *out_vk_result)
1634 {
1635 int64_t stage_start = os_time_get_nano();
1636
1637 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1638 struct v3dv_physical_device *physical_device = pipeline->device->pdevice;
1639 const struct v3d_compiler *compiler = physical_device->compiler;
1640 gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage);
1641
1642 if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
1643 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1644 broadcom_shader_stage_name(p_stage->stage),
1645 p_stage->program_id);
1646 nir_print_shader(p_stage->nir, stderr);
1647 fprintf(stderr, "\n");
1648 }
1649
1650 uint64_t *qpu_insts;
1651 uint32_t qpu_insts_size;
1652 struct v3d_prog_data *prog_data;
1653 uint32_t prog_data_size = v3d_prog_data_size(gl_stage);
1654
1655 qpu_insts = v3d_compile(compiler,
1656 key, &prog_data,
1657 p_stage->nir,
1658 shader_debug_output, NULL,
1659 p_stage->program_id, 0,
1660 &qpu_insts_size);
1661
1662 struct v3dv_shader_variant *variant = NULL;
1663
1664 if (!qpu_insts) {
1665 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1666 broadcom_shader_stage_name(p_stage->stage),
1667 p_stage->program_id);
1668 *out_vk_result = VK_ERROR_UNKNOWN;
1669 } else {
1670 variant =
1671 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1672 prog_data, prog_data_size,
1673 0, /* assembly_offset, no final value yet */
1674 qpu_insts, qpu_insts_size,
1675 out_vk_result);
1676 }
1677 /* At this point we no longer need the nir shader, but since we free all
1678 * the temporary p_stage structs used during pipeline creation when we
1679 * finish it, let's not worry about freeing the nir here.
1680 */
1681
1682 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1683
1684 return variant;
1685 }
1686
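/* Cross-stage NIR linking between a producer/consumer pair: scalarizes I/O
 * where the backend needs it, removes dead and unused varyings, and re-runs
 * the NIR optimization loop on both shaders so later lowering sees a minimal
 * interface between the stages.
 */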
1687 static void
1688 link_shaders(nir_shader *producer, nir_shader *consumer)
1689 {
1690 assert(producer);
1691 assert(consumer);
1692
1693 if (producer->options->lower_to_scalar) {
1694 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1695 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1696 }
1697
1698 nir_lower_io_arrays_to_elements(producer, consumer);
1699
1700 v3d_optimize_nir(NULL, producer);
1701 v3d_optimize_nir(NULL, consumer);
1702
1703 if (nir_link_opt_varyings(producer, consumer))
1704 v3d_optimize_nir(NULL, consumer);
1705
1706 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1707 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1708
1709 if (nir_remove_unused_varyings(producer, consumer)) {
1710 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1711 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1712
1713 v3d_optimize_nir(NULL, producer);
1714 v3d_optimize_nir(NULL, consumer);
1715
1716 /* Optimizations can cause varyings to become unused.
1717 * nir_compact_varyings() depends on all dead varyings being removed so
1718 * we need to call nir_remove_dead_variables() again here.
1719 */
1720 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1721 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1722 }
1723 }
1724
1725 static void
1726 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1727 struct v3dv_pipeline_stage *p_stage,
1728 struct v3dv_pipeline_layout *layout)
1729 {
1730 int64_t stage_start = os_time_get_nano();
1731
1732 assert(pipeline->shared_data &&
1733 pipeline->shared_data->maps[p_stage->stage]);
1734
1735 NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex,
1736 lookup_ycbcr_conversion, layout);
1737
1738 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1739
1740 /* We add this because we need a valid sampler for nir_lower_tex to do
1741 * unpacking of the texture operation result, even for the case where there
1742 * is no sampler state.
1743 *
1744 * We add two of them: one for the case where we need a 16-bit return
1745 * size, and another for the case where we need a 32-bit return size.
1746 */
1747 struct v3dv_descriptor_maps *maps =
1748 pipeline->shared_data->maps[p_stage->stage];
1749
1750 UNUSED unsigned index;
1751 index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0);
1752 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1753
1754 index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0);
1755 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1756
1757 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1758 bool needs_default_sampler_state = false;
1759 NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1760 &needs_default_sampler_state);
1761
1762 /* If in the end we didn't need to use the default sampler states and the
1763 * shader doesn't need any other samplers, get rid of them so we can
1764 * recognize that this program doesn't use any samplers at all.
1765 */
1766 if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1767 maps->sampler_map.num_desc = 0;
1768
1769 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1770 }
1771
1772 /**
1773 * The SPIR-V compiler will insert a sized compact array for
1774 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1775 * where the size of the array determines the number of active clip planes.
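 * For example, a vertex shader that declares float gl_ClipDistance[4]
 * yields a mask of 0xf (4 user clip planes enabled).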
1776 */
1777 static uint32_t
1778 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1779 {
1780 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1781 const nir_shader *shader = p_stage->nir;
1782 assert(shader);
1783
1784 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1785 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1786 assert(var->data.compact);
1787 return (1 << glsl_get_length(var->type)) - 1;
1788 }
1789 }
1790 return 0;
1791 }
1792
1793 static nir_shader *
1794 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1795 struct v3dv_pipeline *pipeline,
1796 struct v3dv_pipeline_cache *cache)
1797 {
1798 int64_t stage_start = os_time_get_nano();
1799
1800 nir_shader *nir = NULL;
1801
1802 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1803 &v3dv_nir_options,
1804 p_stage->shader_sha1);
1805
1806 if (nir) {
1807 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1808
1809 /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1810 * creation work, so the cache hit is not recorded in the pipeline
1811 * feedback flags.
1812 */
1813
1814 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1815
1816 return nir;
1817 }
1818
1819 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1820
1821 if (nir) {
1822 struct v3dv_pipeline_cache *default_cache =
1823 &pipeline->device->default_pipeline_cache;
1824
1825 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1826 p_stage->shader_sha1);
1827
1828 /* Ensure that the variant is in the default cache, as cmd_buffer could
1829 * need to change the current variant.
1830 */
1831 if (default_cache != cache) {
1832 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1833 p_stage->shader_sha1);
1834 }
1835
1836 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1837
1838 return nir;
1839 }
1840
1841 /* FIXME: this shouldn't happen, raise error? */
1842 return NULL;
1843 }
1844
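/* Compiles both the render and binning vertex shader variants. The binning
 * stage reuses the render stage NIR (cloning it if it hasn't been created
 * yet) but is compiled with its own key.
 */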
1845 static VkResult
1846 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1847 const VkAllocationCallbacks *pAllocator,
1848 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1849 {
1850 struct v3dv_pipeline_stage *p_stage_vs =
1851 pipeline->stages[BROADCOM_SHADER_VERTEX];
1852 struct v3dv_pipeline_stage *p_stage_vs_bin =
1853 pipeline->stages[BROADCOM_SHADER_VERTEX_BIN];
1854
1855 assert(p_stage_vs_bin != NULL);
1856 if (p_stage_vs_bin->nir == NULL) {
1857 assert(p_stage_vs->nir);
1858 p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir);
1859 }
1860
1861 VkResult vk_result;
1862 struct v3d_vs_key key;
1863 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs);
1864 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1865 pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key),
1866 pAllocator, &vk_result);
1867 if (vk_result != VK_SUCCESS)
1868 return vk_result;
1869
1870 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin);
1871 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1872 pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key),
1873 pAllocator, &vk_result);
1874
1875 return vk_result;
1876 }
1877
1878 static VkResult
1879 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1880 const VkAllocationCallbacks *pAllocator,
1881 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1882 {
1883 struct v3dv_pipeline_stage *p_stage_gs =
1884 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1885 struct v3dv_pipeline_stage *p_stage_gs_bin =
1886 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN];
1887
1888 assert(p_stage_gs);
1889 assert(p_stage_gs_bin != NULL);
1890 if (p_stage_gs_bin->nir == NULL) {
1891 assert(p_stage_gs->nir);
1892 p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir);
1893 }
1894
1895 VkResult vk_result;
1896 struct v3d_gs_key key;
1897 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs);
1898 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1899 pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key),
1900 pAllocator, &vk_result);
1901 if (vk_result != VK_SUCCESS)
1902 return vk_result;
1903
1904 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin);
1905 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1906 pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key),
1907 pAllocator, &vk_result);
1908
1909 return vk_result;
1910 }
1911
1912 static VkResult
1913 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1914 const VkAllocationCallbacks *pAllocator,
1915 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1916 {
1917 struct v3dv_pipeline_stage *p_stage_vs =
1918 pipeline->stages[BROADCOM_SHADER_VERTEX];
1919 struct v3dv_pipeline_stage *p_stage_fs =
1920 pipeline->stages[BROADCOM_SHADER_FRAGMENT];
1921 struct v3dv_pipeline_stage *p_stage_gs =
1922 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1923
1924 struct v3d_fs_key key;
1925 pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage_fs,
1926 p_stage_gs != NULL,
1927 get_ucp_enable_mask(p_stage_vs));
1928
1929 if (key.is_points) {
1930 assert(key.point_coord_upper_left);
1931 NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
1932 }
1933
1934 VkResult vk_result;
1935 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1936 pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
1937 pAllocator, &vk_result);
1938
1939 return vk_result;
1940 }
1941
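/* Fills the pipeline-level key used (together with the per-stage shader
 * sha1s) to hash graphics pipelines for the cache: topology, logic op,
 * MSAA/alpha-to-coverage state, color attachment formats and swizzles (only
 * needed when logic ops may read the framebuffer), float32 render targets,
 * vertex attribute red/blue swapping and multiview.
 */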
1942 static void
1943 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1944 struct v3dv_pipeline_key *key,
1945 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1946 {
1947 struct v3dv_device *device = pipeline->device;
1948 assert(device);
1949
1950 memset(key, 0, sizeof(*key));
1951
1952 const bool raster_enabled =
1953 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1954
1955 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1956 pCreateInfo->pInputAssemblyState;
1957 key->topology = vk_to_mesa_prim[ia_info->topology];
1958
1959 const VkPipelineColorBlendStateCreateInfo *cb_info =
1960 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1961
1962 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1963 vk_to_pipe_logicop[cb_info->logicOp] :
1964 PIPE_LOGICOP_COPY;
1965
1966 /* Multisample rasterization state must be ignored if rasterization
1967 * is disabled.
1968 */
1969 const VkPipelineMultisampleStateCreateInfo *ms_info =
1970 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1971 if (ms_info) {
1972 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1973 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1974 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1975
1976 if (key->msaa)
1977 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1978
1979 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1980 }
1981
1982 const struct v3dv_render_pass *pass =
1983 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1984 const struct v3dv_subpass *subpass = pipeline->subpass;
1985 for (uint32_t i = 0; i < subpass->color_count; i++) {
1986 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1987 if (att_idx == VK_ATTACHMENT_UNUSED)
1988 continue;
1989
1990 key->cbufs |= 1 << i;
1991
1992 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1993 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1994
1995 /* If logic operations are enabled then we might emit color reads and we
1996 * need to know the color buffer format and swizzle for that
1997 */
1998 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1999 /* Framebuffer formats should be single plane */
2000 assert(vk_format_get_plane_count(fb_format) == 1);
2001 key->color_fmt[i].format = fb_pipe_format;
2002 memcpy(key->color_fmt[i].swizzle,
2003 v3dv_get_format_swizzle(pipeline->device, fb_format, 0),
2004 sizeof(key->color_fmt[i].swizzle));
2005 }
2006
2007 const struct util_format_description *desc =
2008 vk_format_description(fb_format);
2009
2010 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2011 desc->channel[0].size == 32) {
2012 key->f32_color_rb |= 1 << i;
2013 }
2014 }
2015
2016 const VkPipelineVertexInputStateCreateInfo *vi_info =
2017 pCreateInfo->pVertexInputState;
2018 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2019 const VkVertexInputAttributeDescription *desc =
2020 &vi_info->pVertexAttributeDescriptions[i];
2021 assert(desc->location < MAX_VERTEX_ATTRIBS);
2022 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
2023 desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
2024 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2025 }
2026 }
2027
2028 assert(pipeline->subpass);
2029 key->has_multiview = pipeline->subpass->view_mask != 0;
2030 }
2031
2032 static void
2033 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2034 struct v3dv_pipeline_key *key,
2035 const VkComputePipelineCreateInfo *pCreateInfo)
2036 {
2037 struct v3dv_device *device = pipeline->device;
2038 assert(device);
2039
2040 /* We use the same pipeline key for graphics and compute, but we don't need
2041 * to add a field to flag compute keys because this key is not used alone
2042 * to search the cache: we also use, for example, the SPIR-V or the
2043 * serialized NIR, which already identifies compute shaders.
2044 */
2045 memset(key, 0, sizeof(*key));
2046 }
2047
2048 static struct v3dv_pipeline_shared_data *
2049 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2050 struct v3dv_pipeline *pipeline,
2051 bool is_graphics_pipeline)
2052 {
2053 /* We create new_entry using the device alloc. Right now shared_data is
2054 * referenced and unreferenced by both the pipeline and the pipeline cache,
2055 * so we can't ensure that the cache or pipeline alloc will still be
2056 * available on the last unref.
2057 */
2058 struct v3dv_pipeline_shared_data *new_entry =
2059 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2060 sizeof(struct v3dv_pipeline_shared_data), 8,
2061 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2062
2063 if (new_entry == NULL)
2064 return NULL;
2065
2066 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2067 /* We don't need specific descriptor maps for binning stages; we use the
2068 * map of the corresponding render stage.
2069 */
2070 if (broadcom_shader_stage_is_binning(stage))
2071 continue;
2072
2073 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2074 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2075 continue;
2076 }
2077
2078 if (stage == BROADCOM_SHADER_GEOMETRY &&
2079 !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
2080 /* We always inject a custom GS if we have multiview */
2081 if (!pipeline->subpass->view_mask)
2082 continue;
2083 }
2084
2085 struct v3dv_descriptor_maps *new_maps =
2086 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2087 sizeof(struct v3dv_descriptor_maps), 8,
2088 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2089
2090 if (new_maps == NULL)
2091 goto fail;
2092
2093 new_entry->maps[stage] = new_maps;
2094 }
2095
2096 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2097 new_entry->maps[BROADCOM_SHADER_VERTEX];
2098
2099 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2100 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2101
2102 new_entry->ref_cnt = 1;
2103 memcpy(new_entry->sha1_key, sha1_key, 20);
2104
2105 return new_entry;
2106
2107 fail:
2108 if (new_entry != NULL) {
2109 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2110 if (new_entry->maps[stage] != NULL)
2111 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2112 }
2113 }
2114
2115 vk_free(&pipeline->device->vk.alloc, new_entry);
2116
2117 return NULL;
2118 }
2119
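/* Writes VkPipelineCreationFeedback data back to the application when a
 * VkPipelineCreationFeedbackCreateInfo struct is chained to the create
 * info. Binning stage durations are folded into their corresponding render
 * stage entry, since the API only knows about the Vulkan stages.
 */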
2120 static void
2121 write_creation_feedback(struct v3dv_pipeline *pipeline,
2122 const void *next,
2123 const VkPipelineCreationFeedback *pipeline_feedback,
2124 uint32_t stage_count,
2125 const VkPipelineShaderStageCreateInfo *stages)
2126 {
2127 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2128 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2129
2130 if (create_feedback) {
2131 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2132 pipeline_feedback,
2133 1);
2134
2135 const uint32_t feedback_stage_count =
2136 create_feedback->pipelineStageCreationFeedbackCount;
2137 assert(feedback_stage_count <= stage_count);
2138
2139 for (uint32_t i = 0; i < feedback_stage_count; i++) {
2140 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2141 enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s);
2142
2143 create_feedback->pPipelineStageCreationFeedbacks[i] =
2144 pipeline->stages[bs]->feedback;
2145
2146 if (broadcom_shader_stage_is_render_with_binning(bs)) {
2147 enum broadcom_shader_stage bs_bin =
2148 broadcom_binning_shader_stage_for_render_stage(bs);
2149 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2150 pipeline->stages[bs_bin]->feedback.duration;
2151 }
2152 }
2153 }
2154 }
2155
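/* The two helpers below pick the input/output primitive types for the
 * passthrough multiview geometry shader based on the pipeline topology.
 * Adjacency primitives are not expected, since we don't support application
 * geometry shaders together with multiview.
 */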
2156 static enum mesa_prim
2157 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2158 {
2159 switch (pipeline->topology) {
2160 case MESA_PRIM_POINTS:
2161 return MESA_PRIM_POINTS;
2162 case MESA_PRIM_LINES:
2163 case MESA_PRIM_LINE_STRIP:
2164 return MESA_PRIM_LINES;
2165 case MESA_PRIM_TRIANGLES:
2166 case MESA_PRIM_TRIANGLE_STRIP:
2167 case MESA_PRIM_TRIANGLE_FAN:
2168 return MESA_PRIM_TRIANGLES;
2169 default:
2170 /* Since we don't allow GS with multiview, we can only see non-adjacency
2171 * primitives.
2172 */
2173 unreachable("Unexpected pipeline primitive type");
2174 }
2175 }
2176
2177 static enum mesa_prim
2178 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2179 {
2180 switch (pipeline->topology) {
2181 case MESA_PRIM_POINTS:
2182 return MESA_PRIM_POINTS;
2183 case MESA_PRIM_LINES:
2184 case MESA_PRIM_LINE_STRIP:
2185 return MESA_PRIM_LINE_STRIP;
2186 case MESA_PRIM_TRIANGLES:
2187 case MESA_PRIM_TRIANGLE_STRIP:
2188 case MESA_PRIM_TRIANGLE_FAN:
2189 return MESA_PRIM_TRIANGLE_STRIP;
2190 default:
2191 /* Since we don't allow GS with multiview, we can only see non-adjacency
2192 * primitives.
2193 */
2194 unreachable("Unexpected pipeline primitive type");
2195 }
2196 }
2197
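/* Builds and attaches a passthrough geometry shader that re-emits each
 * primitive, copying the VS outputs and writing gl_Layer from the view
 * index. This is how draw calls get broadcast to the layers selected by the
 * subpass view mask when the application doesn't provide a geometry shader.
 */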
2198 static bool
2199 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2200 struct v3dv_pipeline_cache *cache,
2201 const VkAllocationCallbacks *pAllocator)
2202 {
2203 /* Create the passthrough GS from the VS output interface */
2204 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2205 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2206 nir_shader *vs_nir = p_stage_vs->nir;
2207
2208 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2209 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2210 "multiview broadcast gs");
2211 nir_shader *nir = b.shader;
2212 nir->info.inputs_read = vs_nir->info.outputs_written;
2213 nir->info.outputs_written = vs_nir->info.outputs_written |
2214 (1ull << VARYING_SLOT_LAYER);
2215
2216 uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology);
2217 nir->info.gs.input_primitive =
2218 multiview_gs_input_primitive_from_pipeline(pipeline);
2219 nir->info.gs.output_primitive =
2220 multiview_gs_output_primitive_from_pipeline(pipeline);
2221 nir->info.gs.vertices_in = vertex_count;
2222 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2223 nir->info.gs.invocations = 1;
2224 nir->info.gs.active_stream_mask = 0x1;
2225
2226 /* Make a list of GS input/output variables from the VS outputs */
2227 nir_variable *in_vars[100];
2228 nir_variable *out_vars[100];
2229 uint32_t var_count = 0;
2230 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2231 char name[8];
2232 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2233
2234 in_vars[var_count] =
2235 nir_variable_create(nir, nir_var_shader_in,
2236 glsl_array_type(out_vs_var->type, vertex_count, 0),
2237 name);
2238 in_vars[var_count]->data.location = out_vs_var->data.location;
2239 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2240 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2241
2242 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2243 out_vars[var_count] =
2244 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2245 out_vars[var_count]->data.location = out_vs_var->data.location;
2246 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2247
2248 var_count++;
2249 }
2250
2251 /* Add the gl_Layer output variable */
2252 nir_variable *out_layer =
2253 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2254 "out_Layer");
2255 out_layer->data.location = VARYING_SLOT_LAYER;
2256
2257 /* Get the view index value that we will write to gl_Layer */
2258 nir_def *layer =
2259 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2260
2261 /* Emit all output vertices */
2262 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2263 /* Emit all output varyings */
2264 for (uint32_t i = 0; i < var_count; i++) {
2265 nir_deref_instr *in_value =
2266 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2267 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2268 }
2269
2270 /* Emit gl_Layer write */
2271 nir_store_var(&b, out_layer, layer, 0x1);
2272
2273 nir_emit_vertex(&b, 0);
2274 }
2275 nir_end_primitive(&b, 0);
2276
2277 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2278 * with what we expect from SPIR-V modules.
2279 */
2280 preprocess_nir(nir);
2281
2282 /* Attach the geometry shader to the pipeline */
2283 struct v3dv_device *device = pipeline->device;
2284 struct v3dv_physical_device *physical_device = device->pdevice;
2285
2286 struct v3dv_pipeline_stage *p_stage =
2287 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2288 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2289
2290 if (p_stage == NULL) {
2291 ralloc_free(nir);
2292 return false;
2293 }
2294
2295 p_stage->pipeline = pipeline;
2296 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2297 p_stage->entrypoint = "main";
2298 p_stage->module = 0;
2299 p_stage->nir = nir;
2300 pipeline_compute_sha1_from_nir(p_stage);
2301 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2302 p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;
2303
2304 pipeline->has_gs = true;
2305 pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
2306 pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
2307
2308 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
2309 pipeline_stage_create_binning(p_stage, pAllocator);
2310 if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
2311 return false;
2312
2313 return true;
2314 }
2315
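/* Flags the pipeline as using buffer device addresses if any of its shader
 * variants was compiled with global (device address) memory access.
 */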
2316 static void
2317 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2318 {
2319 for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2320 struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2321 if (variant && variant->prog_data.base->has_global_address) {
2322 pipeline->uses_buffer_device_address = true;
2323 return;
2324 }
2325 }
2326
2327 pipeline->uses_buffer_device_address = false;
2328 }
2329
2330 /*
2331 * Compiles a pipeline. Note that it also allocates internal objects, but
2332 * if some allocations succeed while others fail, the method does not free
2333 * the successful ones.
2334 *
2335 * This is done to simplify the code: what we do in that case is simply
2336 * call the pipeline destroy method, which handles freeing the internal
2337 * objects that were allocated. We just need to be careful to set the
2338 * objects that were not allocated to NULL.
2339 */
2340 static VkResult
2341 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2342 struct v3dv_pipeline_cache *cache,
2343 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2344 const VkAllocationCallbacks *pAllocator)
2345 {
2346 VkPipelineCreationFeedback pipeline_feedback = {
2347 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2348 };
2349 int64_t pipeline_start = os_time_get_nano();
2350
2351 struct v3dv_device *device = pipeline->device;
2352 struct v3dv_physical_device *physical_device = device->pdevice;
2353
2354 /* First pass to get some common info from the shaders and create the
2355 * individual pipeline_stage objects.
2356 */
2357 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2358 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2359 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2360
2361 struct v3dv_pipeline_stage *p_stage =
2362 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2363 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2364
2365 if (p_stage == NULL)
2366 return VK_ERROR_OUT_OF_HOST_MEMORY;
2367
2368 p_stage->program_id =
2369 p_atomic_inc_return(&physical_device->next_program_id);
2370
2371 enum broadcom_shader_stage broadcom_stage =
2372 gl_shader_stage_to_broadcom(stage);
2373
2374 p_stage->pipeline = pipeline;
2375 p_stage->stage = broadcom_stage;
2376 p_stage->entrypoint = sinfo->pName;
2377 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2378 p_stage->spec_info = sinfo->pSpecializationInfo;
2379
2380 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2381 pCreateInfo->pNext, sinfo->pNext);
2382
2383 vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i],
2384 &p_stage->robustness,
2385 p_stage->shader_sha1);
2386
2387 pipeline->active_stages |= sinfo->stage;
2388
2389 /* We will try to get the compiled shader variant directly, so let's not
2390 * worry about getting the nir shader for now.
2391 */
2392 p_stage->nir = NULL;
2393 pipeline->stages[broadcom_stage] = p_stage;
2394 if (broadcom_stage == BROADCOM_SHADER_GEOMETRY)
2395 pipeline->has_gs = true;
2396
2397 if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) {
2398 enum broadcom_shader_stage broadcom_stage_bin =
2399 broadcom_binning_shader_stage_for_render_stage(broadcom_stage);
2400
2401 pipeline->stages[broadcom_stage_bin] =
2402 pipeline_stage_create_binning(p_stage, pAllocator);
2403
2404 if (pipeline->stages[broadcom_stage_bin] == NULL)
2405 return VK_ERROR_OUT_OF_HOST_MEMORY;
2406 }
2407 }
2408
2409 /* Add a no-op fragment shader if needed */
2410 if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) {
2411 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2412 &v3dv_nir_options,
2413 "noop_fs");
2414
2415 struct v3dv_pipeline_stage *p_stage =
2416 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2417 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2418
2419 if (p_stage == NULL)
2420 return VK_ERROR_OUT_OF_HOST_MEMORY;
2421
2422 p_stage->pipeline = pipeline;
2423 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2424 p_stage->entrypoint = "main";
2425 p_stage->module = 0;
2426 p_stage->nir = b.shader;
2427 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2428 NULL, NULL);
2429 pipeline_compute_sha1_from_nir(p_stage);
2430 p_stage->program_id =
2431 p_atomic_inc_return(&physical_device->next_program_id);
2432
2433 pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage;
2434 pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2435 }
2436
2437 /* If multiview is enabled, we inject a custom passthrough geometry shader
2438 * to broadcast draw calls to the appropriate views.
2439 */
2440 assert(!pipeline->subpass->view_mask ||
2441 (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));
2442 if (pipeline->subpass->view_mask) {
2443 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2444 return VK_ERROR_OUT_OF_HOST_MEMORY;
2445 }
2446
2447 /* First we try to get the variants from the pipeline cache (unless we are
2448 * required to capture internal representations, since in that case we need
2449 * to compile).
2450 */
2451 bool needs_executable_info =
2452 pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2453 if (!needs_executable_info) {
2454 struct v3dv_pipeline_key pipeline_key;
2455 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2456 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2457
2458 bool cache_hit = false;
2459
2460 pipeline->shared_data =
2461 v3dv_pipeline_cache_search_for_pipeline(cache,
2462 pipeline->sha1,
2463 &cache_hit);
2464
2465 if (pipeline->shared_data != NULL) {
2466 /* A correct pipeline must have at least a VS and FS */
2467 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2468 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2469 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2470 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2471 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2472 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2473 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2474
2475 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2476 pipeline_feedback.flags |=
2477 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2478
2479 goto success;
2480 }
2481 }
2482
2483 if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2484 return VK_PIPELINE_COMPILE_REQUIRED;
2485
2486 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2487 * shader or the pipeline cache) and compile.
2488 */
2489 pipeline->shared_data =
2490 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2491 if (!pipeline->shared_data)
2492 return VK_ERROR_OUT_OF_HOST_MEMORY;
2493
2494 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2495 struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT];
2496 struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY];
2497
2498 p_stage_vs->feedback.flags |=
2499 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2500 if (p_stage_gs)
2501 p_stage_gs->feedback.flags |=
2502 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2503 p_stage_fs->feedback.flags |=
2504 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2505
2506 if (!p_stage_vs->nir)
2507 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2508 if (p_stage_gs && !p_stage_gs->nir)
2509 p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache);
2510 if (!p_stage_fs->nir)
2511 p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache);
2512
2513 /* Linking + pipeline lowerings */
2514 if (p_stage_gs) {
2515 link_shaders(p_stage_gs->nir, p_stage_fs->nir);
2516 link_shaders(p_stage_vs->nir, p_stage_gs->nir);
2517 } else {
2518 link_shaders(p_stage_vs->nir, p_stage_fs->nir);
2519 }
2520
2521 pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout);
2522 lower_fs_io(p_stage_fs->nir);
2523
2524 if (p_stage_gs) {
2525 pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout);
2526 lower_gs_io(p_stage_gs->nir);
2527 }
2528
2529 pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout);
2530 lower_vs_io(p_stage_vs->nir);
2531
2532 /* Compiling to vir */
2533 VkResult vk_result;
2534
2535 /* We should have gotten either all the variants from the cache or none */
2536 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2537 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2538 if (vk_result != VK_SUCCESS)
2539 return vk_result;
2540
2541 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2542 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2543
2544 if (p_stage_gs) {
2545 vk_result =
2546 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2547 if (vk_result != VK_SUCCESS)
2548 return vk_result;
2549 }
2550
2551 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2552 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2553
2554 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2555 if (vk_result != VK_SUCCESS)
2556 return vk_result;
2557
2558 if (!upload_assembly(pipeline))
2559 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2560
2561 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2562
2563 success:
2564
2565 pipeline_check_buffer_device_address(pipeline);
2566
2567 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2568 write_creation_feedback(pipeline,
2569 pCreateInfo->pNext,
2570 &pipeline_feedback,
2571 pCreateInfo->stageCount,
2572 pCreateInfo->pStages);
2573
2574 /* Since we have the variants in the pipeline shared data we can now free
2575 * the pipeline stages.
2576 */
2577 if (!needs_executable_info)
2578 pipeline_free_stages(device, pipeline, pAllocator);
2579
2580 pipeline_check_spill_size(pipeline);
2581
2582 return compute_vpm_config(pipeline);
2583 }
2584
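/* Computes the VPM (vertex pipeline memory) configuration for both the
 * binning and render passes from the VS (and GS, if present) program data.
 * Failure is reported as VK_ERROR_OUT_OF_DEVICE_MEMORY, presumably because
 * no valid VPM configuration could accommodate the shaders.
 */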
2585 static VkResult
2586 compute_vpm_config(struct v3dv_pipeline *pipeline)
2587 {
2588 struct v3dv_shader_variant *vs_variant =
2589 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2590 struct v3dv_shader_variant *vs_bin_variant =
2591 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2592 struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2593 struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2594
2595 struct v3d_gs_prog_data *gs = NULL;
2596 struct v3d_gs_prog_data *gs_bin = NULL;
2597 if (pipeline->has_gs) {
2598 struct v3dv_shader_variant *gs_variant =
2599 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2600 struct v3dv_shader_variant *gs_bin_variant =
2601 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2602 gs = gs_variant->prog_data.gs;
2603 gs_bin = gs_bin_variant->prog_data.gs;
2604 }
2605
2606 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2607 vs_bin, vs, gs_bin, gs,
2608 &pipeline->vpm_cfg_bin,
2609 &pipeline->vpm_cfg)) {
2610 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2611 }
2612
2613 return VK_SUCCESS;
2614 }
2615
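/* Translates a VkDynamicState enum into the driver's internal
 * V3DV_DYNAMIC_* bitmask so dynamic states can be tracked in a single
 * 32-bit mask.
 */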
2616 static unsigned
2617 v3dv_dynamic_state_mask(VkDynamicState state)
2618 {
2619 switch(state) {
2620 case VK_DYNAMIC_STATE_VIEWPORT:
2621 return V3DV_DYNAMIC_VIEWPORT;
2622 case VK_DYNAMIC_STATE_SCISSOR:
2623 return V3DV_DYNAMIC_SCISSOR;
2624 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2625 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2626 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2627 return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2628 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2629 return V3DV_DYNAMIC_STENCIL_REFERENCE;
2630 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2631 return V3DV_DYNAMIC_BLEND_CONSTANTS;
2632 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2633 return V3DV_DYNAMIC_DEPTH_BIAS;
2634 case VK_DYNAMIC_STATE_LINE_WIDTH:
2635 return V3DV_DYNAMIC_LINE_WIDTH;
2636 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2637 return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2638 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2639 return V3DV_DYNAMIC_DEPTH_BOUNDS;
2640
2641 default:
2642 unreachable("Unhandled dynamic state");
2643 }
2644 }
2645
2646 static void
2647 pipeline_init_dynamic_state(
2648 struct v3dv_pipeline *pipeline,
2649 const VkPipelineDynamicStateCreateInfo *pDynamicState,
2650 const VkPipelineViewportStateCreateInfo *pViewportState,
2651 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2652 const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2653 const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2654 const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2655 {
2656 /* Initialize to default values */
2657 const struct v3d_device_info *devinfo = &pipeline->device->devinfo;
2658 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2659 memset(dynamic, 0, sizeof(*dynamic));
2660 dynamic->stencil_compare_mask.front = ~0;
2661 dynamic->stencil_compare_mask.back = ~0;
2662 dynamic->stencil_write_mask.front = ~0;
2663 dynamic->stencil_write_mask.back = ~0;
2664 dynamic->line_width = 1.0f;
2665 dynamic->color_write_enable =
2666 (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1;
2667 dynamic->depth_bounds.max = 1.0f;
2668
2669 /* Create a mask of enabled dynamic states */
2670 uint32_t dynamic_states = 0;
2671 if (pDynamicState) {
2672 uint32_t count = pDynamicState->dynamicStateCount;
2673 for (uint32_t s = 0; s < count; s++) {
2674 dynamic_states |=
2675 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2676 }
2677 }
2678
2679 /* For any pipeline states that are not dynamic, set the dynamic state
2680 * from the static pipeline state.
2681 */
2682 if (pViewportState) {
2683 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2684 dynamic->viewport.count = pViewportState->viewportCount;
2685 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2686 pViewportState->viewportCount);
2687
2688 for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2689 v3dv_X(pipeline->device, viewport_compute_xform)
2690 (&dynamic->viewport.viewports[i],
2691 dynamic->viewport.scale[i],
2692 dynamic->viewport.translate[i]);
2693 }
2694 }
2695
2696 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2697 dynamic->scissor.count = pViewportState->scissorCount;
2698 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2699 pViewportState->scissorCount);
2700 }
2701 }
2702
2703 if (pDepthStencilState) {
2704 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2705 dynamic->stencil_compare_mask.front =
2706 pDepthStencilState->front.compareMask;
2707 dynamic->stencil_compare_mask.back =
2708 pDepthStencilState->back.compareMask;
2709 }
2710
2711 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2712 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2713 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2714 }
2715
2716 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2717 dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2718 dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2719 }
2720
2721 if (!(dynamic_states & V3DV_DYNAMIC_DEPTH_BOUNDS)) {
2722 dynamic->depth_bounds.min = pDepthStencilState->minDepthBounds;
2723 dynamic->depth_bounds.max = pDepthStencilState->maxDepthBounds;
2724 }
2725 }
2726
2727 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2728 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2729 sizeof(dynamic->blend_constants));
2730 }
2731
2732 if (pRasterizationState) {
2733 if (pRasterizationState->depthBiasEnable &&
2734 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2735 dynamic->depth_bias.constant_factor =
2736 pRasterizationState->depthBiasConstantFactor;
2737 dynamic->depth_bias.depth_bias_clamp =
2738 pRasterizationState->depthBiasClamp;
2739 dynamic->depth_bias.slope_factor =
2740 pRasterizationState->depthBiasSlopeFactor;
2741 }
2742 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2743 dynamic->line_width = pRasterizationState->lineWidth;
2744 }
2745
2746 if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2747 dynamic->color_write_enable = 0;
2748 for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2749 dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2750 }
2751
2752 pipeline->dynamic_state.mask = dynamic_states;
2753 }
2754
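/* For early-Z purposes a stencil configuration is a no-op when the stencil
 * test always passes and depth-test failure keeps the stencil value: in
 * that case discarding depth-failed fragments early produces the same
 * stencil contents as running the full stencil ops, so early-Z is safe.
 */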
2755 static bool
2756 stencil_op_is_no_op(const VkStencilOpState *stencil)
2757 {
2758 return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2759 stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2760 }
2761
2762 static void
2763 enable_depth_bias(struct v3dv_pipeline *pipeline,
2764 const VkPipelineRasterizationStateCreateInfo *rs_info)
2765 {
2766 pipeline->depth_bias.enabled = false;
2767 pipeline->depth_bias.is_z16 = false;
2768
2769 if (!rs_info || !rs_info->depthBiasEnable)
2770 return;
2771
2772 /* Check the depth/stencil attachment description for the subpass used with
2773 * this pipeline.
2774 */
2775 assert(pipeline->pass && pipeline->subpass);
2776 struct v3dv_render_pass *pass = pipeline->pass;
2777 struct v3dv_subpass *subpass = pipeline->subpass;
2778
2779 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2780 return;
2781
2782 assert(subpass->ds_attachment.attachment < pass->attachment_count);
2783 struct v3dv_render_pass_attachment *att =
2784 &pass->attachments[subpass->ds_attachment.attachment];
2785
2786 if (att->desc.format == VK_FORMAT_D16_UNORM)
2787 pipeline->depth_bias.is_z16 = true;
2788
2789 pipeline->depth_bias.enabled = true;
2790 }
2791
2792 static void
2793 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2794 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2795 {
2796 if (!ds_info || !ds_info->depthTestEnable) {
2797 pipeline->ez_state = V3D_EZ_DISABLED;
2798 return;
2799 }
2800
2801 switch (ds_info->depthCompareOp) {
2802 case VK_COMPARE_OP_LESS:
2803 case VK_COMPARE_OP_LESS_OR_EQUAL:
2804 pipeline->ez_state = V3D_EZ_LT_LE;
2805 break;
2806 case VK_COMPARE_OP_GREATER:
2807 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2808 pipeline->ez_state = V3D_EZ_GT_GE;
2809 break;
2810 case VK_COMPARE_OP_NEVER:
2811 case VK_COMPARE_OP_EQUAL:
2812 pipeline->ez_state = V3D_EZ_UNDECIDED;
2813 break;
2814 default:
2815 pipeline->ez_state = V3D_EZ_DISABLED;
2816 pipeline->incompatible_ez_test = true;
2817 break;
2818 }
2819
2820 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2821 if (ds_info->stencilTestEnable &&
2822 (!stencil_op_is_no_op(&ds_info->front) ||
2823 !stencil_op_is_no_op(&ds_info->back))) {
2824 pipeline->ez_state = V3D_EZ_DISABLED;
2825 }
2826
2827 /* If the FS writes Z, then it may update against the chosen EZ direction */
2828 struct v3dv_shader_variant *fs_variant =
2829 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2830 if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2831 !fs_variant->prog_data.fs->writes_z_from_fep) {
2832 pipeline->ez_state = V3D_EZ_DISABLED;
2833 }
2834 }
2835
2836 static void
2837 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2838 const VkPipelineMultisampleStateCreateInfo *ms_info)
2839 {
2840 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2841
2842 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2843 * requires this to be 0xf or 0x0 if using a single sample.
2844 */
2845 if (ms_info && ms_info->pSampleMask &&
2846 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2847 pipeline->sample_mask &= ms_info->pSampleMask[0];
2848 }
2849 }
2850
2851 static void
2852 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2853 const VkPipelineMultisampleStateCreateInfo *ms_info)
2854 {
2855 pipeline->sample_rate_shading =
2856 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2857 ms_info->sampleShadingEnable;
2858 }
2859
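/* Main graphics pipeline initialization: parses the create info (honoring
 * the rule that most state is ignored when rasterization is disabled), sets
 * up dynamic state, packs hardware state, compiles the shader stages and
 * finally derives state that depends on the compiled shaders (default
 * attribute values and early-Z configuration).
 */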
2860 static VkResult
2861 pipeline_init(struct v3dv_pipeline *pipeline,
2862 struct v3dv_device *device,
2863 struct v3dv_pipeline_cache *cache,
2864 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2865 const VkAllocationCallbacks *pAllocator)
2866 {
2867 VkResult result = VK_SUCCESS;
2868
2869 pipeline->device = device;
2870 pipeline->flags = pCreateInfo->flags;
2871
2872 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2873 pipeline->layout = layout;
2874 v3dv_pipeline_layout_ref(pipeline->layout);
2875
2876 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2877 assert(pCreateInfo->subpass < render_pass->subpass_count);
2878 pipeline->pass = render_pass;
2879 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2880
2881 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2882 pCreateInfo->pInputAssemblyState;
2883 pipeline->topology = vk_to_mesa_prim[ia_info->topology];
2884
2885 /* If rasterization is not enabled, various CreateInfo structs must be
2886 * ignored.
2887 */
2888 const bool raster_enabled =
2889 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2890
2891 const VkPipelineViewportStateCreateInfo *vp_info =
2892 raster_enabled ? pCreateInfo->pViewportState : NULL;
2893
2894 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2895 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2896
2897 const VkPipelineRasterizationStateCreateInfo *rs_info =
2898 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2899
2900 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2901 rs_info ? vk_find_struct_const(
2902 rs_info->pNext,
2903 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2904 NULL;
2905
2906 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2907 rs_info ? vk_find_struct_const(
2908 rs_info->pNext,
2909 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2910 NULL;
2911
2912 const VkPipelineColorBlendStateCreateInfo *cb_info =
2913 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2914
2915 const VkPipelineMultisampleStateCreateInfo *ms_info =
2916 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2917
2918 const VkPipelineColorWriteCreateInfoEXT *cw_info =
2919 cb_info ? vk_find_struct_const(cb_info->pNext,
2920 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2921 NULL;
2922
2923 if (vp_info) {
2924 const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
2925 vk_find_struct_const(vp_info->pNext,
2926 PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT);
2927 if (depth_clip_control)
2928 pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne;
2929 }
2930
2931 pipeline_init_dynamic_state(pipeline,
2932 pCreateInfo->pDynamicState,
2933 vp_info, ds_info, cb_info, rs_info, cw_info);
2934
2935 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2936 * feature and it shouldn't be used by any pipeline.
2937 */
2938 assert(device->devinfo.ver >= 71 ||
2939 !ds_info || !ds_info->depthBoundsTestEnable);
2940 pipeline->depth_bounds_test_enabled = ds_info && ds_info->depthBoundsTestEnable;
2941
2942 enable_depth_bias(pipeline, rs_info);
2943
2944 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2945 rs_info, pv_info, ls_info,
2946 ms_info);
2947
2948 pipeline_set_sample_mask(pipeline, ms_info);
2949 pipeline_set_sample_rate_shading(pipeline, ms_info);
2950
2951 pipeline->primitive_restart =
2952 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2953
2954 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2955
2956 if (result != VK_SUCCESS) {
2957 /* The caller will destroy the pipeline and we didn't allocate any extra
2958 * info, so we don't need to do anything else here.
2959 */
2960 return result;
2961 }
2962
2963 const VkPipelineVertexInputStateCreateInfo *vi_info =
2964 pCreateInfo->pVertexInputState;
2965
2966 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
2967 vk_find_struct_const(vi_info->pNext,
2968 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2969
2970 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
2971
2972 if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
2973 pipeline->default_attribute_values =
2974 v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
2975
2976 if (!pipeline->default_attribute_values)
2977 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2978 } else {
2979 pipeline->default_attribute_values = NULL;
2980 }
2981
2982 /* This must be done after the pipeline has been compiled */
2983 pipeline_set_ez_state(pipeline, ds_info);
2984
2985 return result;
2986 }
2987
2988 static VkResult
2989 graphics_pipeline_create(VkDevice _device,
2990 VkPipelineCache _cache,
2991 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2992 const VkAllocationCallbacks *pAllocator,
2993 VkPipeline *pPipeline)
2994 {
2995 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2996 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2997
2998 struct v3dv_pipeline *pipeline;
2999 VkResult result;
3000
3001 /* Use the default pipeline cache if none is specified */
3002 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3003 cache = &device->default_pipeline_cache;
3004
3005 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3006 VK_OBJECT_TYPE_PIPELINE);
3007
3008 if (pipeline == NULL)
3009 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3010
3011 result = pipeline_init(pipeline, device, cache,
3012 pCreateInfo,
3013 pAllocator);
3014
3015 if (result != VK_SUCCESS) {
3016 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3017 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3018 *pPipeline = VK_NULL_HANDLE;
3019 return result;
3020 }
3021
3022 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3023
3024 return VK_SUCCESS;
3025 }
3026
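/* vkCreateGraphicsPipelines: pipelines are created one at a time; on
 * failure the corresponding handle is set to VK_NULL_HANDLE and, if the
 * failing pipeline requested VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT,
 * the remaining pipelines are skipped (their handles are also set to
 * VK_NULL_HANDLE), as the spec requires.
 */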
3027 VKAPI_ATTR VkResult VKAPI_CALL
3028 v3dv_CreateGraphicsPipelines(VkDevice _device,
3029 VkPipelineCache pipelineCache,
3030 uint32_t count,
3031 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3032 const VkAllocationCallbacks *pAllocator,
3033 VkPipeline *pPipelines)
3034 {
3035 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3036 VkResult result = VK_SUCCESS;
3037
3038 if (V3D_DBG(SHADERS))
3039 mtx_lock(&device->pdevice->mutex);
3040
3041 uint32_t i = 0;
3042 for (; i < count; i++) {
3043 VkResult local_result;
3044
3045 local_result = graphics_pipeline_create(_device,
3046 pipelineCache,
3047 &pCreateInfos[i],
3048 pAllocator,
3049 &pPipelines[i]);
3050
3051 if (local_result != VK_SUCCESS) {
3052 result = local_result;
3053 pPipelines[i] = VK_NULL_HANDLE;
3054
3055 if (pCreateInfos[i].flags &
3056 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3057 break;
3058 }
3059 }
3060
3061 for (; i < count; i++)
3062 pPipelines[i] = VK_NULL_HANDLE;
3063
3064 if (V3D_DBG(SHADERS))
3065 mtx_unlock(&device->pdevice->mutex);
3066
3067 return result;
3068 }
3069
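/* Size/alignment callback for nir_lower_vars_to_explicit_types on shared
 * (workgroup) memory: booleans take 4 bytes and 3-component vectors are
 * aligned like 4-component ones.
 */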
3070 static void
3071 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3072 {
3073 assert(glsl_type_is_vector_or_scalar(type));
3074
3075 uint32_t comp_size = glsl_type_is_boolean(type)
3076 ? 4 : glsl_get_bit_size(type) / 8;
3077 unsigned length = glsl_get_vector_elements(type);
3078 *size = comp_size * length,
3079 *align = comp_size * (length == 3 ? 4 : length);
3080 }
3081
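/* Compute-specific NIR lowering: assigns an explicit layout to shared
 * memory (unless the shader already provides one), lowers shared-memory
 * access to 32-bit offsets, and lowers compute system values while keeping
 * a base workgroup id system value (used by vkCmdDispatchBase).
 */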
static void
lower_compute(struct nir_shader *nir)
{
   if (!nir->info.shared_memory_explicit_layout) {
      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared, shared_type_info);
   }

   NIR_PASS(_, nir, nir_lower_explicit_io,
            nir_var_mem_shared, nir_address_format_32bit_offset);

   struct nir_lower_compute_system_values_options sysval_options = {
      .has_base_workgroup_id = true,
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
}

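/* Builds the single compute stage of a pipeline: first looks up the shader
 * variants in the pipeline cache by the compute pipeline key, and only
 * falls back to compiling the shader (NIR lowering, v3d compile, assembly
 * upload) on a cache miss. Creation feedback is recorded either way.
 */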
static VkResult
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
                         struct v3dv_pipeline_cache *cache,
                         const VkComputePipelineCreateInfo *info,
                         const VkAllocationCallbacks *alloc)
{
   VkPipelineCreationFeedback pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   struct v3dv_device *device = pipeline->device;
   struct v3dv_physical_device *physical_device = device->pdevice;

   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!p_stage)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
   p_stage->pipeline = pipeline;
   p_stage->stage = gl_shader_stage_to_broadcom(stage);
   p_stage->entrypoint = sinfo->pName;
   p_stage->module = vk_shader_module_from_handle(sinfo->module);
   p_stage->spec_info = sinfo->pSpecializationInfo;
   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };

   vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
                                     info->pNext, sinfo->pNext);

   vk_pipeline_hash_shader_stage(&info->stage,
                                 &p_stage->robustness,
                                 p_stage->shader_sha1);

   p_stage->nir = NULL;

   pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage;
   pipeline->active_stages |= sinfo->stage;

   /* First we try to get the variants from the pipeline cache, unless we are
    * required to capture internal representations, since in that case we
    * need to compile.
    */
   bool needs_executable_info =
      info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
   if (!needs_executable_info) {
      struct v3dv_pipeline_key pipeline_key;
      pipeline_populate_compute_key(pipeline, &pipeline_key, info);
      pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);

      bool cache_hit = false;
      pipeline->shared_data =
         v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);

      if (pipeline->shared_data != NULL) {
         assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
            pipeline_feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;

         goto success;
      }
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
      return VK_PIPELINE_COMPILE_REQUIRED;

   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
                                                               pipeline,
                                                               false);
   if (!pipeline->shared_data)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;

   /* If not found in the cache, compile it */
   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
   assert(p_stage->nir);

   v3d_optimize_nir(NULL, p_stage->nir);
   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
   lower_compute(p_stage->nir);

   VkResult result = VK_SUCCESS;

   struct v3d_key key;
   memset(&key, 0, sizeof(key));
   pipeline_populate_v3d_key(&key, p_stage, 0);
   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
                                      alloc, &result);

   if (result != VK_SUCCESS)
      return result;

   if (!upload_assembly(pipeline))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);

success:

   pipeline_check_buffer_device_address(pipeline);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
   write_creation_feedback(pipeline,
                           info->pNext,
                           &pipeline_feedback,
                           1,
                           &info->stage);

   /* The variants are now referenced from pipeline->shared_data, so we no
    * longer need the pipeline stages once compilation is done.
    */
   if (!needs_executable_info)
      pipeline_free_stages(device, pipeline, alloc);

   pipeline_check_spill_size(pipeline);

   return VK_SUCCESS;
}

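/* Initializes a compute pipeline: takes a reference on the pipeline layout
 * and compiles (or fetches from the cache) its single compute stage.
 */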
static VkResult
compute_pipeline_init(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      struct v3dv_pipeline_cache *cache,
                      const VkComputePipelineCreateInfo *info,
                      const VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);

   pipeline->device = device;
   pipeline->layout = layout;
   v3dv_pipeline_layout_ref(pipeline->layout);

   return pipeline_compile_compute(pipeline, cache, info, alloc);
}

static VkResult
compute_pipeline_create(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkComputePipelineCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkPipeline *pPipeline)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct v3dv_pipeline *pipeline;
   VkResult result;

   /* Use the default pipeline cache if none is specified */
   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
      cache = &device->default_pipeline_cache;

   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
                               VK_OBJECT_TYPE_PIPELINE);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = compute_pipeline_init(pipeline, device, cache,
                                  pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      v3dv_destroy_pipeline(pipeline, device, pAllocator);
      if (result == VK_PIPELINE_COMPILE_REQUIRED)
         *pPipeline = VK_NULL_HANDLE;
      return result;
   }

   *pPipeline = v3dv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

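/* vkCreateComputePipelines entry point: follows the same per-pipeline error
 * handling rules as v3dv_CreateGraphicsPipelines above.
 */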
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateComputePipelines(VkDevice _device,
                            VkPipelineCache pipelineCache,
                            uint32_t createInfoCount,
                            const VkComputePipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   VkResult result = VK_SUCCESS;

   if (V3D_DBG(SHADERS))
      mtx_lock(&device->pdevice->mutex);

   uint32_t i = 0;
   for (; i < createInfoCount; i++) {
      VkResult local_result;
      local_result = compute_pipeline_create(_device,
                                             pipelineCache,
                                             &pCreateInfos[i],
                                             pAllocator,
                                             &pPipelines[i]);

      if (local_result != VK_SUCCESS) {
         result = local_result;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags &
             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < createInfoCount; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   if (V3D_DBG(SHADERS))
      mtx_unlock(&device->pdevice->mutex);

   return result;
}

static nir_shader *
pipeline_get_nir(struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage)
{
   assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES);
   if (pipeline->stages[stage])
      return pipeline->stages[stage]->nir;

   return NULL;
}

static struct v3d_prog_data *
pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
                       enum broadcom_shader_stage stage)
{
   if (pipeline->shared_data->variants[stage])
      return pipeline->shared_data->variants[stage]->prog_data.base;
   return NULL;
}

static uint64_t *
pipeline_get_qpu(struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 uint32_t *qpu_size)
{
   struct v3dv_shader_variant *variant =
      pipeline->shared_data->variants[stage];
   if (!variant) {
      *qpu_size = 0;
      return NULL;
   }

   *qpu_size = variant->qpu_insts_size;
   return variant->qpu_insts;
}

/* FIXME: we use the same macro in various drivers, maybe move it to
 * the common vk_util.h?
 */
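/* Writes a formatted string into a fixed-size char array, zero-filling the
 * array first and asserting that the result was not truncated.
 */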
#define WRITE_STR(field, ...) ({                                  \
   memset(field, 0, sizeof(field));                               \
   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__);   \
   assert(_i > 0 && _i < sizeof(field));                          \
})

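/* Copies an internal representation string into the caller-provided buffer
 * following the usual Vulkan two-call idiom: with a NULL pData only the
 * required size is returned; otherwise as much as fits is copied and false
 * is returned if the buffer was too small.
 */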
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

static void
append(char **str, size_t *offset, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
   va_end(args);
}

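/* Lazily gathers per-stage executable data (final NIR and disassembled QPU
 * text) for the VK_KHR_pipeline_executable_properties queries. This is a
 * no-op if the data was already collected or if the pipeline has no
 * compiled shader data.
 */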
static void
pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
{
   if (pipeline->executables.mem_ctx)
      return;

   pipeline->executables.mem_ctx = ralloc_context(NULL);
   util_dynarray_init(&pipeline->executables.data,
                      pipeline->executables.mem_ctx);

   /* Don't crash for failed/bogus pipelines */
   if (!pipeline->shared_data)
      return;

   for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
      VkShaderStageFlags vk_stage =
         mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
      if (!(vk_stage & pipeline->active_stages))
         continue;

      char *nir_str = NULL;
      char *qpu_str = NULL;

      if (pipeline_keep_qpu(pipeline)) {
         nir_shader *nir = pipeline_get_nir(pipeline, s);
         nir_str = nir ?
            nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;

         uint32_t qpu_size;
         uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
         if (qpu) {
            uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
            qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
                                   qpu_inst_count * 96);
            size_t offset = 0;
            for (int i = 0; i < qpu_inst_count; i++) {
               const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
               append(&qpu_str, &offset, "%s\n", str);
               ralloc_free((void *)str);
            }
         }
      }

      struct v3dv_pipeline_executable_data data = {
         .stage = s,
         .nir_str = nir_str,
         .qpu_str = qpu_str,
      };
      util_dynarray_append(&pipeline->executables.data,
                           struct v3dv_pipeline_executable_data, data);
   }
}


static const struct v3dv_pipeline_executable_data *
pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables.data,
                                             struct v3dv_pipeline_executable_data));
   return util_dynarray_element(&pipeline->executables.data,
                                struct v3dv_pipeline_executable_data,
                                index);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableInternalRepresentationsKHR(
   VkDevice device,
   const VkPipelineExecutableInfoKHR *pExecutableInfo,
   uint32_t *pInternalRepresentationCount,
   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          pInternalRepresentations, pInternalRepresentationCount);

   bool incomplete = false;
   const struct v3dv_pipeline_executable_data *exe =
      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir_str) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                               &out, ir) {
         WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
         WRITE_STR(ir->description, "Final NIR form");
         if (!write_ir_text(ir, exe->nir_str))
            incomplete = true;
      }
   }

   if (exe->qpu_str) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                               &out, ir) {
         WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
         WRITE_STR(ir->description, "Final QPU assembly");
         if (!write_ir_text(ir, exe->qpu_str))
            incomplete = true;
      }
   }

   return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
}

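/* Reports one executable per active shader stage, distinguishing binning
 * and render variants and exposing the hardware channel count
 * (V3D_CHANNELS) as the subgroup size.
 */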
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutablePropertiesKHR(
   VkDevice device,
   const VkPipelineInfoKHR *pPipelineInfo,
   uint32_t *pExecutableCount,
   VkPipelineExecutablePropertiesKHR *pProperties)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          pProperties, pExecutableCount);

   util_dynarray_foreach(&pipeline->executables.data,
                         struct v3dv_pipeline_executable_data, exe) {
      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
         props->stages = mesa_to_vk_shader_stage(mesa_stage);

         WRITE_STR(props->name, "%s (%s)",
                   _mesa_shader_stage_to_abbrev(mesa_stage),
                   broadcom_shader_stage_is_binning(exe->stage) ?
                   "Binning" : "Render");

         WRITE_STR(props->description, "%s",
                   _mesa_shader_stage_to_string(mesa_stage));

         props->subgroupSize = V3D_CHANNELS;
      }
   }

   return vk_outarray_status(&out);
}

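/* Exposes per-executable compile statistics (compile strategy, instruction
 * count, thread count, spill/fill counts, read stalls) gathered from the
 * compiled variant's v3d_prog_data.
 */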
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableStatisticsKHR(
   VkDevice device,
   const VkPipelineExecutableInfoKHR *pExecutableInfo,
   uint32_t *pStatisticCount,
   VkPipelineExecutableStatisticKHR *pStatistics)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   const struct v3dv_pipeline_executable_data *exe =
      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   struct v3d_prog_data *prog_data =
      pipeline_get_prog_data(pipeline, exe->stage);

   struct v3dv_shader_variant *variant =
      pipeline->shared_data->variants[exe->stage];
   uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          pStatistics, pStatisticCount);

   if (qpu_inst_count > 0) {
      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Compile Strategy");
         WRITE_STR(stat->description, "Chosen compile strategy index");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->compile_strategy_idx;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Instruction Count");
         WRITE_STR(stat->description, "Number of QPU instructions");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = qpu_inst_count;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Thread Count");
         WRITE_STR(stat->description, "Number of QPU threads dispatched");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->threads;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Spill Size");
         WRITE_STR(stat->description, "Size of the spill buffer in bytes");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->spill_size;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Spills");
         WRITE_STR(stat->description, "Number of times a register was spilled "
                                      "to memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->tmu_spills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Fills");
         WRITE_STR(stat->description, "Number of times a register was filled "
                                      "from memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->tmu_fills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "QPU Read Stalls");
         WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
                                      "register read dependency");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->qpu_read_stalls;
      }
   }

   return vk_outarray_status(&out);
}
