1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_util.h"
25
26 #include "v3dv_private.h"
27
28 #include "common/v3d_debug.h"
29 #include "qpu/qpu_disasm.h"
30
31 #include "compiler/nir/nir_builder.h"
32 #include "nir/nir_serialize.h"
33
34 #include "util/u_atomic.h"
35 #include "util/os_time.h"
36 #include "util/perf/cpu_trace.h"
37
38 #include "vk_format.h"
39 #include "vk_nir_convert_ycbcr.h"
40 #include "vk_pipeline.h"
41
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44
45 static void
46 pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
47 {
48 VkPipelineShaderStageCreateInfo info = {
49 .module = vk_shader_module_handle_from_nir(p_stage->nir),
50 .pName = p_stage->entrypoint,
51 .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
52 };
53
54 vk_pipeline_hash_shader_stage(0, &info, NULL, p_stage->shader_sha1);
55 }
56
57 void
58 v3dv_shader_variant_destroy(struct v3dv_device *device,
59 struct v3dv_shader_variant *variant)
60 {
61 /* The assembly BO is shared by all variants in the pipeline, so it can't
62 * be freed here and should be freed with the pipeline
63 */
64 if (variant->qpu_insts) {
65 free(variant->qpu_insts);
66 variant->qpu_insts = NULL;
67 }
68 ralloc_free(variant->prog_data.base);
69 vk_free(&device->vk.alloc, variant);
70 }
71
72 static void
73 destroy_pipeline_stage(struct v3dv_device *device,
74 struct v3dv_pipeline_stage *p_stage,
75 const VkAllocationCallbacks *pAllocator)
76 {
77 if (!p_stage)
78 return;
79
80 ralloc_free(p_stage->nir);
81 vk_free2(&device->vk.alloc, pAllocator, p_stage);
82 }
83
84 static void
85 pipeline_free_stages(struct v3dv_device *device,
86 struct v3dv_pipeline *pipeline,
87 const VkAllocationCallbacks *pAllocator)
88 {
89 assert(pipeline);
90
91 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
92 destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator);
93 pipeline->stages[stage] = NULL;
94 }
95 }
96
97 static void
98 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
99 struct v3dv_device *device,
100 const VkAllocationCallbacks *pAllocator)
101 {
102 if (!pipeline)
103 return;
104
105 pipeline_free_stages(device, pipeline, pAllocator);
106
107 if (pipeline->shared_data) {
108 v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
109 pipeline->shared_data = NULL;
110 }
111
112 if (pipeline->spill.bo) {
113 assert(pipeline->spill.size_per_thread > 0);
114 v3dv_bo_free(device, pipeline->spill.bo);
115 }
116
117 if (pipeline->default_attribute_values) {
118 v3dv_bo_free(device, pipeline->default_attribute_values);
119 pipeline->default_attribute_values = NULL;
120 }
121
122 if (pipeline->executables.mem_ctx)
123 ralloc_free(pipeline->executables.mem_ctx);
124
125 if (pipeline->layout)
126 v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator);
127
128 vk_object_free(&device->vk, pAllocator, pipeline);
129 }
130
131 VKAPI_ATTR void VKAPI_CALL
132 v3dv_DestroyPipeline(VkDevice _device,
133 VkPipeline _pipeline,
134 const VkAllocationCallbacks *pAllocator)
135 {
136 V3DV_FROM_HANDLE(v3dv_device, device, _device);
137 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
138
139 if (!pipeline)
140 return;
141
142 v3dv_destroy_pipeline(pipeline, device, pAllocator);
143 }
144
145 static const struct spirv_to_nir_options default_spirv_options = {
146 .ubo_addr_format = nir_address_format_32bit_index_offset,
147 .ssbo_addr_format = nir_address_format_32bit_index_offset,
148 .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
149 .push_const_addr_format = nir_address_format_logical,
150 .shared_addr_format = nir_address_format_32bit_offset,
151 };
152
153 const nir_shader_compiler_options *
154 v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo)
155 {
156 static bool initialized = false;
157 static nir_shader_compiler_options options = {
158 .lower_uadd_sat = true,
159 .lower_usub_sat = true,
160 .lower_iadd_sat = true,
161 .lower_all_io_to_temps = true,
162 .lower_extract_byte = true,
163 .lower_extract_word = true,
164 .lower_insert_byte = true,
165 .lower_insert_word = true,
166 .lower_bitfield_insert = true,
167 .lower_bitfield_extract = true,
168 .lower_bitfield_reverse = true,
169 .lower_bit_count = true,
170 .lower_cs_local_id_to_index = true,
171 .lower_ffract = true,
172 .lower_fmod = true,
173 .lower_pack_unorm_2x16 = true,
174 .lower_pack_snorm_2x16 = true,
175 .lower_unpack_unorm_2x16 = true,
176 .lower_unpack_snorm_2x16 = true,
177 .lower_pack_unorm_4x8 = true,
178 .lower_pack_snorm_4x8 = true,
179 .lower_unpack_unorm_4x8 = true,
180 .lower_unpack_snorm_4x8 = true,
181 .lower_pack_half_2x16 = true,
182 .lower_unpack_half_2x16 = true,
183 .lower_pack_32_2x16 = true,
184 .lower_pack_32_2x16_split = true,
185 .lower_unpack_32_2x16_split = true,
186 .lower_mul_2x32_64 = true,
187 .lower_fdiv = true,
188 .lower_find_lsb = true,
189 .lower_ffma16 = true,
190 .lower_ffma32 = true,
191 .lower_ffma64 = true,
192 .lower_flrp32 = true,
193 .lower_fpow = true,
194 .lower_fsqrt = true,
195 .lower_ifind_msb = true,
196 .lower_isign = true,
197 .lower_ldexp = true,
198 .lower_mul_high = true,
199 .lower_wpos_pntc = false,
200 .lower_to_scalar = true,
201 .lower_device_index_to_zero = true,
202 .lower_fquantize2f16 = true,
203 .lower_ufind_msb = true,
204 .has_fsub = true,
205 .has_isub = true,
206 .has_uclz = true,
207 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
208 * needs to be supported */
209 .lower_interpolate_at = true,
210 .max_unroll_iterations = 16,
211 .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
212 .divergence_analysis_options =
213 nir_divergence_multiple_workgroup_per_compute_subgroup,
214 .discard_is_demote = true,
215 .scalarize_ddx = true,
216 };
217
218 if (!initialized) {
219 options.lower_fsat = devinfo->ver < 71;
220 initialized = true;
221 }
222
223 return &options;
224 }
225
226 static const struct vk_ycbcr_conversion_state *
227 lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set,
228 uint32_t binding, uint32_t array_index)
229 {
230 struct v3dv_pipeline_layout *pipeline_layout =
231 (struct v3dv_pipeline_layout *) _pipeline_layout;
232
233 assert(set < pipeline_layout->num_sets);
234 struct v3dv_descriptor_set_layout *set_layout =
235 pipeline_layout->set[set].layout;
236
237 assert(binding < set_layout->binding_count);
238 struct v3dv_descriptor_set_binding_layout *bind_layout =
239 &set_layout->binding[binding];
240
241 if (bind_layout->immutable_samplers_offset) {
242 const struct v3dv_sampler *immutable_samplers =
243 v3dv_immutable_samplers(set_layout, bind_layout);
244 const struct v3dv_sampler *sampler = &immutable_samplers[array_index];
245 return sampler->conversion ? &sampler->conversion->state : NULL;
246 } else {
247 return NULL;
248 }
249 }
250
251 static void
252 preprocess_nir(nir_shader *nir)
253 {
254 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
255 .frag_coord = true,
256 .point_coord = true,
257 };
258 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
259
260 /* Vulkan uses the separate-shader linking model */
261 nir->info.separate_shader = true;
262
263 /* Make sure we lower variable initializers on output variables so that
264 * nir_remove_dead_variables below sees the corresponding stores
265 */
266 NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
267
268 if (nir->info.stage == MESA_SHADER_FRAGMENT)
269 NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
270 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
271 NIR_PASS(_, nir, nir_lower_input_attachments,
272 &(nir_input_attachment_options) {
273 .use_fragcoord_sysval = false,
274 });
275 }
276
277 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
278 nir_shader_get_entrypoint(nir), true, false);
279
280 NIR_PASS(_, nir, nir_lower_system_values);
281
282 NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
283
284 NIR_PASS(_, nir, nir_normalize_cubemap_coords);
285
286 NIR_PASS(_, nir, nir_lower_global_vars_to_local);
287
288 NIR_PASS(_, nir, nir_split_var_copies);
289 NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
290
291 v3d_optimize_nir(NULL, nir);
292
293 NIR_PASS(_, nir, nir_lower_explicit_io,
294 nir_var_mem_push_const,
295 nir_address_format_32bit_offset);
296
297 NIR_PASS(_, nir, nir_lower_explicit_io,
298 nir_var_mem_ubo | nir_var_mem_ssbo,
299 nir_address_format_32bit_index_offset);
300
301 NIR_PASS(_, nir, nir_lower_explicit_io,
302 nir_var_mem_global,
303 nir_address_format_2x32bit_global);
304
305 NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
306
307 /* Lower a bunch of stuff */
308 NIR_PASS(_, nir, nir_lower_var_copies);
309
310 NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
311
312 NIR_PASS(_, nir, nir_lower_indirect_derefs,
313 nir_var_function_temp, 2);
314
315 NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
316 nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
317 nir_lower_direct_array_deref_of_vec_load);
318
319 NIR_PASS(_, nir, nir_lower_frexp);
320
321 /* Get rid of split copies */
322 v3d_optimize_nir(NULL, nir);
323 }
324
325 static nir_shader *
326 shader_module_compile_to_nir(struct v3dv_device *device,
327 struct v3dv_pipeline_stage *stage)
328 {
329 assert(stage->module || stage->module_info);
330
331 nir_shader *nir;
332 const nir_shader_compiler_options *nir_options =
333 v3dv_pipeline_get_nir_options(&device->devinfo);
334
335 gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage);
336
337 const VkPipelineShaderStageCreateInfo stage_info = {
338 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
339 .pNext = !stage->module ? stage->module_info : NULL,
340 .stage = mesa_to_vk_shader_stage(gl_stage),
341 .module = vk_shader_module_to_handle((struct vk_shader_module *)stage->module),
342 .pName = stage->entrypoint,
343 .pSpecializationInfo = stage->spec_info,
344 };
345
346 /* vk_pipeline_shader_stage_to_nir also handles internal shaders when
347 * module->nir != NULL. It also calls nir_validate_shader in both cases
348 * so we don't have to call it here.
349 */
350 VkResult result = vk_pipeline_shader_stage_to_nir(&device->vk,
351 stage->pipeline->flags,
352 &stage_info,
353 &default_spirv_options,
354 nir_options,
355 NULL, &nir);
356 if (result != VK_SUCCESS)
357 return NULL;
358 assert(nir->info.stage == gl_stage);
359
360 if (V3D_DBG(SHADERDB) && (!stage->module || stage->module->nir == NULL)) {
361 char sha1buf[41];
362 _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
363 nir->info.name = ralloc_strdup(nir, sha1buf);
364 }
365
366 if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
367 fprintf(stderr, "NIR after vk_pipeline_shader_stage_to_nir: %s prog %d NIR:\n",
368 broadcom_shader_stage_name(stage->stage),
369 stage->program_id);
370 nir_print_shader(nir, stderr);
371 fprintf(stderr, "\n");
372 }
373
374 preprocess_nir(nir);
375
376 return nir;
377 }
378
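/* Descriptive note (added): I/O size callback used with nir_lower_io (see
 * lower_fs_io below); it returns the number of vec4 attribute slots occupied
 * by a variable of the given type.
 */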
379 static int
380 type_size_vec4(const struct glsl_type *type, bool bindless)
381 {
382 return glsl_count_attribute_slots(type, false);
383 }
384
385 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
386 * rethink.
387 */
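/* Descriptive note (added): adds an entry to the descriptor map for the given
 * (set, binding, array_index, plane) tuple, or reuses a matching existing
 * entry, and returns its index. start_index lets callers reserve a range of
 * low slots, which is used below to keep inline uniform buffers apart from
 * regular UBOs.
 */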
388 static unsigned
389 descriptor_map_add(struct v3dv_descriptor_map *map,
390 int set,
391 int binding,
392 int array_index,
393 int array_size,
394 int start_index,
395 uint8_t return_size,
396 uint8_t plane)
397 {
398 assert(array_index < array_size);
399 assert(return_size == 16 || return_size == 32);
400
401 unsigned index = start_index;
402 for (; index < map->num_desc; index++) {
403 if (map->used[index] &&
404 set == map->set[index] &&
405 binding == map->binding[index] &&
406 array_index == map->array_index[index] &&
407 plane == map->plane[index]) {
408 assert(array_size == map->array_size[index]);
409 if (return_size != map->return_size[index]) {
410 /* If the return_size is different it means that the same sampler
411 * was used for operations with different precision
412 * requirements. In this case we need to ensure that we use the
413 * larger one.
414 */
415 map->return_size[index] = 32;
416 }
417 return index;
418 } else if (!map->used[index]) {
419 break;
420 }
421 }
422
423 assert(index < DESCRIPTOR_MAP_SIZE);
424 assert(!map->used[index]);
425
426 map->used[index] = true;
427 map->set[index] = set;
428 map->binding[index] = binding;
429 map->array_index[index] = array_index;
430 map->array_size[index] = array_size;
431 map->return_size[index] = return_size;
432 map->plane[index] = plane;
433 map->num_desc = MAX2(map->num_desc, index + 1);
434
435 return index;
436 }
437
438 struct lower_pipeline_layout_state {
439 struct v3dv_pipeline *pipeline;
440 const struct v3dv_pipeline_layout *layout;
441 bool needs_default_sampler_state;
442 };
443
444
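/* Descriptive note (added): push constant loads are rewritten here as regular
 * uniform loads, which the v3d compiler handles natively.
 */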
445 static void
446 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
447 struct lower_pipeline_layout_state *state)
448 {
449 assert(instr->intrinsic == nir_intrinsic_load_push_constant);
450 instr->intrinsic = nir_intrinsic_load_uniform;
451 }
452
453 static struct v3dv_descriptor_map*
454 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
455 VkDescriptorType desc_type,
456 gl_shader_stage gl_stage,
457 bool is_sampler)
458 {
459 enum broadcom_shader_stage broadcom_stage =
460 gl_shader_stage_to_broadcom(gl_stage);
461
462 assert(pipeline->shared_data &&
463 pipeline->shared_data->maps[broadcom_stage]);
464
465 switch(desc_type) {
466 case VK_DESCRIPTOR_TYPE_SAMPLER:
467 return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
468 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
469 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
470 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
471 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
472 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
473 return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
474 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
475 return is_sampler ?
476 &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
477 &pipeline->shared_data->maps[broadcom_stage]->texture_map;
478 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
479 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
480 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
481 return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
482 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
483 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
484 return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
485 default:
486 unreachable("Descriptor type unknown or not having a descriptor map");
487 }
488 }
489
490 /* Gathers info from the intrinsic (set and binding) and then lowers it so
491 * it can be used by the v3d_compiler */
492 static void
493 lower_vulkan_resource_index(nir_builder *b,
494 nir_intrinsic_instr *instr,
495 struct lower_pipeline_layout_state *state)
496 {
497 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
498
499 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
500
501 unsigned set = nir_intrinsic_desc_set(instr);
502 unsigned binding = nir_intrinsic_binding(instr);
503 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
504 struct v3dv_descriptor_set_binding_layout *binding_layout =
505 &set_layout->binding[binding];
506 unsigned index = 0;
507
508 switch (binding_layout->type) {
509 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
510 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
511 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
512 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
513 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
514 struct v3dv_descriptor_map *descriptor_map =
515 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
516 b->shader->info.stage, false);
517
518 if (!const_val)
519 unreachable("non-constant vulkan_resource_index array index");
520
521 /* At compile-time we will need to know if we are processing a UBO load
522 * for an inline or a regular UBO so we can handle inline loads like
523 * push constants. At the NIR level, however, the inline
524 * information is gone, so we rely on the index to make this distinction.
525 * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
526 * inline buffers. This means that at the descriptor map level
527 * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
528 * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
529 */
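/* Illustrative example (hypothetical value, added): if MAX_INLINE_UNIFORM_BUFFERS
 * were 4, inline uniform blocks would occupy map slots 0..3 and regular UBOs
 * would be added starting at slot 4 via start_index below.
 */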
530 uint32_t start_index = 0;
531 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
532 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
533 start_index += MAX_INLINE_UNIFORM_BUFFERS;
534 }
535
536 index = descriptor_map_add(descriptor_map, set, binding,
537 const_val->u32,
538 binding_layout->array_size,
539 start_index,
540 32 /* return_size: doesn't really apply for this case */,
541 0);
542 break;
543 }
544
545 default:
546 unreachable("unsupported descriptor type for vulkan_resource_index");
547 break;
548 }
549
550 /* Since we use the deref pass, both vulkan_resource_index and
551 * vulkan_load_descriptor return a vec2 providing an index and
552 * offset. Our backend compiler only cares about the index part.
553 */
554 nir_def_replace(&instr->def, nir_imm_ivec2(b, index, 0));
555 }
556
557 static uint8_t
558 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
559 {
560 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
561 if (plane_src_idx < 0)
562 return 0;
563
564 uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src);
565 nir_tex_instr_remove_src(tex, plane_src_idx);
566 return plane;
567 }
568
569 /* Returns the return_size, so it can be used for the case of not having a
570 * sampler object.
571 */
572 static uint8_t
573 lower_tex_src(nir_builder *b,
574 nir_tex_instr *instr,
575 unsigned src_idx,
576 struct lower_pipeline_layout_state *state)
577 {
578 nir_def *index = NULL;
579 unsigned base_index = 0;
580 unsigned array_elements = 1;
581 nir_tex_src *src = &instr->src[src_idx];
582 bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
583
584 uint8_t plane = tex_instr_get_and_remove_plane_src(instr);
585
586 /* We compute the offsets first */
587 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
588 while (deref->deref_type != nir_deref_type_var) {
589 nir_deref_instr *parent =
590 nir_instr_as_deref(deref->parent.ssa->parent_instr);
591
592 assert(deref->deref_type == nir_deref_type_array);
593
594 if (nir_src_is_const(deref->arr.index) && index == NULL) {
595 /* We're still building a direct index */
596 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
597 } else {
598 if (index == NULL) {
599 /* We used to be direct but not anymore */
600 index = nir_imm_int(b, base_index);
601 base_index = 0;
602 }
603
604 index = nir_iadd(b, index,
605 nir_imul_imm(b, deref->arr.index.ssa,
606 array_elements));
607 }
608
609 array_elements *= glsl_get_length(parent->type);
610
611 deref = parent;
612 }
613
614 if (index)
615 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
616
617 /* Now that we have the offsets, we apply them, rewriting the source or
618 * removing the instr if needed.
619 */
620 if (index) {
621 nir_src_rewrite(&src->src, index);
622
623 src->src_type = is_sampler ?
624 nir_tex_src_sampler_offset :
625 nir_tex_src_texture_offset;
626 } else {
627 nir_tex_instr_remove_src(instr, src_idx);
628 }
629
630 uint32_t set = deref->var->data.descriptor_set;
631 uint32_t binding = deref->var->data.binding;
632 /* FIXME: this is a really simplified check for the precision to be used
633 * for the sampling. Right now we are only checking the variables used
634 * in the operation itself, but there are other cases that we could use to
635 * infer the precision requirement.
636 */
637 bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
638 deref->var->data.precision == GLSL_PRECISION_LOW;
639 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
640 struct v3dv_descriptor_set_binding_layout *binding_layout =
641 &set_layout->binding[binding];
642
643 uint8_t return_size;
644 if (V3D_DBG(TMU_16BIT))
645 return_size = 16;
646 else if (V3D_DBG(TMU_32BIT))
647 return_size = 32;
648 else
649 return_size = relaxed_precision ? 16 : 32;
650
651 struct v3dv_descriptor_map *map =
652 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
653 b->shader->info.stage, is_sampler);
654 int desc_index =
655 descriptor_map_add(map,
656 deref->var->data.descriptor_set,
657 deref->var->data.binding,
658 base_index,
659 binding_layout->array_size,
660 0,
661 return_size,
662 plane);
663
664 if (is_sampler)
665 instr->sampler_index = desc_index;
666 else
667 instr->texture_index = desc_index;
668
669 return return_size;
670 }
671
672 static bool
673 lower_sampler(nir_builder *b,
674 nir_tex_instr *instr,
675 struct lower_pipeline_layout_state *state)
676 {
677 uint8_t return_size = 0;
678
679 int texture_idx =
680 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
681
682 if (texture_idx >= 0)
683 return_size = lower_tex_src(b, instr, texture_idx, state);
684
685 int sampler_idx =
686 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
687
688 if (sampler_idx >= 0) {
689 assert(nir_tex_instr_need_sampler(instr));
690 lower_tex_src(b, instr, sampler_idx, state);
691 }
692
693 if (texture_idx < 0 && sampler_idx < 0)
694 return false;
695
696 /* If the instruction doesn't have a sampler (i.e. txf) we use backend_flags
697 * to bind a default sampler state to configure precision.
698 */
699 if (sampler_idx < 0) {
700 state->needs_default_sampler_state = true;
701 instr->backend_flags = return_size == 16 ?
702 V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
703 }
704
705 return true;
706 }
707
708 /* FIXME: really similar to lower_tex_src, perhaps refactor? */
709 static void
710 lower_image_deref(nir_builder *b,
711 nir_intrinsic_instr *instr,
712 struct lower_pipeline_layout_state *state)
713 {
714 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
715 nir_def *index = NULL;
716 unsigned array_elements = 1;
717 unsigned base_index = 0;
718
719 while (deref->deref_type != nir_deref_type_var) {
720 nir_deref_instr *parent =
721 nir_instr_as_deref(deref->parent.ssa->parent_instr);
722
723 assert(deref->deref_type == nir_deref_type_array);
724
725 if (nir_src_is_const(deref->arr.index) && index == NULL) {
726 /* We're still building a direct index */
727 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
728 } else {
729 if (index == NULL) {
730 /* We used to be direct but not anymore */
731 index = nir_imm_int(b, base_index);
732 base_index = 0;
733 }
734
735 index = nir_iadd(b, index,
736 nir_imul_imm(b, deref->arr.index.ssa,
737 array_elements));
738 }
739
740 array_elements *= glsl_get_length(parent->type);
741
742 deref = parent;
743 }
744
745 if (index)
746 nir_umin(b, index, nir_imm_int(b, array_elements - 1));
747
748 uint32_t set = deref->var->data.descriptor_set;
749 uint32_t binding = deref->var->data.binding;
750 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
751 struct v3dv_descriptor_set_binding_layout *binding_layout =
752 &set_layout->binding[binding];
753
754 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
755 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
756
757 struct v3dv_descriptor_map *map =
758 pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
759 b->shader->info.stage, false);
760
761 int desc_index =
762 descriptor_map_add(map,
763 deref->var->data.descriptor_set,
764 deref->var->data.binding,
765 base_index,
766 binding_layout->array_size,
767 0,
768 32 /* return_size: doesn't apply for textures */,
769 0);
770
771 /* Note: we don't need to do anything here in relation to the precision and
772 * the output size because for images we can infer that info from the image
773 * intrinsic, that includes the image format (see
774 * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
775 */
776
777 index = nir_imm_int(b, desc_index);
778
779 nir_rewrite_image_intrinsic(instr, index, false);
780 }
781
782 static bool
783 lower_intrinsic(nir_builder *b,
784 nir_intrinsic_instr *instr,
785 struct lower_pipeline_layout_state *state)
786 {
787 switch (instr->intrinsic) {
788 case nir_intrinsic_load_push_constant:
789 lower_load_push_constant(b, instr, state);
790 return true;
791
792 case nir_intrinsic_vulkan_resource_index:
793 lower_vulkan_resource_index(b, instr, state);
794 return true;
795
796 case nir_intrinsic_load_vulkan_descriptor: {
797 /* Loading the descriptor happens as part of load/store instructions,
798 * so for us this is a no-op.
799 */
800 nir_def_replace(&instr->def, instr->src[0].ssa);
801 return true;
802 }
803
804 case nir_intrinsic_image_deref_load:
805 case nir_intrinsic_image_deref_store:
806 case nir_intrinsic_image_deref_atomic:
807 case nir_intrinsic_image_deref_atomic_swap:
808 case nir_intrinsic_image_deref_size:
809 case nir_intrinsic_image_deref_samples:
810 lower_image_deref(b, instr, state);
811 return true;
812
813 default:
814 return false;
815 }
816 }
817
818 static bool
819 lower_pipeline_layout_cb(nir_builder *b,
820 nir_instr *instr,
821 void *_state)
822 {
823 bool progress = false;
824 struct lower_pipeline_layout_state *state = _state;
825
826 b->cursor = nir_before_instr(instr);
827 switch (instr->type) {
828 case nir_instr_type_tex:
829 progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
830 break;
831 case nir_instr_type_intrinsic:
832 progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
833 break;
834 default:
835 break;
836 }
837
838 return progress;
839 }
840
841 static bool
842 lower_pipeline_layout_info(nir_shader *shader,
843 struct v3dv_pipeline *pipeline,
844 const struct v3dv_pipeline_layout *layout,
845 bool *needs_default_sampler_state)
846 {
847 bool progress = false;
848
849 struct lower_pipeline_layout_state state = {
850 .pipeline = pipeline,
851 .layout = layout,
852 .needs_default_sampler_state = false,
853 };
854
855 progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
856 nir_metadata_control_flow,
857 &state);
858
859 *needs_default_sampler_state = state.needs_default_sampler_state;
860
861 return progress;
862 }
863
864 /* This flips gl_PointCoord.y to match Vulkan requirements */
865 static bool
866 lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
867 {
868 if (intr->intrinsic != nir_intrinsic_load_input)
869 return false;
870
871 if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
872 return false;
873
874 b->cursor = nir_after_instr(&intr->instr);
875 nir_def *result = &intr->def;
876 result =
877 nir_vector_insert_imm(b, result,
878 nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
879 nir_def_rewrite_uses_after(&intr->def,
880 result, result->parent_instr);
881 return true;
882 }
883
884 static bool
885 v3d_nir_lower_point_coord(nir_shader *s)
886 {
887 assert(s->info.stage == MESA_SHADER_FRAGMENT);
888 return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
889 nir_metadata_control_flow, NULL);
890 }
891
892 static void
893 lower_fs_io(nir_shader *nir)
894 {
895 /* Our backend doesn't handle array fragment shader outputs */
896 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
897 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
898
899 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
900 MESA_SHADER_FRAGMENT);
901
902 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
903 MESA_SHADER_FRAGMENT);
904
905 NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
906 type_size_vec4, 0);
907 }
908
909 static void
910 lower_gs_io(struct nir_shader *nir)
911 {
912 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
913
914 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
915 MESA_SHADER_GEOMETRY);
916
917 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
918 MESA_SHADER_GEOMETRY);
919 }
920
921 static void
922 lower_vs_io(struct nir_shader *nir)
923 {
924 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
925
926 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
927 MESA_SHADER_VERTEX);
928
929 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
930 MESA_SHADER_VERTEX);
931
932 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
933 * overlaps with v3d_nir_lower_io. Need further research though.
934 */
935 }
936
937 static void
938 shader_debug_output(const char *message, void *data)
939 {
940 /* FIXME: We probably don't want to debug anything extra here, and in fact
941 * the compiler doesn't use this callback much, only as an alternative
942 * way to output the shaderdb stats, which you can already get using
943 * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
944 * compiler to remove that callback.
945 */
946 }
947
948 static void
949 pipeline_populate_v3d_key(struct v3d_key *key,
950 const struct v3dv_pipeline_stage *p_stage,
951 uint32_t ucp_enables)
952 {
953 assert(p_stage->pipeline->shared_data &&
954 p_stage->pipeline->shared_data->maps[p_stage->stage]);
955
956 /* The following values are the defaults used at pipeline creation time. We
957 * use 32 bit as the default return size.
958 */
959 struct v3dv_descriptor_map *sampler_map =
960 &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
961 struct v3dv_descriptor_map *texture_map =
962 &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
963
964 key->num_tex_used = texture_map->num_desc;
965 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
966 for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
967 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
968 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
969 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
970 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
971 }
972
973 key->num_samplers_used = sampler_map->num_desc;
974 assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
975 for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
976 sampler_idx++) {
977 key->sampler[sampler_idx].return_size =
978 sampler_map->return_size[sampler_idx];
979
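/* Note (added): presumably a 16-bit return packs the four half-float
 * channels into two return words, hence return_channels of 2 below when the
 * return size is not 32.
 */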
980 key->sampler[sampler_idx].return_channels =
981 key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
982 }
983
984 switch (p_stage->stage) {
985 case BROADCOM_SHADER_VERTEX:
986 case BROADCOM_SHADER_VERTEX_BIN:
987 key->is_last_geometry_stage =
988 p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL;
989 break;
990 case BROADCOM_SHADER_GEOMETRY:
991 case BROADCOM_SHADER_GEOMETRY_BIN:
992 /* FIXME: while we don't implement tessellation shaders */
993 key->is_last_geometry_stage = true;
994 break;
995 case BROADCOM_SHADER_FRAGMENT:
996 case BROADCOM_SHADER_COMPUTE:
997 key->is_last_geometry_stage = false;
998 break;
999 default:
1000 unreachable("unsupported shader stage");
1001 }
1002
1003 /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1004 * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1005 * takes care of adding a single compact array variable at
1006 * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1007 *
1008 * The only lowering we are interested in is specific to the fragment shader,
1009 * where we want to emit discards to honor writes to gl_ClipDistance[] in
1010 * previous stages. This is done via nir_lower_clip_fs() so we only set up
1011 * the ucp enable mask for that stage.
1012 */
1013 key->ucp_enables = ucp_enables;
1014
1015 const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled =
1016 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1017
1018 const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled =
1019 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;
1020
1021 key->robust_uniform_access =
1022 p_stage->robustness.uniform_buffers == robust_buffer_enabled;
1023 key->robust_storage_access =
1024 p_stage->robustness.storage_buffers == robust_buffer_enabled;
1025 key->robust_image_access =
1026 p_stage->robustness.images == robust_image_enabled;
1027 }
1028
1029 /* FIXME: anv maps to the hw primitive type. Perhaps eventually we would do
1030 * the same. For now we use prim_mode, which is the one already used on v3d.
1031 */
1032 static const enum mesa_prim vk_to_mesa_prim[] = {
1033 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
1034 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
1035 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
1036 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
1037 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
1038 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
1039 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
1040 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
1041 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
1042 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
1043 };
1044
1045 uint32_t
1046 v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim)
1047 {
1048 return v3d_hw_prim_type(vk_to_mesa_prim[vk_prim]);
1049 }
1050
1051 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1052 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1053 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1054 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1055 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1056 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1057 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1058 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1059 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1060 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1061 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1062 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1063 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1064 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1065 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1066 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1067 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1068 };
1069
1070 static bool
1071 enable_line_smooth(struct v3dv_pipeline *pipeline,
1072 const VkPipelineRasterizationStateCreateInfo *rs_info)
1073 {
1074 if (!pipeline->rasterization_enabled)
1075 return false;
1076
1077 const VkPipelineRasterizationLineStateCreateInfoKHR *ls_info =
1078 vk_find_struct_const(rs_info->pNext,
1079 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_KHR);
1080
1081 if (!ls_info)
1082 return false;
1083
1084 /* Although topology is dynamic now, the topology class can't change
1085 * because we don't support dynamicPrimitiveTopologyUnrestricted, so we can
1086 * use the static topology from the pipeline for this.
1087 */
1088 switch(pipeline->topology) {
1089 case MESA_PRIM_LINES:
1090 case MESA_PRIM_LINE_LOOP:
1091 case MESA_PRIM_LINE_STRIP:
1092 case MESA_PRIM_LINES_ADJACENCY:
1093 case MESA_PRIM_LINE_STRIP_ADJACENCY:
1094 return ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
1095 default:
1096 return false;
1097 }
1098 }
1099
1100 static void
1101 v3d_fs_key_set_color_attachment(struct v3d_fs_key *key,
1102 const struct v3dv_pipeline_stage *p_stage,
1103 uint32_t index,
1104 VkFormat fb_format)
1105 {
1106 key->cbufs |= 1 << index;
1107
1108 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1109
1110 /* If logic operations are enabled then we might emit color reads and we
1111 * need to know the color buffer format and swizzle for that
1112 */
1113 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1114 /* Framebuffer formats should be single plane */
1115 assert(vk_format_get_plane_count(fb_format) == 1);
1116 key->color_fmt[index].format = fb_pipe_format;
1117 memcpy(key->color_fmt[index].swizzle,
1118 v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format, 0),
1119 sizeof(key->color_fmt[index].swizzle));
1120 }
1121
1122 const struct util_format_description *desc =
1123 vk_format_description(fb_format);
1124
1125 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1126 desc->channel[0].size == 32) {
1127 key->f32_color_rb |= 1 << index;
1128 }
1129
1130 if (p_stage->nir->info.fs.untyped_color_outputs) {
1131 if (util_format_is_pure_uint(fb_pipe_format))
1132 key->uint_color_rb |= 1 << index;
1133 else if (util_format_is_pure_sint(fb_pipe_format))
1134 key->int_color_rb |= 1 << index;
1135 }
1136 }
1137
1138 static void
1139 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1140 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1141 const struct vk_render_pass_state *rendering_info,
1142 const struct v3dv_pipeline_stage *p_stage,
1143 bool has_geometry_shader,
1144 uint32_t ucp_enables)
1145 {
1146 assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1147
1148 memset(key, 0, sizeof(*key));
1149
1150 struct v3dv_device *device = p_stage->pipeline->device;
1151 assert(device);
1152
1153 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables);
1154
1155 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1156 pCreateInfo->pInputAssemblyState;
1157 uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1158
1159 key->is_points = (topology == MESA_PRIM_POINTS);
1160 key->is_lines = (topology >= MESA_PRIM_LINES &&
1161 topology <= MESA_PRIM_LINE_STRIP);
1162
1163 if (key->is_points) {
1164 /* This mask represents state for GL_ARB_point_sprite which is not
1165 * relevant to Vulkan.
1166 */
1167 key->point_sprite_mask = 0;
1168
1169 /* Vulkan mandates upper left. */
1170 key->point_coord_upper_left = true;
1171 }
1172
1173 key->has_gs = has_geometry_shader;
1174
1175 const VkPipelineColorBlendStateCreateInfo *cb_info =
1176 p_stage->pipeline->rasterization_enabled ?
1177 pCreateInfo->pColorBlendState : NULL;
1178
1179 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1180 vk_to_pipe_logicop[cb_info->logicOp] :
1181 PIPE_LOGICOP_COPY;
1182
1183 /* Multisample rasterization state must be ignored if rasterization
1184 * is disabled.
1185 */
1186 const VkPipelineMultisampleStateCreateInfo *ms_info =
1187 p_stage->pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
1188 if (ms_info) {
1189 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1190 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1191 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1192
1193 if (key->msaa)
1194 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1195
1196 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1197 }
1198
1199 key->line_smoothing = enable_line_smooth(p_stage->pipeline,
1200 pCreateInfo->pRasterizationState);
1201
1202 /* This is intended for V3D versions before 4.1, otherwise we just use the
1203 * tile buffer load/store swap R/B bit.
1204 */
1205 key->swap_color_rb = 0;
1206
1207 for (uint32_t i = 0; i < rendering_info->color_attachment_count; i++) {
1208 if (rendering_info->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
1209 continue;
1210 v3d_fs_key_set_color_attachment(key, p_stage, i,
1211 rendering_info->color_attachment_formats[i]);
1212 }
1213 }
1214
1215 static void
1216 setup_stage_outputs_from_next_stage_inputs(
1217 uint8_t next_stage_num_inputs,
1218 struct v3d_varying_slot *next_stage_input_slots,
1219 uint8_t *num_used_outputs,
1220 struct v3d_varying_slot *used_output_slots,
1221 uint32_t size_of_used_output_slots)
1222 {
1223 *num_used_outputs = next_stage_num_inputs;
1224 memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1225 }
1226
1227 static void
1228 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1229 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1230 const struct v3dv_pipeline_stage *p_stage)
1231 {
1232 assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1233 p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1234
1235 struct v3dv_device *device = p_stage->pipeline->device;
1236 assert(device);
1237
1238 memset(key, 0, sizeof(*key));
1239
1240 pipeline_populate_v3d_key(&key->base, p_stage, 0);
1241
1242 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1243
1244 key->per_vertex_point_size =
1245 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1246
1247 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1248
1249 assert(key->base.is_last_geometry_stage);
1250 if (key->is_coord) {
1251 /* Output varyings in the last binning shader are only used for transform
1252 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1253 */
1254 key->num_used_outputs = 0;
1255 } else {
1256 struct v3dv_shader_variant *fs_variant =
1257 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1258
1259 STATIC_ASSERT(sizeof(key->used_outputs) ==
1260 sizeof(fs_variant->prog_data.fs->input_slots));
1261
1262 setup_stage_outputs_from_next_stage_inputs(
1263 fs_variant->prog_data.fs->num_inputs,
1264 fs_variant->prog_data.fs->input_slots,
1265 &key->num_used_outputs,
1266 key->used_outputs,
1267 sizeof(key->used_outputs));
1268 }
1269 }
1270
1271 static void
1272 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1273 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1274 const struct v3dv_pipeline_stage *p_stage)
1275 {
1276 assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1277 p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1278
1279 struct v3dv_device *device = p_stage->pipeline->device;
1280 assert(device);
1281
1282 memset(key, 0, sizeof(*key));
1283 pipeline_populate_v3d_key(&key->base, p_stage, 0);
1284
1285 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1286
1287 key->per_vertex_point_size =
1288 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1289
1290 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1291
1292 if (key->is_coord) { /* Binning VS*/
1293 if (key->base.is_last_geometry_stage) {
1294 /* Output varyings in the last binning shader are only used for
1295 * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1296 * supported.
1297 */
1298 key->num_used_outputs = 0;
1299 } else {
1300 /* Linking against GS binning program */
1301 assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]);
1302 struct v3dv_shader_variant *gs_bin_variant =
1303 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1304
1305 STATIC_ASSERT(sizeof(key->used_outputs) ==
1306 sizeof(gs_bin_variant->prog_data.gs->input_slots));
1307
1308 setup_stage_outputs_from_next_stage_inputs(
1309 gs_bin_variant->prog_data.gs->num_inputs,
1310 gs_bin_variant->prog_data.gs->input_slots,
1311 &key->num_used_outputs,
1312 key->used_outputs,
1313 sizeof(key->used_outputs));
1314 }
1315 } else { /* Render VS */
1316 if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
1317 /* Linking against GS render program */
1318 struct v3dv_shader_variant *gs_variant =
1319 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1320
1321 STATIC_ASSERT(sizeof(key->used_outputs) ==
1322 sizeof(gs_variant->prog_data.gs->input_slots));
1323
1324 setup_stage_outputs_from_next_stage_inputs(
1325 gs_variant->prog_data.gs->num_inputs,
1326 gs_variant->prog_data.gs->input_slots,
1327 &key->num_used_outputs,
1328 key->used_outputs,
1329 sizeof(key->used_outputs));
1330 } else {
1331 /* Linking against FS program */
1332 struct v3dv_shader_variant *fs_variant =
1333 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1334
1335 STATIC_ASSERT(sizeof(key->used_outputs) ==
1336 sizeof(fs_variant->prog_data.fs->input_slots));
1337
1338 setup_stage_outputs_from_next_stage_inputs(
1339 fs_variant->prog_data.fs->num_inputs,
1340 fs_variant->prog_data.fs->input_slots,
1341 &key->num_used_outputs,
1342 key->used_outputs,
1343 sizeof(key->used_outputs));
1344 }
1345 }
1346
1347 const VkPipelineVertexInputStateCreateInfo *vi_info =
1348 pCreateInfo->pVertexInputState;
1349 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1350 const VkVertexInputAttributeDescription *desc =
1351 &vi_info->pVertexAttributeDescriptions[i];
1352 assert(desc->location < MAX_VERTEX_ATTRIBS);
1353 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
1354 desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
1355 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1356 }
1357 }
1358 }
1359
1360 /**
1361 * Creates the initial form of the pipeline stage for a binning shader by
1362 * cloning the render shader and flagging it as a coordinate shader.
1363 *
1364 * Returns NULL if it was not able to allocate the object, so it should be
1365 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1366 */
1367 static struct v3dv_pipeline_stage *
1368 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1369 const VkAllocationCallbacks *pAllocator)
1370 {
1371 struct v3dv_device *device = src->pipeline->device;
1372
1373 struct v3dv_pipeline_stage *p_stage =
1374 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1375 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1376
1377 if (p_stage == NULL)
1378 return NULL;
1379
1380 assert(src->stage == BROADCOM_SHADER_VERTEX ||
1381 src->stage == BROADCOM_SHADER_GEOMETRY);
1382
1383 enum broadcom_shader_stage bin_stage =
1384 src->stage == BROADCOM_SHADER_VERTEX ?
1385 BROADCOM_SHADER_VERTEX_BIN :
1386 BROADCOM_SHADER_GEOMETRY_BIN;
1387
1388 p_stage->pipeline = src->pipeline;
1389 p_stage->stage = bin_stage;
1390 p_stage->entrypoint = src->entrypoint;
1391 p_stage->module = src->module;
1392 p_stage->module_info = src->module_info;
1393
1394 /* For binning shaders we will clone the NIR code from the corresponding
1395 * render shader later, when we call pipeline_compile_xxx_shader. This way
1396 * we only have to run the relevant NIR lowerings once for render shaders
1397 */
1398 p_stage->nir = NULL;
1399 p_stage->program_id = src->program_id;
1400 p_stage->spec_info = src->spec_info;
1401 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1402 p_stage->robustness = src->robustness;
1403 memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1404
1405 return p_stage;
1406 }
1407
1408 /*
1409 * Based on some creation flags we assume that the QPU instructions will be
1410 * needed later to gather further info. In that case we just keep the
1411 * qpu_insts around, instead of mapping/unmapping the BO later.
1412 */
1413 static bool
1414 pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
1415 {
1416 return pipeline->flags &
1417 (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
1418 VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
1419 }
1420
1421 /**
1422 * Returns false if it was not able to allocate or map the assembly bo memory.
1423 */
1424 static bool
1425 upload_assembly(struct v3dv_pipeline *pipeline)
1426 {
1427 uint32_t total_size = 0;
1428 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1429 struct v3dv_shader_variant *variant =
1430 pipeline->shared_data->variants[stage];
1431
1432 if (variant != NULL)
1433 total_size += variant->qpu_insts_size;
1434 }
1435
1436 struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1437 "pipeline shader assembly", true);
1438 if (!bo) {
1439 mesa_loge("failed to allocate memory for shader\n");
1440 return false;
1441 }
1442
1443 bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1444 if (!ok) {
1445 mesa_loge("failed to map source shader buffer\n");
1446 return false;
1447 }
1448
1449 uint32_t offset = 0;
1450 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1451 struct v3dv_shader_variant *variant =
1452 pipeline->shared_data->variants[stage];
1453
1454 if (variant != NULL) {
1455 variant->assembly_offset = offset;
1456
1457 memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1458 offset += variant->qpu_insts_size;
1459
1460 if (!pipeline_keep_qpu(pipeline)) {
1461 free(variant->qpu_insts);
1462 variant->qpu_insts = NULL;
1463 }
1464 }
1465 }
1466 assert(total_size == offset);
1467
1468 pipeline->shared_data->assembly_bo = bo;
1469
1470 return true;
1471 }
1472
1473 static void
1474 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1475 struct v3dv_pipeline_key *key,
1476 unsigned char *sha1_out)
1477 {
1478 struct mesa_sha1 ctx;
1479 _mesa_sha1_init(&ctx);
1480
1481 if (pipeline->layout) {
1482 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1483 sizeof(pipeline->layout->sha1));
1484 }
1485
1486 /* We need to include all shader stages in the sha1 key as linking may
1487 * modify the shader code in any stage. An alternative would be to use the
1488 * serialized NIR, but that seems like overkill.
1489 */
1490 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1491 if (broadcom_shader_stage_is_binning(stage))
1492 continue;
1493
1494 struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
1495 if (p_stage == NULL)
1496 continue;
1497
1498 assert(stage != BROADCOM_SHADER_COMPUTE);
1499
1500 _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1501 }
1502
1503 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1504
1505 _mesa_sha1_final(&ctx, sha1_out);
1506 }
1507
1508 static void
1509 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1510 struct v3dv_pipeline_key *key,
1511 unsigned char *sha1_out)
1512 {
1513 struct mesa_sha1 ctx;
1514 _mesa_sha1_init(&ctx);
1515
1516 if (pipeline->layout) {
1517 _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1518 sizeof(pipeline->layout->sha1));
1519 }
1520
1521 struct v3dv_pipeline_stage *p_stage =
1522 pipeline->stages[BROADCOM_SHADER_COMPUTE];
1523
1524 _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1525
1526 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1527
1528 _mesa_sha1_final(&ctx, sha1_out);
1529 }
1530
1531 /* Checks that the pipeline has enough spill size to use for any of its
1532 * variants.
1533 */
1534 static void
1535 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1536 {
1537 uint32_t max_spill_size = 0;
1538
1539 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1540 struct v3dv_shader_variant *variant =
1541 pipeline->shared_data->variants[stage];
1542
1543 if (variant != NULL) {
1544 max_spill_size = MAX2(variant->prog_data.base->spill_size,
1545 max_spill_size);
1546 }
1547 }
1548
1549 if (max_spill_size > 0) {
1550 struct v3dv_device *device = pipeline->device;
1551
1552 /* The TIDX register we use for choosing the area to access
1553 * for scratch space is: (core << 6) | (qpu << 2) | thread.
1554 * Even at the minimum thread count in a particular shader, that
1555 * means we still multiply the number of QPUs by 4.
1556 */
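/* Illustrative example (hypothetical numbers, added): with 12 QPUs and a
 * max_spill_size of 1024 bytes, total_spill_size below would be
 * 4 * 12 * 1024 = 48 KB.
 */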
1557 const uint32_t total_spill_size =
1558 4 * device->devinfo.qpu_count * max_spill_size;
1559 if (pipeline->spill.bo) {
1560 assert(pipeline->spill.size_per_thread > 0);
1561 v3dv_bo_free(device, pipeline->spill.bo);
1562 }
1563 pipeline->spill.bo =
1564 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1565 pipeline->spill.size_per_thread = max_spill_size;
1566 }
1567 }
1568
1569 /**
1570 * Creates a new shader variant. Note that prog_data is not const, so it is
1571 * assumed that the caller provides a pointer that the shader_variant will
1572 * own.
1573 *
1574 * Creation doesn't include allocating a BO to store the contents of qpu_insts,
1575 * as we will try to share the same BO for several shader variants. Also note
1576 * that qpu_insts being NULL is valid, for example if we are creating the
1577 * shader_variants from the cache, so we can just upload the assembly of all
1578 * the shader stages at once.
1579 */
1580 struct v3dv_shader_variant *
1581 v3dv_shader_variant_create(struct v3dv_device *device,
1582 enum broadcom_shader_stage stage,
1583 struct v3d_prog_data *prog_data,
1584 uint32_t prog_data_size,
1585 uint32_t assembly_offset,
1586 uint64_t *qpu_insts,
1587 uint32_t qpu_insts_size,
1588 VkResult *out_vk_result)
1589 {
1590 struct v3dv_shader_variant *variant =
1591 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1592 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1593
1594 if (variant == NULL) {
1595 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1596 return NULL;
1597 }
1598
1599 variant->stage = stage;
1600 variant->prog_data_size = prog_data_size;
1601 variant->prog_data.base = prog_data;
1602
1603 variant->assembly_offset = assembly_offset;
1604 variant->qpu_insts_size = qpu_insts_size;
1605 variant->qpu_insts = qpu_insts;
1606
1607 *out_vk_result = VK_SUCCESS;
1608
1609 return variant;
1610 }
1611
1612 /* For a given key, it returns the compiled version of the shader. Returns a
1613 * new reference to the shader_variant to the caller, or NULL.
1614 *
1615  * If the method returns NULL it means that something went wrong:
1616 * * Not enough memory: this is one of the possible outcomes defined by
1617 * vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1618 * * Compilation error: hypothetically this shouldn't happen, as the spec
1619 * states that vkShaderModule needs to be created with a valid SPIR-V, so
1620  *     any compilation failure is a driver bug. In practice, something as
1621 * common as failing to register allocate can lead to a compilation
1622 * failure. In that case the only option (for any driver) is
1623 * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1624 * error.
1625 */
1626 static struct v3dv_shader_variant *
1627 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1628 struct v3d_key *key,
1629 size_t key_size,
1630 const VkAllocationCallbacks *pAllocator,
1631 VkResult *out_vk_result)
1632 {
1633 int64_t stage_start = os_time_get_nano();
1634
1635 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1636 struct v3dv_physical_device *physical_device = pipeline->device->pdevice;
1637 const struct v3d_compiler *compiler = physical_device->compiler;
1638 gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage);
1639
1640 if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
1641 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1642 broadcom_shader_stage_name(p_stage->stage),
1643 p_stage->program_id);
1644 nir_print_shader(p_stage->nir, stderr);
1645 fprintf(stderr, "\n");
1646 }
1647
1648 uint64_t *qpu_insts;
1649 uint32_t qpu_insts_size;
1650 struct v3d_prog_data *prog_data;
1651 uint32_t prog_data_size = v3d_prog_data_size(gl_stage);
1652
1653 qpu_insts = v3d_compile(compiler,
1654 key, &prog_data,
1655 p_stage->nir,
1656 shader_debug_output, NULL,
1657 p_stage->program_id, 0,
1658 &qpu_insts_size);
1659
1660 struct v3dv_shader_variant *variant = NULL;
1661
1662 if (!qpu_insts) {
1663 mesa_loge("Failed to compile %s prog %d NIR to VIR\n",
1664 broadcom_shader_stage_name(p_stage->stage),
1665 p_stage->program_id);
1666 *out_vk_result = VK_ERROR_UNKNOWN;
1667 } else {
1668 variant =
1669 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1670 prog_data, prog_data_size,
1671 0, /* assembly_offset, no final value yet */
1672 qpu_insts, qpu_insts_size,
1673 out_vk_result);
1674 }
1675    /* At this point we don't need the nir shader anymore, but we are freeing
1676 * all the temporary p_stage structs used during the pipeline creation when
1677 * we finish it, so let's not worry about freeing the nir here.
1678 */
1679
1680 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1681
1682 return variant;
1683 }
1684
1685 static void
1686 link_shaders(nir_shader *producer, nir_shader *consumer)
1687 {
1688 assert(producer);
1689 assert(consumer);
1690
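   /* Scalarize the shader interface if the backend uses scalar I/O, split
    * I/O arrays into elements, and then optimize both shaders so that
    * varyings that become unused after linking can be removed.
    */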
1691 if (producer->options->lower_to_scalar) {
1692 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1693 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1694 }
1695
1696 nir_lower_io_arrays_to_elements(producer, consumer);
1697
1698 v3d_optimize_nir(NULL, producer);
1699 v3d_optimize_nir(NULL, consumer);
1700
1701 if (nir_link_opt_varyings(producer, consumer))
1702 v3d_optimize_nir(NULL, consumer);
1703
1704 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1705 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1706
1707 if (nir_remove_unused_varyings(producer, consumer)) {
1708 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1709 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1710
1711 v3d_optimize_nir(NULL, producer);
1712 v3d_optimize_nir(NULL, consumer);
1713
1714 /* Optimizations can cause varyings to become unused.
1715 * nir_compact_varyings() depends on all dead varyings being removed so
1716 * we need to call nir_remove_dead_variables() again here.
1717 */
1718 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1719 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1720 }
1721 }
1722
1723 static void
1724 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1725 struct v3dv_pipeline_stage *p_stage,
1726 struct v3dv_pipeline_layout *layout)
1727 {
1728 int64_t stage_start = os_time_get_nano();
1729
1730 assert(pipeline->shared_data &&
1731 pipeline->shared_data->maps[p_stage->stage]);
1732
1733 NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex,
1734 lookup_ycbcr_conversion, layout);
1735
1736 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1737
1738 /* We add this because we need a valid sampler for nir_lower_tex to do
1739 * unpacking of the texture operation result, even for the case where there
1740 * is no sampler state.
1741 *
1742 * We add two of those, one for the case we need a 16bit return_size, and
1743 * another for the case we need a 32bit return size.
1744 */
1745 struct v3dv_descriptor_maps *maps =
1746 pipeline->shared_data->maps[p_stage->stage];
1747
1748 UNUSED unsigned index;
1749 index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0);
1750 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1751
1752 index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0);
1753 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1754
1755 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1756 bool needs_default_sampler_state = false;
1757 NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1758 &needs_default_sampler_state);
1759
1760 /* If in the end we didn't need to use the default sampler states and the
1761 * shader doesn't need any other samplers, get rid of them so we can
1762 * recognize that this program doesn't use any samplers at all.
1763 */
1764 if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1765 maps->sampler_map.num_desc = 0;
1766
1767 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1768 }
1769
1770 /**
1771 * The SPIR-V compiler will insert a sized compact array for
1772 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1773 * where the size of the array determines the number of active clip planes.
1774 */
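/* For example, a vertex shader that writes gl_ClipDistance[4] produces a
 * compact array of length 4, giving an enable mask of 0xf (clip planes 0-3).
 */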
1775 static uint32_t
1776 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1777 {
1778 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1779 const nir_shader *shader = p_stage->nir;
1780 assert(shader);
1781
1782 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1783 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1784 assert(var->data.compact);
1785 return (1 << glsl_get_length(var->type)) - 1;
1786 }
1787 }
1788 return 0;
1789 }
1790
1791 static nir_shader *
1792 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1793 struct v3dv_pipeline *pipeline,
1794 struct v3dv_pipeline_cache *cache)
1795 {
1796 int64_t stage_start = os_time_get_nano();
1797
1798 nir_shader *nir = NULL;
1799 const nir_shader_compiler_options *nir_options =
1800 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
1801
1802 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1803 nir_options,
1804 p_stage->shader_sha1);
1805
1806 if (nir) {
1807 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1808
1809 /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1810 * creation so the cache hit is not recorded in the pipeline feedback
1811 * flags
1812 */
1813
1814 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1815
1816 return nir;
1817 }
1818
1819 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1820
1821 if (nir) {
1822 struct v3dv_pipeline_cache *default_cache =
1823 &pipeline->device->default_pipeline_cache;
1824
1825 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1826 p_stage->shader_sha1);
1827
1828       /* Ensure that the variant is in the default cache, as cmd_buffer could
1829 * need to change the current variant
1830 */
1831 if (default_cache != cache) {
1832 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1833 p_stage->shader_sha1);
1834 }
1835
1836 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1837
1838 return nir;
1839 }
1840
1841 /* FIXME: this shouldn't happen, raise error? */
1842 return NULL;
1843 }
1844
1845 static VkResult
1846 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1847 const VkAllocationCallbacks *pAllocator,
1848 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1849 {
1850 struct v3dv_pipeline_stage *p_stage_vs =
1851 pipeline->stages[BROADCOM_SHADER_VERTEX];
1852 struct v3dv_pipeline_stage *p_stage_vs_bin =
1853 pipeline->stages[BROADCOM_SHADER_VERTEX_BIN];
1854
1855 assert(p_stage_vs_bin != NULL);
1856 if (p_stage_vs_bin->nir == NULL) {
1857 assert(p_stage_vs->nir);
1858 p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir);
1859 }
1860
1861 VkResult vk_result;
1862 struct v3d_vs_key key;
1863 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs);
1864 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1865 pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key),
1866 pAllocator, &vk_result);
1867 if (vk_result != VK_SUCCESS)
1868 return vk_result;
1869
1870 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin);
1871 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1872 pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key),
1873 pAllocator, &vk_result);
1874
1875 return vk_result;
1876 }
1877
1878 static VkResult
1879 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1880 const VkAllocationCallbacks *pAllocator,
1881 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1882 {
1883 struct v3dv_pipeline_stage *p_stage_gs =
1884 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1885 struct v3dv_pipeline_stage *p_stage_gs_bin =
1886 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN];
1887
1888 assert(p_stage_gs);
1889 assert(p_stage_gs_bin != NULL);
1890 if (p_stage_gs_bin->nir == NULL) {
1891 assert(p_stage_gs->nir);
1892 p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir);
1893 }
1894
1895 VkResult vk_result;
1896 struct v3d_gs_key key;
1897 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs);
1898 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1899 pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key),
1900 pAllocator, &vk_result);
1901 if (vk_result != VK_SUCCESS)
1902 return vk_result;
1903
1904 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin);
1905 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1906 pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key),
1907 pAllocator, &vk_result);
1908
1909 return vk_result;
1910 }
1911
1912 static VkResult
1913 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1914 const VkAllocationCallbacks *pAllocator,
1915 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1916 {
1917 struct v3dv_pipeline_stage *p_stage_vs =
1918 pipeline->stages[BROADCOM_SHADER_VERTEX];
1919 struct v3dv_pipeline_stage *p_stage_fs =
1920 pipeline->stages[BROADCOM_SHADER_FRAGMENT];
1921 struct v3dv_pipeline_stage *p_stage_gs =
1922 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1923
1924 struct v3d_fs_key key;
1925 pipeline_populate_v3d_fs_key(&key, pCreateInfo, &pipeline->rendering_info,
1926 p_stage_fs, p_stage_gs != NULL,
1927 get_ucp_enable_mask(p_stage_vs));
1928
1929 if (key.is_points) {
1930 assert(key.point_coord_upper_left);
1931 NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
1932 }
1933
1934 VkResult vk_result;
1935 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1936 pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
1937 pAllocator, &vk_result);
1938
1939 return vk_result;
1940 }
1941
1942 static void
1943 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1944 struct v3dv_pipeline_key *key,
1945 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1946 {
1947 struct v3dv_device *device = pipeline->device;
1948 assert(device);
1949
1950 memset(key, 0, sizeof(*key));
1951
1952 key->line_smooth = pipeline->line_smooth;
1953
1954 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1955 pCreateInfo->pInputAssemblyState;
1956 key->topology = vk_to_mesa_prim[ia_info->topology];
1957
1958 const VkPipelineColorBlendStateCreateInfo *cb_info =
1959 pipeline->rasterization_enabled ? pCreateInfo->pColorBlendState : NULL;
1960
1961 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1962 vk_to_pipe_logicop[cb_info->logicOp] :
1963 PIPE_LOGICOP_COPY;
1964
1965 /* Multisample rasterization state must be ignored if rasterization
1966 * is disabled.
1967 */
1968 const VkPipelineMultisampleStateCreateInfo *ms_info =
1969 pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
1970 if (ms_info) {
1971 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1972 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1973 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1974
1975 if (key->msaa)
1976 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1977
1978 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1979 }
1980
1981 struct vk_render_pass_state *ri = &pipeline->rendering_info;
1982 for (uint32_t i = 0; i < ri->color_attachment_count; i++) {
1983 if (ri->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
1984 continue;
1985
1986 key->cbufs |= 1 << i;
1987
1988 VkFormat fb_format = ri->color_attachment_formats[i];
1989 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1990
1991 /* If logic operations are enabled then we might emit color reads and we
1992 * need to know the color buffer format and swizzle for that
1993 */
1994 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1995 /* Framebuffer formats should be single plane */
1996 assert(vk_format_get_plane_count(fb_format) == 1);
1997 key->color_fmt[i].format = fb_pipe_format;
1998 memcpy(key->color_fmt[i].swizzle,
1999 v3dv_get_format_swizzle(pipeline->device, fb_format, 0),
2000 sizeof(key->color_fmt[i].swizzle));
2001 }
2002
2003 const struct util_format_description *desc =
2004 vk_format_description(fb_format);
2005
2006 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2007 desc->channel[0].size == 32) {
2008 key->f32_color_rb |= 1 << i;
2009 }
2010 }
2011
2012 const VkPipelineVertexInputStateCreateInfo *vi_info =
2013 pCreateInfo->pVertexInputState;
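   /* Vertex attributes using BGRA-style formats need their red and blue
    * channels swapped when fetched, so flag them for the compiler.
    */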
2014 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2015 const VkVertexInputAttributeDescription *desc =
2016 &vi_info->pVertexAttributeDescriptions[i];
2017 assert(desc->location < MAX_VERTEX_ATTRIBS);
2018 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
2019 desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
2020 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2021 }
2022 }
2023
2024 key->has_multiview = ri->view_mask != 0;
2025 }
2026
2027 static void
2028 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2029 struct v3dv_pipeline_key *key,
2030 const VkComputePipelineCreateInfo *pCreateInfo)
2031 {
2032 struct v3dv_device *device = pipeline->device;
2033 assert(device);
2034
2035 /* We use the same pipeline key for graphics and compute, but we don't need
2036  * to add a field to flag compute keys, because this key is not used on its
2037  * own to search the cache: we also use the SPIR-V or the serialized NIR,
2038  * for example, which already identifies compute shaders.
2039 */
2040 memset(key, 0, sizeof(*key));
2041 }
2042
2043 static struct v3dv_pipeline_shared_data *
2044 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2045 struct v3dv_pipeline *pipeline,
2046 bool is_graphics_pipeline)
2047 {
2048    /* We create new_entry using the device alloc. Right now shared_data is
2049     * referenced and unreferenced by both the pipeline and the pipeline cache,
2050     * so we can't ensure that the cache or pipeline alloc will be available on
2051     * the last unref.
2052 */
2053 struct v3dv_pipeline_shared_data *new_entry =
2054 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2055 sizeof(struct v3dv_pipeline_shared_data), 8,
2056 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2057
2058 if (new_entry == NULL)
2059 return NULL;
2060
2061 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2062       /* We don't need specific descriptor maps for binning stages; we use the
2063 * map for the render stage.
2064 */
2065 if (broadcom_shader_stage_is_binning(stage))
2066 continue;
2067
2068 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2069 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2070 continue;
2071 }
2072
2073 if (stage == BROADCOM_SHADER_GEOMETRY &&
2074 !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
2075 /* We always inject a custom GS if we have multiview */
2076 if (!pipeline->rendering_info.view_mask)
2077 continue;
2078 }
2079
2080 struct v3dv_descriptor_maps *new_maps =
2081 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2082 sizeof(struct v3dv_descriptor_maps), 8,
2083 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2084
2085 if (new_maps == NULL)
2086 goto fail;
2087
2088 new_entry->maps[stage] = new_maps;
2089 }
2090
2091 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2092 new_entry->maps[BROADCOM_SHADER_VERTEX];
2093
2094 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2095 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2096
2097 new_entry->ref_cnt = 1;
2098 memcpy(new_entry->sha1_key, sha1_key, 20);
2099
2100 return new_entry;
2101
2102 fail:
2103 if (new_entry != NULL) {
2104 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2105 if (new_entry->maps[stage] != NULL)
2106 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2107 }
2108 }
2109
2110 vk_free(&pipeline->device->vk.alloc, new_entry);
2111
2112 return NULL;
2113 }
2114
2115 static void
2116 write_creation_feedback(struct v3dv_pipeline *pipeline,
2117 const void *next,
2118 const VkPipelineCreationFeedback *pipeline_feedback,
2119 uint32_t stage_count,
2120 const VkPipelineShaderStageCreateInfo *stages)
2121 {
2122 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2123 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2124
2125 if (create_feedback) {
2126 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2127 pipeline_feedback,
2128 1);
2129
2130 const uint32_t feedback_stage_count =
2131 create_feedback->pipelineStageCreationFeedbackCount;
2132 assert(feedback_stage_count <= stage_count);
2133
2134 for (uint32_t i = 0; i < feedback_stage_count; i++) {
2135 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2136 enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s);
2137
2138 create_feedback->pPipelineStageCreationFeedbacks[i] =
2139 pipeline->stages[bs]->feedback;
2140
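         /* Binning variants are not exposed as separate Vulkan stages, so
          * fold their compile time into the feedback of their render stage.
          */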
2141 if (broadcom_shader_stage_is_render_with_binning(bs)) {
2142 enum broadcom_shader_stage bs_bin =
2143 broadcom_binning_shader_stage_for_render_stage(bs);
2144 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2145 pipeline->stages[bs_bin]->feedback.duration;
2146 }
2147 }
2148 }
2149 }
2150
2151 /* Note that although PrimitiveTopology is now dynamic, it is still safe to
2152 * compute the gs_input/output_primitive from the topology saved at the
2153 * pipeline, as the topology class will not change, because we don't support
2154 * dynamicPrimitiveTopologyUnrestricted
2155 */
2156 static enum mesa_prim
2157 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2158 {
2159 switch (pipeline->topology) {
2160 case MESA_PRIM_POINTS:
2161 return MESA_PRIM_POINTS;
2162 case MESA_PRIM_LINES:
2163 case MESA_PRIM_LINE_STRIP:
2164 return MESA_PRIM_LINES;
2165 case MESA_PRIM_TRIANGLES:
2166 case MESA_PRIM_TRIANGLE_STRIP:
2167 case MESA_PRIM_TRIANGLE_FAN:
2168 return MESA_PRIM_TRIANGLES;
2169 default:
2170 /* Since we don't allow GS with multiview, we can only see non-adjacency
2171 * primitives.
2172 */
2173 unreachable("Unexpected pipeline primitive type");
2174 }
2175 }
2176
2177 static enum mesa_prim
2178 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2179 {
2180 switch (pipeline->topology) {
2181 case MESA_PRIM_POINTS:
2182 return MESA_PRIM_POINTS;
2183 case MESA_PRIM_LINES:
2184 case MESA_PRIM_LINE_STRIP:
2185 return MESA_PRIM_LINE_STRIP;
2186 case MESA_PRIM_TRIANGLES:
2187 case MESA_PRIM_TRIANGLE_STRIP:
2188 case MESA_PRIM_TRIANGLE_FAN:
2189 return MESA_PRIM_TRIANGLE_STRIP;
2190 default:
2191 /* Since we don't allow GS with multiview, we can only see non-adjacency
2192 * primitives.
2193 */
2194 unreachable("Unexpected pipeline primitive type");
2195 }
2196 }
2197
2198 static bool
2199 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2200 struct v3dv_pipeline_cache *cache,
2201 const VkAllocationCallbacks *pAllocator)
2202 {
2203 /* Create the passthrough GS from the VS output interface */
2204 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2205 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2206 nir_shader *vs_nir = p_stage_vs->nir;
2207
2208 const nir_shader_compiler_options *options =
2209 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2210 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2211 "multiview broadcast gs");
2212 nir_shader *nir = b.shader;
2213 nir->info.inputs_read = vs_nir->info.outputs_written;
2214 nir->info.outputs_written = vs_nir->info.outputs_written |
2215 (1ull << VARYING_SLOT_LAYER);
2216
2217 uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology);
2218 nir->info.gs.input_primitive =
2219 multiview_gs_input_primitive_from_pipeline(pipeline);
2220 nir->info.gs.output_primitive =
2221 multiview_gs_output_primitive_from_pipeline(pipeline);
2222 nir->info.gs.vertices_in = vertex_count;
2223 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2224 nir->info.gs.invocations = 1;
2225 nir->info.gs.active_stream_mask = 0x1;
2226
2227 /* Make a list of GS input/output variables from the VS outputs */
2228 nir_variable *in_vars[100];
2229 nir_variable *out_vars[100];
2230 uint32_t var_count = 0;
2231 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2232 char name[8];
2233 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2234
2235 in_vars[var_count] =
2236 nir_variable_create(nir, nir_var_shader_in,
2237 glsl_array_type(out_vs_var->type, vertex_count, 0),
2238 name);
2239 in_vars[var_count]->data.location = out_vs_var->data.location;
2240 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2241 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2242
2243 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2244 out_vars[var_count] =
2245 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2246 out_vars[var_count]->data.location = out_vs_var->data.location;
2247 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2248
2249 var_count++;
2250 }
2251
2252 /* Add the gl_Layer output variable */
2253 nir_variable *out_layer =
2254 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2255 "out_Layer");
2256 out_layer->data.location = VARYING_SLOT_LAYER;
2257
2258 /* Get the view index value that we will write to gl_Layer */
2259 nir_def *layer =
2260 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2261
2262 /* Emit all output vertices */
2263 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2264 /* Emit all output varyings */
2265 for (uint32_t i = 0; i < var_count; i++) {
2266 nir_deref_instr *in_value =
2267 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2268 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2269 }
2270
2271 /* Emit gl_Layer write */
2272 nir_store_var(&b, out_layer, layer, 0x1);
2273
2274 nir_emit_vertex(&b, 0);
2275 }
2276 nir_end_primitive(&b, 0);
2277
2278 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2279 * with what we expect from SPIR-V modules.
2280 */
2281 preprocess_nir(nir);
2282
2283 /* Attach the geometry shader to the pipeline */
2284 struct v3dv_device *device = pipeline->device;
2285 struct v3dv_physical_device *physical_device = device->pdevice;
2286
2287 struct v3dv_pipeline_stage *p_stage =
2288 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2289 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2290
2291 if (p_stage == NULL) {
2292 ralloc_free(nir);
2293 return false;
2294 }
2295
2296 p_stage->pipeline = pipeline;
2297 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2298 p_stage->entrypoint = "main";
2299 p_stage->module = NULL;
2300 p_stage->module_info = NULL;
2301 p_stage->nir = nir;
2302 pipeline_compute_sha1_from_nir(p_stage);
2303 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2304 p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;
2305
2306 pipeline->has_gs = true;
2307 pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
2308 pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2309
2310 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
2311 pipeline_stage_create_binning(p_stage, pAllocator);
2312 if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
2313 return false;
2314
2315 return true;
2316 }
2317
2318 static void
2319 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2320 {
2321 for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2322 struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2323 if (variant && variant->prog_data.base->has_global_address) {
2324 pipeline->uses_buffer_device_address = true;
2325 return;
2326 }
2327 }
2328
2329 pipeline->uses_buffer_device_address = false;
2330 }
2331
2332 /*
2333  * Compiles a pipeline. Note that it also allocates internal objects, but if
2334  * some allocations succeed and others fail, the method does not free the
2335  * successful ones.
2336  *
2337  * This is done to simplify the code, as in this case we just call the
2338  * pipeline destroy method, which handles freeing the internal objects that
2339  * were allocated. We just need to be careful to set to NULL the objects
2340  * that were not allocated.
2341 */
2342 static VkResult
2343 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2344 struct v3dv_pipeline_cache *cache,
2345 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2346 const VkAllocationCallbacks *pAllocator)
2347 {
2348 VkPipelineCreationFeedback pipeline_feedback = {
2349 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2350 };
2351 int64_t pipeline_start = os_time_get_nano();
2352
2353 struct v3dv_device *device = pipeline->device;
2354 struct v3dv_physical_device *physical_device = device->pdevice;
2355
2356 /* First pass to get some common info from the shader, and create the
2357 * individual pipeline_stage objects
2358 */
2359 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2360 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2361 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2362
2363 struct v3dv_pipeline_stage *p_stage =
2364 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2365 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2366
2367 if (p_stage == NULL)
2368 return VK_ERROR_OUT_OF_HOST_MEMORY;
2369
2370 p_stage->program_id =
2371 p_atomic_inc_return(&physical_device->next_program_id);
2372
2373 enum broadcom_shader_stage broadcom_stage =
2374 gl_shader_stage_to_broadcom(stage);
2375
2376 p_stage->pipeline = pipeline;
2377 p_stage->stage = broadcom_stage;
2378 p_stage->entrypoint = sinfo->pName;
2379 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2380 p_stage->spec_info = sinfo->pSpecializationInfo;
2381 if (!p_stage->module) {
2382 p_stage->module_info =
2383 vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
2384 }
2385
2386 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2387 pCreateInfo->pNext, sinfo->pNext);
2388
2389 vk_pipeline_hash_shader_stage(pipeline->flags,
2390 &pCreateInfo->pStages[i],
2391 &p_stage->robustness,
2392 p_stage->shader_sha1);
2393
2394 pipeline->active_stages |= sinfo->stage;
2395
2396       /* We will try to get the compiled shader variant directly, so let's not
2397 * worry about getting the nir shader for now.
2398 */
2399 p_stage->nir = NULL;
2400 pipeline->stages[broadcom_stage] = p_stage;
2401 if (broadcom_stage == BROADCOM_SHADER_GEOMETRY)
2402 pipeline->has_gs = true;
2403
2404 if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) {
2405 enum broadcom_shader_stage broadcom_stage_bin =
2406 broadcom_binning_shader_stage_for_render_stage(broadcom_stage);
2407
2408 pipeline->stages[broadcom_stage_bin] =
2409 pipeline_stage_create_binning(p_stage, pAllocator);
2410
2411 if (pipeline->stages[broadcom_stage_bin] == NULL)
2412 return VK_ERROR_OUT_OF_HOST_MEMORY;
2413 }
2414 }
2415
2416 /* Add a no-op fragment shader if needed */
2417 if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) {
2418 const nir_shader_compiler_options *compiler_options =
2419 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2420 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2421 compiler_options,
2422 "noop_fs");
2423
2424 struct v3dv_pipeline_stage *p_stage =
2425 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2426 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2427
2428 if (p_stage == NULL)
2429 return VK_ERROR_OUT_OF_HOST_MEMORY;
2430
2431 p_stage->pipeline = pipeline;
2432 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2433 p_stage->entrypoint = "main";
2434 p_stage->module = NULL;
2435 p_stage->module_info = NULL;
2436 p_stage->nir = b.shader;
2437 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2438 NULL, NULL);
2439 pipeline_compute_sha1_from_nir(p_stage);
2440 p_stage->program_id =
2441 p_atomic_inc_return(&physical_device->next_program_id);
2442
2443 pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage;
2444 pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2445 }
2446
2447 /* If multiview is enabled, we inject a custom passthrough geometry shader
2448 * to broadcast draw calls to the appropriate views.
2449 */
2450 const uint32_t view_mask = pipeline->rendering_info.view_mask;
2451 assert(!view_mask ||
2452 (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));
2453 if (view_mask) {
2454 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2455 return VK_ERROR_OUT_OF_HOST_MEMORY;
2456 }
2457
2458 /* First we try to get the variants from the pipeline cache (unless we are
2459 * required to capture internal representations, since in that case we need
2460     * to compile).
2461 */
2462 bool needs_executable_info =
2463 pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2464 if (!needs_executable_info) {
2465 struct v3dv_pipeline_key pipeline_key;
2466 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2467 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2468
2469 bool cache_hit = false;
2470
2471 pipeline->shared_data =
2472 v3dv_pipeline_cache_search_for_pipeline(cache,
2473 pipeline->sha1,
2474 &cache_hit);
2475
2476 if (pipeline->shared_data != NULL) {
2477 /* A correct pipeline must have at least a VS and FS */
2478 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2479 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2480 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2481 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2482 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2483 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2484 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2485
2486 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2487 pipeline_feedback.flags |=
2488 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2489
2490 goto success;
2491 }
2492 }
2493
2494 if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2495 return VK_PIPELINE_COMPILE_REQUIRED;
2496
2497 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2498 * shader or the pipeline cache) and compile.
2499 */
2500 pipeline->shared_data =
2501 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2502 if (!pipeline->shared_data)
2503 return VK_ERROR_OUT_OF_HOST_MEMORY;
2504
2505 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2506 struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT];
2507 struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY];
2508
2509 p_stage_vs->feedback.flags |=
2510 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2511 if (p_stage_gs)
2512 p_stage_gs->feedback.flags |=
2513 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2514 p_stage_fs->feedback.flags |=
2515 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2516
2517 if (!p_stage_vs->nir)
2518 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2519 if (p_stage_gs && !p_stage_gs->nir)
2520 p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache);
2521 if (!p_stage_fs->nir)
2522 p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache);
2523
2524 /* Linking + pipeline lowerings */
2525 if (p_stage_gs) {
2526 link_shaders(p_stage_gs->nir, p_stage_fs->nir);
2527 link_shaders(p_stage_vs->nir, p_stage_gs->nir);
2528 } else {
2529 link_shaders(p_stage_vs->nir, p_stage_fs->nir);
2530 }
2531
2532 pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout);
2533 lower_fs_io(p_stage_fs->nir);
2534
2535 if (p_stage_gs) {
2536 pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout);
2537 lower_gs_io(p_stage_gs->nir);
2538 }
2539
2540 pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout);
2541 lower_vs_io(p_stage_vs->nir);
2542
2543 /* Compiling to vir */
2544 VkResult vk_result;
2545
2546 /* We should have got all the variants or no variants from the cache */
2547 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2548 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator,
2549 pCreateInfo);
2550 if (vk_result != VK_SUCCESS)
2551 return vk_result;
2552
2553 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2554 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2555
2556 if (p_stage_gs) {
2557 vk_result =
2558 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2559 if (vk_result != VK_SUCCESS)
2560 return vk_result;
2561 }
2562
2563 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2564 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2565
2566 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2567 if (vk_result != VK_SUCCESS)
2568 return vk_result;
2569
2570 if (!upload_assembly(pipeline))
2571 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2572
2573 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2574
2575 success:
2576
2577 pipeline_check_buffer_device_address(pipeline);
2578
2579 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2580 write_creation_feedback(pipeline,
2581 pCreateInfo->pNext,
2582 &pipeline_feedback,
2583 pCreateInfo->stageCount,
2584 pCreateInfo->pStages);
2585
2586 /* Since we have the variants in the pipeline shared data we can now free
2587 * the pipeline stages.
2588 */
2589 if (!needs_executable_info)
2590 pipeline_free_stages(device, pipeline, pAllocator);
2591
2592 pipeline_check_spill_size(pipeline);
2593
2594 return compute_vpm_config(pipeline);
2595 }
2596
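/* Derives the VPM (Vertex Pipe Memory) configuration for both the binning
 * and render geometry pipelines from the compiled VS/GS program data.
 */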
2597 static VkResult
2598 compute_vpm_config(struct v3dv_pipeline *pipeline)
2599 {
2600 struct v3dv_shader_variant *vs_variant =
2601 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2602    struct v3dv_shader_variant *vs_bin_variant =
2603       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2604    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2605    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2606
2607 struct v3d_gs_prog_data *gs = NULL;
2608 struct v3d_gs_prog_data *gs_bin = NULL;
2609 if (pipeline->has_gs) {
2610 struct v3dv_shader_variant *gs_variant =
2611 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2612 struct v3dv_shader_variant *gs_bin_variant =
2613 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2614 gs = gs_variant->prog_data.gs;
2615 gs_bin = gs_bin_variant->prog_data.gs;
2616 }
2617
2618 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2619 vs_bin, vs, gs_bin, gs,
2620 &pipeline->vpm_cfg_bin,
2621 &pipeline->vpm_cfg)) {
2622 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2623 }
2624
2625 return VK_SUCCESS;
2626 }
2627
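/* A stencil face is a no-op for early-Z purposes if the stencil test always
 * passes and the stencil value is kept when the depth test fails, since then
 * fragments discarded early have no stencil side effects.
 */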
2628 static bool
2629 stencil_op_is_no_op(struct vk_stencil_test_face_state *stencil)
2630 {
2631 return stencil->op.depth_fail == VK_STENCIL_OP_KEEP &&
2632 stencil->op.compare == VK_COMPARE_OP_ALWAYS;
2633 }
2634
2635 /* Computes the ez_state based on a given vk_dynamic_graphics_state. Note
2636 * that the parameter dyn doesn't need to be pipeline->dynamic_graphics_state,
2637 * as this method can be used by the cmd_buffer too.
2638 */
2639 void
2640 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2641 struct v3dv_pipeline *pipeline,
2642 enum v3dv_ez_state *ez_state,
2643 bool *incompatible_ez_test)
2644 {
2645 if (!dyn->ds.depth.test_enable) {
2646 *ez_state = V3D_EZ_DISABLED;
2647 return;
2648 }
2649
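   /* Pick the early-Z direction from the depth compare function: LESS/LEQUAL
    * and GREATER/GEQUAL map to a fixed direction, NEVER/EQUAL leave it
    * undecided, and any other function disables EZ and flags an incompatible
    * test.
    */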
2650 switch (dyn->ds.depth.compare_op) {
2651 case VK_COMPARE_OP_LESS:
2652 case VK_COMPARE_OP_LESS_OR_EQUAL:
2653 *ez_state = V3D_EZ_LT_LE;
2654 break;
2655 case VK_COMPARE_OP_GREATER:
2656 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2657 *ez_state = V3D_EZ_GT_GE;
2658 break;
2659 case VK_COMPARE_OP_NEVER:
2660 case VK_COMPARE_OP_EQUAL:
2661 *ez_state = V3D_EZ_UNDECIDED;
2662 break;
2663 default:
2664 *ez_state = V3D_EZ_DISABLED;
2665 *incompatible_ez_test = true;
2666 break;
2667 }
2668
2669 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2670 if (dyn->ds.stencil.test_enable &&
2671 (!stencil_op_is_no_op(&dyn->ds.stencil.front) ||
2672 !stencil_op_is_no_op(&dyn->ds.stencil.back))) {
2673 *ez_state = V3D_EZ_DISABLED;
2674 }
2675
2676 /* If the FS writes Z, then it may update against the chosen EZ direction */
2677 struct v3dv_shader_variant *fs_variant =
2678 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2679 if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2680 !fs_variant->prog_data.fs->writes_z_from_fep) {
2681 *ez_state = V3D_EZ_DISABLED;
2682 }
2683 }
2684
2685
2686 static void
2687 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2688 const VkPipelineMultisampleStateCreateInfo *ms_info)
2689 {
2690 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2691
2692 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2693 * requires this to be 0xf or 0x0 if using a single sample.
2694 */
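   /* For instance, with 4x MSAA a pSampleMask[0] of 0x5 would leave only
    * samples 0 and 2 enabled.
    */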
2695 if (ms_info && ms_info->pSampleMask &&
2696 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2697 pipeline->sample_mask &= ms_info->pSampleMask[0];
2698 }
2699 }
2700
2701 static void
2702 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2703 const VkPipelineMultisampleStateCreateInfo *ms_info)
2704 {
2705 pipeline->sample_rate_shading =
2706 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2707 ms_info->sampleShadingEnable;
2708 }
2709
2710 static void
2711 pipeline_setup_rendering_info(struct v3dv_device *device,
2712 struct v3dv_pipeline *pipeline,
2713 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2714 const VkAllocationCallbacks *alloc)
2715 {
2716 struct vk_render_pass_state *rp = &pipeline->rendering_info;
2717
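   /* Take the attachment formats and view mask from the render pass if there
    * is one, otherwise from VkPipelineRenderingCreateInfo (dynamic rendering),
    * and fall back to the spec-mandated defaults when neither is provided.
    */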
2718 if (pipeline->pass) {
2719 assert(pipeline->subpass);
2720 struct v3dv_render_pass *pass = pipeline->pass;
2721 struct v3dv_subpass *subpass = pipeline->subpass;
2722 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
2723
2724 rp->view_mask = subpass->view_mask;
2725
2726 rp->depth_attachment_format = VK_FORMAT_UNDEFINED;
2727 rp->stencil_attachment_format = VK_FORMAT_UNDEFINED;
2728 rp->attachments = MESA_VK_RP_ATTACHMENT_NONE;
2729 if (attachment_idx != VK_ATTACHMENT_UNUSED) {
2730 VkFormat ds_format = pass->attachments[attachment_idx].desc.format;
2731 if (vk_format_has_depth(ds_format)) {
2732 rp->depth_attachment_format = ds_format;
2733 rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2734 }
2735 if (vk_format_has_stencil(ds_format)) {
2736 rp->stencil_attachment_format = ds_format;
2737 rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2738 }
2739 }
2740
2741 rp->color_attachment_count = subpass->color_count;
2742 for (uint32_t i = 0; i < subpass->color_count; i++) {
2743 const uint32_t attachment_idx = subpass->color_attachments[i].attachment;
2744 if (attachment_idx == VK_ATTACHMENT_UNUSED) {
2745 rp->color_attachment_formats[i] = VK_FORMAT_UNDEFINED;
2746 continue;
2747 }
2748 rp->color_attachment_formats[i] =
2749 pass->attachments[attachment_idx].desc.format;
2750 rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2751 }
2752 return;
2753 }
2754
2755 const VkPipelineRenderingCreateInfo *ri =
2756 vk_find_struct_const(pCreateInfo->pNext,
2757 PIPELINE_RENDERING_CREATE_INFO);
2758 if (ri) {
2759 rp->view_mask = ri->viewMask;
2760
2761 rp->color_attachment_count = ri->colorAttachmentCount;
2762 for (int i = 0; i < ri->colorAttachmentCount; i++) {
2763 rp->color_attachment_formats[i] = ri->pColorAttachmentFormats[i];
2764 if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
2765 rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2766 }
2767 }
2768
2769 rp->depth_attachment_format = ri->depthAttachmentFormat;
2770 if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
2771 rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2772
2773 rp->stencil_attachment_format = ri->stencilAttachmentFormat;
2774 if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
2775 rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2776
2777 return;
2778 }
2779
2780 /* From the Vulkan spec for VkPipelineRenderingCreateInfo:
2781 *
2782 * "if this structure is not specified, and the pipeline does not include
2783 * a VkRenderPass, viewMask and colorAttachmentCount are 0, and
2784 * depthAttachmentFormat and stencilAttachmentFormat are
2785 * VK_FORMAT_UNDEFINED.
2786 */
2787 pipeline->rendering_info = (struct vk_render_pass_state) {
2788 .view_mask = 0,
2789 .attachments = 0,
2790 .color_attachment_count = 0,
2791 .depth_attachment_format = VK_FORMAT_UNDEFINED,
2792 .stencil_attachment_format = VK_FORMAT_UNDEFINED,
2793 };
2794 }
2795
2796 static VkResult
2797 pipeline_init_dynamic_state(struct v3dv_device *device,
2798 struct v3dv_pipeline *pipeline,
2799 struct vk_graphics_pipeline_all_state *pipeline_all_state,
2800 struct vk_graphics_pipeline_state *pipeline_state,
2801 const VkGraphicsPipelineCreateInfo *pCreateInfo)
2802 {
2803 VkResult result = VK_SUCCESS;
2804 result = vk_graphics_pipeline_state_fill(&pipeline->device->vk, pipeline_state,
2805 pCreateInfo, &pipeline->rendering_info, 0,
2806 pipeline_all_state, NULL, 0, NULL);
2807 if (result != VK_SUCCESS)
2808 return result;
2809
2810 vk_dynamic_graphics_state_fill(&pipeline->dynamic_graphics_state, pipeline_state);
2811
2812 struct v3dv_dynamic_state *v3dv_dyn = &pipeline->dynamic;
2813 struct vk_dynamic_graphics_state *dyn = &pipeline->dynamic_graphics_state;
2814
2815 if (BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
2816 BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_SCISSORS)) {
2817       /* FIXME: right now we don't support multiViewport so viewports[0] would
2818 * work now, but would need to change if we allow multiple viewports.
2819 */
2820 v3d_X((&device->devinfo), viewport_compute_xform)(&dyn->vp.viewports[0],
2821 v3dv_dyn->viewport.scale[0],
2822 v3dv_dyn->viewport.translate[0]);
2823
2824 }
2825
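   /* Expand the per-attachment color write enables into a per-channel mask
    * (4 bits per render target), defaulting to all channels enabled when no
    * color blend state is provided.
    */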
2826 v3dv_dyn->color_write_enable =
2827 (1ull << (4 * V3D_MAX_RENDER_TARGETS(device->devinfo.ver))) - 1;
2828 if (pipeline_state->cb) {
2829 const uint8_t color_writes = pipeline_state->cb->color_write_enables;
2830 v3dv_dyn->color_write_enable = 0;
2831 for (uint32_t i = 0; i < pipeline_state->cb->attachment_count; i++) {
2832 v3dv_dyn->color_write_enable |=
2833 (color_writes & BITFIELD_BIT(i)) ? (0xfu << (i * 4)) : 0;
2834 }
2835 }
2836
2837 return result;
2838 }
2839
2840 static VkResult
2841 pipeline_init(struct v3dv_pipeline *pipeline,
2842 struct v3dv_device *device,
2843 struct v3dv_pipeline_cache *cache,
2844 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2845 const VkAllocationCallbacks *pAllocator)
2846 {
2847 VkResult result = VK_SUCCESS;
2848
2849 pipeline->device = device;
2850
2851 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2852 pipeline->layout = layout;
2853 v3dv_pipeline_layout_ref(pipeline->layout);
2854
2855 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2856 if (render_pass) {
2857 assert(pCreateInfo->subpass < render_pass->subpass_count);
2858 pipeline->pass = render_pass;
2859 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2860 }
2861
2862 pipeline_setup_rendering_info(device, pipeline, pCreateInfo, pAllocator);
2863
2864 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2865 pCreateInfo->pInputAssemblyState;
2866 pipeline->topology = vk_to_mesa_prim[ia_info->topology];
2867
2868 struct vk_graphics_pipeline_all_state all;
2869 struct vk_graphics_pipeline_state pipeline_state = { };
2870 result = pipeline_init_dynamic_state(device, pipeline, &all, &pipeline_state,
2871 pCreateInfo);
2872
2873 if (result != VK_SUCCESS) {
2874       /* The caller will destroy the pipeline, and we didn't allocate any
2875        * extra info, so we don't need to do anything else.
2876 */
2877 return result;
2878 }
2879
2880 /* If rasterization is disabled, we just disable it through the CFG_BITS
2881 * packet, so for building the pipeline we always assume it is enabled
2882 */
2883 const bool raster_enabled =
2884 (pipeline_state.rs && !pipeline_state.rs->rasterizer_discard_enable) ||
2885 BITSET_TEST(pipeline_state.dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
2886
2887 pipeline->rasterization_enabled = raster_enabled;
2888
2889 const VkPipelineViewportStateCreateInfo *vp_info =
2890 raster_enabled ? pCreateInfo->pViewportState : NULL;
2891
2892 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2893 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2894
2895 const VkPipelineRasterizationStateCreateInfo *rs_info =
2896 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2897
2898 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2899 raster_enabled ? vk_find_struct_const(
2900 rs_info->pNext,
2901 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2902 NULL;
2903
2904 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2905 raster_enabled ? vk_find_struct_const(
2906 rs_info->pNext,
2907 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2908 NULL;
2909
2910 const VkPipelineColorBlendStateCreateInfo *cb_info =
2911 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2912
2913 const VkPipelineMultisampleStateCreateInfo *ms_info =
2914 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2915
2916 const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
2917 vp_info ? vk_find_struct_const(vp_info->pNext,
2918 PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT) :
2919 NULL;
2920
2921 if (depth_clip_control)
2922 pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne;
2923
2924 v3d_X((&device->devinfo), pipeline_pack_state)(pipeline, cb_info, ds_info,
2925 rs_info, pv_info, ls_info,
2926 ms_info,
2927 &pipeline_state);
2928
2929 pipeline_set_sample_mask(pipeline, ms_info);
2930 pipeline_set_sample_rate_shading(pipeline, ms_info);
2931 pipeline->line_smooth = enable_line_smooth(pipeline, rs_info);
2932
2933 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2934
2935 if (result != VK_SUCCESS) {
2936       /* The caller will destroy the pipeline, and we didn't allocate any
2937        * extra info, so we don't need to do anything else.
2938 */
2939 return result;
2940 }
2941
2942 const VkPipelineVertexInputStateCreateInfo *vi_info =
2943 pCreateInfo->pVertexInputState;
2944
2945 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
2946 vk_find_struct_const(vi_info->pNext,
2947 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2948
2949 v3d_X((&device->devinfo), pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
2950
2951 if (v3d_X((&device->devinfo), pipeline_needs_default_attribute_values)(pipeline)) {
2952 pipeline->default_attribute_values =
2953 v3d_X((&pipeline->device->devinfo), create_default_attribute_values)(pipeline->device, pipeline);
2954
2955 if (!pipeline->default_attribute_values)
2956 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2957 } else {
2958 pipeline->default_attribute_values = NULL;
2959 }
2960
2961 /* This must be done after the pipeline has been compiled */
2962 v3dv_compute_ez_state(&pipeline->dynamic_graphics_state,
2963 pipeline,
2964 &pipeline->ez_state,
2965 &pipeline->incompatible_ez_test);
2966
2967 return result;
2968 }
2969
2970 static VkPipelineCreateFlagBits2KHR
2971 pipeline_create_info_get_flags(VkPipelineCreateFlags flags, const void *pNext)
2972 {
2973 const VkPipelineCreateFlags2CreateInfoKHR *flags2 =
2974 vk_find_struct_const(pNext, PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR);
2975 if (flags2)
2976 return flags2->flags;
2977 else
2978 return flags;
2979 }
2980
2981 static VkResult
2982 graphics_pipeline_create(VkDevice _device,
2983 VkPipelineCache _cache,
2984 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2985 const VkAllocationCallbacks *pAllocator,
2986 VkPipeline *pPipeline,
2987 VkPipelineCreateFlagBits2KHR *flags)
2988 {
2989 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2990 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2991
2992 struct v3dv_pipeline *pipeline;
2993 VkResult result;
2994
2995 *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
2996 pCreateInfo->pNext);
2997
2998 /* Use the default pipeline cache if none is specified */
2999 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3000 cache = &device->default_pipeline_cache;
3001
3002 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3003 VK_OBJECT_TYPE_PIPELINE);
3004
3005 if (pipeline == NULL)
3006 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3007
3008 pipeline->flags = *flags;
3009 result = pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator);
3010
3011 if (result != VK_SUCCESS) {
3012 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3013 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3014 *pPipeline = VK_NULL_HANDLE;
3015 return result;
3016 }
3017
3018 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3019
3020 return VK_SUCCESS;
3021 }
3022
3023 VKAPI_ATTR VkResult VKAPI_CALL
3024 v3dv_CreateGraphicsPipelines(VkDevice _device,
3025 VkPipelineCache pipelineCache,
3026 uint32_t count,
3027 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3028 const VkAllocationCallbacks *pAllocator,
3029 VkPipeline *pPipelines)
3030 {
3031 MESA_TRACE_FUNC();
3032 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3033 VkResult result = VK_SUCCESS;
3034
3035 if (V3D_DBG(SHADERS))
3036 mtx_lock(&device->pdevice->mutex);
3037
3038 uint32_t i = 0;
3039 for (; i < count; i++) {
3040 VkResult local_result;
3041
3042 VkPipelineCreateFlagBits2KHR flags;
3043 local_result = graphics_pipeline_create(_device,
3044 pipelineCache,
3045 &pCreateInfos[i],
3046 pAllocator,
3047 &pPipelines[i],
3048 &flags);
3049
3050 if (local_result != VK_SUCCESS) {
3051 result = local_result;
3052 pPipelines[i] = VK_NULL_HANDLE;
3053 if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3054 break;
3055 }
3056 }
3057
3058 for (; i < count; i++)
3059 pPipelines[i] = VK_NULL_HANDLE;
3060
3061 if (V3D_DBG(SHADERS))
3062 mtx_unlock(&device->pdevice->mutex);
3063
3064 return result;
3065 }
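/* Note on the batch-create loop above: per the Vulkan spec, when an entry
 * fails we still return handles for the pipelines that did succeed, set the
 * failed entry (and every entry we did not attempt) to VK_NULL_HANDLE, and
 * report a failing VkResult. Creation only stops early when the failing
 * create info requested EARLY_RETURN_ON_FAILURE. The V3D_DBG(SHADERS) lock
 * simply serializes shader debug output across concurrent creations.
 */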
3066
3067 static void
3068 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3069 {
3070 assert(glsl_type_is_vector_or_scalar(type));
3071
3072 uint32_t comp_size = glsl_type_is_boolean(type)
3073 ? 4 : glsl_get_bit_size(type) / 8;
3074 unsigned length = glsl_get_vector_elements(type);
3075    *size = comp_size * length;
3076    *align = comp_size * (length == 3 ? 4 : length);
3077 }
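/* shared_type_info() above follows the usual std430-like rule used when
 * lowering shared variables to explicit offsets: each scalar or vector gets
 * an alignment of its component size times its component count, except that
 * 3-component vectors are aligned as if they had 4 components, and booleans
 * are treated as 32-bit values. Worked example (illustrative only): a vec3
 * of 32-bit floats reports size = 12 bytes but align = 16 bytes, so two
 * consecutive vec3 variables end up 16 bytes apart rather than 12.
 */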
3078
3079 static void
3080 lower_compute(struct nir_shader *nir)
3081 {
3082 if (!nir->info.shared_memory_explicit_layout) {
3083 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3084 nir_var_mem_shared, shared_type_info);
3085 }
3086
3087 NIR_PASS(_, nir, nir_lower_explicit_io,
3088 nir_var_mem_shared, nir_address_format_32bit_offset);
3089
3090 struct nir_lower_compute_system_values_options sysval_options = {
3091 .has_base_workgroup_id = true,
3092 };
3093 NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
3094 }
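/* lower_compute() rewrites shared-memory access and compute system values
 * into forms the backend expects: shared variables become plain 32-bit byte
 * offsets into the workgroup's shared block (unless the shader already
 * declared an explicit layout), and workgroup-id system values are lowered
 * assuming a base workgroup id is available, which is what allows
 * vkCmdDispatchBase() to offset workgroup ids without recompiling.
 */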
3095
3096 static VkResult
3097 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3098 struct v3dv_pipeline_cache *cache,
3099 const VkComputePipelineCreateInfo *info,
3100 const VkAllocationCallbacks *alloc)
3101 {
3102 VkPipelineCreationFeedback pipeline_feedback = {
3103 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3104 };
3105 int64_t pipeline_start = os_time_get_nano();
3106
3107 struct v3dv_device *device = pipeline->device;
3108 struct v3dv_physical_device *physical_device = device->pdevice;
3109
3110 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3111 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3112
3113 struct v3dv_pipeline_stage *p_stage =
3114 vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3115 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3116 if (!p_stage)
3117 return VK_ERROR_OUT_OF_HOST_MEMORY;
3118
3119 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3120 p_stage->pipeline = pipeline;
3121 p_stage->stage = gl_shader_stage_to_broadcom(stage);
3122 p_stage->entrypoint = sinfo->pName;
3123 p_stage->module = vk_shader_module_from_handle(sinfo->module);
3124 p_stage->spec_info = sinfo->pSpecializationInfo;
3125 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3126 if (!p_stage->module) {
3127 p_stage->module_info =
3128 vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
3129 }
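   /* When no VkShaderModule handle is provided, the SPIR-V may instead be
    * supplied inline by chaining a VkShaderModuleCreateInfo into the stage's
    * pNext (as allowed by VK_KHR_maintenance5 and graphics-pipeline-library
    * style creation), which is what the module_info lookup above captures.
    */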
3130
3131 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
3132 info->pNext, sinfo->pNext);
3133
3134 vk_pipeline_hash_shader_stage(pipeline->flags,
3135 &info->stage,
3136 &p_stage->robustness,
3137 p_stage->shader_sha1);
3138
3139 p_stage->nir = NULL;
3140
3141 pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage;
3142 pipeline->active_stages |= sinfo->stage;
3143
3144    /* First we try to get the variants from the pipeline cache (unless we are
3145     * required to capture internal representations, since in that case we
3146     * need to compile).
3147     */
3148 bool needs_executable_info =
3149 pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3150 if (!needs_executable_info) {
3151 struct v3dv_pipeline_key pipeline_key;
3152 pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3153 pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3154
3155 bool cache_hit = false;
3156 pipeline->shared_data =
3157 v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3158
3159 if (pipeline->shared_data != NULL) {
3160 assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3161 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3162 pipeline_feedback.flags |=
3163 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3164
3165 goto success;
3166 }
3167 }
3168
3169 if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3170 return VK_PIPELINE_COMPILE_REQUIRED;
3171
3172 pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3173 pipeline,
3174 false);
3175 if (!pipeline->shared_data)
3176 return VK_ERROR_OUT_OF_HOST_MEMORY;
3177
3178 p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3179
3180    /* If not found in the cache, compile it */
3181 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3182 assert(p_stage->nir);
3183
3184 v3d_optimize_nir(NULL, p_stage->nir);
3185 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3186 lower_compute(p_stage->nir);
3187
3188 VkResult result = VK_SUCCESS;
3189
3190 struct v3d_key key;
3191 memset(&key, 0, sizeof(key));
3192 pipeline_populate_v3d_key(&key, p_stage, 0);
3193 pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3194 pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3195 alloc, &result);
3196
3197 if (result != VK_SUCCESS)
3198 return result;
3199
3200 if (!upload_assembly(pipeline))
3201 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3202
3203 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3204
3205 success:
3206
3207 pipeline_check_buffer_device_address(pipeline);
3208
3209 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3210 write_creation_feedback(pipeline,
3211 info->pNext,
3212 &pipeline_feedback,
3213 1,
3214 &info->stage);
3215
3216    /* The variants are now held in pipeline->shared_data, so after compiling
3217     * we no longer need the pipeline stages.
3218     */
3219 if (!needs_executable_info)
3220 pipeline_free_stages(device, pipeline, alloc);
3221
3222 pipeline_check_spill_size(pipeline);
3223
3224 return VK_SUCCESS;
3225 }
3226
3227 static VkResult
3228 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3229 struct v3dv_device *device,
3230 struct v3dv_pipeline_cache *cache,
3231 const VkComputePipelineCreateInfo *info,
3232 const VkAllocationCallbacks *alloc)
3233 {
3234 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3235
3236 pipeline->device = device;
3237 pipeline->layout = layout;
3238 v3dv_pipeline_layout_ref(pipeline->layout);
3239
3240 VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3241 if (result != VK_SUCCESS)
3242 return result;
3243
3244 return result;
3245 }
3246
3247 static VkResult
3248 compute_pipeline_create(VkDevice _device,
3249 VkPipelineCache _cache,
3250 const VkComputePipelineCreateInfo *pCreateInfo,
3251 const VkAllocationCallbacks *pAllocator,
3252 VkPipeline *pPipeline,
3253 VkPipelineCreateFlagBits2KHR *flags)
3254 {
3255 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3256 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3257
3258 struct v3dv_pipeline *pipeline;
3259 VkResult result;
3260
3261 *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
3262 pCreateInfo->pNext);
3263
3264 /* Use the default pipeline cache if none is specified */
3265 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3266 cache = &device->default_pipeline_cache;
3267
3268 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3269 VK_OBJECT_TYPE_PIPELINE);
3270 if (pipeline == NULL)
3271 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3272
3273 pipeline->flags = *flags;
3274 result = compute_pipeline_init(pipeline, device, cache,
3275 pCreateInfo, pAllocator);
3276 if (result != VK_SUCCESS) {
3277 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3278 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3279 *pPipeline = VK_NULL_HANDLE;
3280 return result;
3281 }
3282
3283 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3284
3285 return VK_SUCCESS;
3286 }
3287
3288 VKAPI_ATTR VkResult VKAPI_CALL
3289 v3dv_CreateComputePipelines(VkDevice _device,
3290 VkPipelineCache pipelineCache,
3291 uint32_t createInfoCount,
3292 const VkComputePipelineCreateInfo *pCreateInfos,
3293 const VkAllocationCallbacks *pAllocator,
3294 VkPipeline *pPipelines)
3295 {
3296 MESA_TRACE_FUNC();
3297 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3298 VkResult result = VK_SUCCESS;
3299
3300 if (V3D_DBG(SHADERS))
3301 mtx_lock(&device->pdevice->mutex);
3302
3303 uint32_t i = 0;
3304 for (; i < createInfoCount; i++) {
3305 VkResult local_result;
3306 VkPipelineCreateFlagBits2KHR flags;
3307 local_result = compute_pipeline_create(_device,
3308 pipelineCache,
3309 &pCreateInfos[i],
3310 pAllocator,
3311 &pPipelines[i],
3312 &flags);
3313
3314 if (local_result != VK_SUCCESS) {
3315 result = local_result;
3316 pPipelines[i] = VK_NULL_HANDLE;
3317 if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3318 break;
3319 }
3320 }
3321
3322 for (; i < createInfoCount; i++)
3323 pPipelines[i] = VK_NULL_HANDLE;
3324
3325 if (V3D_DBG(SHADERS))
3326 mtx_unlock(&device->pdevice->mutex);
3327
3328 return result;
3329 }
3330
3331 static nir_shader *
3332 pipeline_get_nir(struct v3dv_pipeline *pipeline,
3333 enum broadcom_shader_stage stage)
3334 {
3335 assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES);
3336 if (pipeline->stages[stage])
3337 return pipeline->stages[stage]->nir;
3338
3339 return NULL;
3340 }
3341
3342 static struct v3d_prog_data *
3343 pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3344 enum broadcom_shader_stage stage)
3345 {
3346 if (pipeline->shared_data->variants[stage])
3347 return pipeline->shared_data->variants[stage]->prog_data.base;
3348 return NULL;
3349 }
3350
3351 static uint64_t *
3352 pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3353 enum broadcom_shader_stage stage,
3354 uint32_t *qpu_size)
3355 {
3356 struct v3dv_shader_variant *variant =
3357 pipeline->shared_data->variants[stage];
3358 if (!variant) {
3359 *qpu_size = 0;
3360 return NULL;
3361 }
3362
3363 *qpu_size = variant->qpu_insts_size;
3364 return variant->qpu_insts;
3365 }
3366
3367 /* FIXME: we use the same macro in various drivers, maybe move it to
3368 * the common vk_util.h?
3369 */
3370 #define WRITE_STR(field, ...) ({ \
3371 memset(field, 0, sizeof(field)); \
3372 UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3373 assert(_i > 0 && _i < sizeof(field)); \
3374 })
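/* WRITE_STR zero-fills the destination before formatting, so the fixed-size
 * name/description arrays in the executable-property structs are always
 * NUL-terminated, and in debug builds the assert flags any string that would
 * not fit. Typical use, as seen below, is e.g.
 * WRITE_STR(props->name, "%s (%s)", "VS", "Render").
 */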
3375
3376 static bool
3377 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3378 const char *data)
3379 {
3380 ir->isText = VK_TRUE;
3381
3382 size_t data_len = strlen(data) + 1;
3383
3384 if (ir->pData == NULL) {
3385 ir->dataSize = data_len;
3386 return true;
3387 }
3388
3389 strncpy(ir->pData, data, ir->dataSize);
3390 if (ir->dataSize < data_len)
3391 return false;
3392
3393 ir->dataSize = data_len;
3394 return true;
3395 }
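/* write_ir_text() implements the usual Vulkan "two call" output idiom: when
 * the caller passes pData == NULL we only report the required dataSize, and
 * when a buffer is provided we copy at most dataSize bytes and return false
 * if the text was truncated, which the entry points below translate into
 * VK_INCOMPLETE. An application would typically call
 * vkGetPipelineExecutableInternalRepresentationsKHR once to query sizes and
 * a second time with allocated buffers (illustrative usage, not driver code).
 */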
3396
3397 static void
3398 append(char **str, size_t *offset, const char *fmt, ...)
3399 {
3400 va_list args;
3401 va_start(args, fmt);
3402 ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3403 va_end(args);
3404 }
3405
3406 static void
3407 pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3408 {
3409 if (pipeline->executables.mem_ctx)
3410 return;
3411
3412 pipeline->executables.mem_ctx = ralloc_context(NULL);
3413 util_dynarray_init(&pipeline->executables.data,
3414 pipeline->executables.mem_ctx);
3415
3416 /* Don't crash for failed/bogus pipelines */
3417 if (!pipeline->shared_data)
3418 return;
3419
3420 for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3421 VkShaderStageFlags vk_stage =
3422 mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3423 if (!(vk_stage & pipeline->active_stages))
3424 continue;
3425
3426 char *nir_str = NULL;
3427 char *qpu_str = NULL;
3428
3429 if (pipeline_keep_qpu(pipeline)) {
3430 nir_shader *nir = pipeline_get_nir(pipeline, s);
3431 nir_str = nir ?
3432 nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3433
3434 uint32_t qpu_size;
3435 uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3436 if (qpu) {
3437 uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
3438 qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3439 qpu_inst_count * 96);
3440 size_t offset = 0;
3441 for (int i = 0; i < qpu_inst_count; i++) {
3442 const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3443 append(&qpu_str, &offset, "%s\n", str);
3444 ralloc_free((void *)str);
3445 }
3446 }
3447 }
3448
3449 struct v3dv_pipeline_executable_data data = {
3450 .stage = s,
3451 .nir_str = nir_str,
3452 .qpu_str = qpu_str,
3453 };
3454 util_dynarray_append(&pipeline->executables.data,
3455 struct v3dv_pipeline_executable_data, data);
3456 }
3457 }
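/* The qpu_inst_count * 96 allocation above is only an initial size estimate
 * for the disassembly text (roughly one text line per 64-bit instruction);
 * append() is built on ralloc_vasprintf_rewrite_tail(), which reallocates
 * the string as needed, so longer disassembly output is still handled
 * correctly.
 */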
3458
3459 static const struct v3dv_pipeline_executable_data *
3460 pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3461 {
3462 assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3463 struct v3dv_pipeline_executable_data));
3464 return util_dynarray_element(&pipeline->executables.data,
3465 struct v3dv_pipeline_executable_data,
3466 index);
3467 }
3468
3469 VKAPI_ATTR VkResult VKAPI_CALL
3470 v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3471 VkDevice device,
3472 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3473 uint32_t *pInternalRepresentationCount,
3474 VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3475 {
3476 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3477
3478 pipeline_collect_executable_data(pipeline);
3479
3480 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3481 pInternalRepresentations, pInternalRepresentationCount);
3482
3483 bool incomplete = false;
3484 const struct v3dv_pipeline_executable_data *exe =
3485 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3486
3487 if (exe->nir_str) {
3488 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3489 &out, ir) {
3490 WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3491 WRITE_STR(ir->description, "Final NIR form");
3492 if (!write_ir_text(ir, exe->nir_str))
3493 incomplete = true;
3494 }
3495 }
3496
3497 if (exe->qpu_str) {
3498 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3499 &out, ir) {
3500 WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3501 WRITE_STR(ir->description, "Final QPU assembly");
3502 if (!write_ir_text(ir, exe->qpu_str))
3503 incomplete = true;
3504 }
3505 }
3506
3507 return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3508 }
3509
3510 VKAPI_ATTR VkResult VKAPI_CALL
3511 v3dv_GetPipelineExecutablePropertiesKHR(
3512 VkDevice device,
3513 const VkPipelineInfoKHR *pPipelineInfo,
3514 uint32_t *pExecutableCount,
3515 VkPipelineExecutablePropertiesKHR *pProperties)
3516 {
3517 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3518
3519 pipeline_collect_executable_data(pipeline);
3520
3521 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3522 pProperties, pExecutableCount);
3523
3524 util_dynarray_foreach(&pipeline->executables.data,
3525 struct v3dv_pipeline_executable_data, exe) {
3526 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3527 gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3528 props->stages = mesa_to_vk_shader_stage(mesa_stage);
3529
3530 WRITE_STR(props->name, "%s (%s)",
3531 _mesa_shader_stage_to_abbrev(mesa_stage),
3532 broadcom_shader_stage_is_binning(exe->stage) ?
3533 "Binning" : "Render");
3534
3535 WRITE_STR(props->description, "%s",
3536 _mesa_shader_stage_to_string(mesa_stage));
3537
3538 props->subgroupSize = V3D_CHANNELS;
3539 }
3540 }
3541
3542 return vk_outarray_status(&out);
3543 }
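/* subgroupSize is reported as V3D_CHANNELS because the QPU always executes
 * shaders with that fixed SIMD width (16 channels on the V3D hardware this
 * driver targets), regardless of how many threads per QPU the compiler
 * chose for the program.
 */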
3544
3545 VKAPI_ATTR VkResult VKAPI_CALL
3546 v3dv_GetPipelineExecutableStatisticsKHR(
3547 VkDevice device,
3548 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3549 uint32_t *pStatisticCount,
3550 VkPipelineExecutableStatisticKHR *pStatistics)
3551 {
3552 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3553
3554 pipeline_collect_executable_data(pipeline);
3555
3556 const struct v3dv_pipeline_executable_data *exe =
3557 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3558
3559 struct v3d_prog_data *prog_data =
3560 pipeline_get_prog_data(pipeline, exe->stage);
3561
3562 struct v3dv_shader_variant *variant =
3563 pipeline->shared_data->variants[exe->stage];
3564 uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3565
3566 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3567 pStatistics, pStatisticCount);
3568
3569 if (qpu_inst_count > 0) {
3570 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3571 WRITE_STR(stat->name, "Compile Strategy");
3572 WRITE_STR(stat->description, "Chosen compile strategy index");
3573 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3574 stat->value.u64 = prog_data->compile_strategy_idx;
3575 }
3576
3577 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3578 WRITE_STR(stat->name, "Instruction Count");
3579 WRITE_STR(stat->description, "Number of QPU instructions");
3580 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3581 stat->value.u64 = qpu_inst_count;
3582 }
3583
3584 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3585 WRITE_STR(stat->name, "Thread Count");
3586 WRITE_STR(stat->description, "Number of QPU threads dispatched");
3587 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3588 stat->value.u64 = prog_data->threads;
3589 }
3590
3591 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3592 WRITE_STR(stat->name, "Spill Size");
3593 WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3594 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3595 stat->value.u64 = prog_data->spill_size;
3596 }
3597
3598 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3599 WRITE_STR(stat->name, "TMU Spills");
3600 WRITE_STR(stat->description, "Number of times a register was spilled "
3601 "to memory");
3602 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3603          stat->value.u64 = prog_data->tmu_spills;
3604 }
3605
3606 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3607 WRITE_STR(stat->name, "TMU Fills");
3608 WRITE_STR(stat->description, "Number of times a register was filled "
3609 "from memory");
3610 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3611          stat->value.u64 = prog_data->tmu_fills;
3612 }
3613
3614 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3615 WRITE_STR(stat->name, "QPU Read Stalls");
3616 WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3617 "register read dependency");
3618 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3619 stat->value.u64 = prog_data->qpu_read_stalls;
3620 }
3621 }
3622
3623 return vk_outarray_status(&out);
3624 }
3625