1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_util.h"
25 
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28 
29 #include "common/v3d_debug.h"
30 #include "qpu/qpu_disasm.h"
31 
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34 
35 #include "util/u_atomic.h"
36 #include "util/os_time.h"
37 
38 #include "vk_nir_convert_ycbcr.h"
39 #include "vk_pipeline.h"
40 #include "vulkan/util/vk_format.h"
41 
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44 
45 void
46 v3dv_print_v3d_key(struct v3d_key *key,
47                    uint32_t v3d_key_size)
48 {
49    struct mesa_sha1 ctx;
50    unsigned char sha1[20];
51    char sha1buf[41];
52 
53    _mesa_sha1_init(&ctx);
54 
55    _mesa_sha1_update(&ctx, key, v3d_key_size);
56 
57    _mesa_sha1_final(&ctx, sha1);
58    _mesa_sha1_format(sha1buf, sha1);
59 
60    fprintf(stderr, "key %p: %s\n", key, sha1buf);
61 }
62 
63 static void
64 pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
65 {
66    VkPipelineShaderStageCreateInfo info = {
67       .module = vk_shader_module_handle_from_nir(p_stage->nir),
68       .pName = p_stage->entrypoint,
69       .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70    };
71 
72    vk_pipeline_hash_shader_stage(&info, NULL, p_stage->shader_sha1);
73 }
74 
75 void
76 v3dv_shader_variant_destroy(struct v3dv_device *device,
77                             struct v3dv_shader_variant *variant)
78 {
79    /* The assembly BO is shared by all variants in the pipeline, so it can't
80     * be freed here and should be freed with the pipeline
81     */
82    if (variant->qpu_insts) {
83       free(variant->qpu_insts);
84       variant->qpu_insts = NULL;
85    }
86    ralloc_free(variant->prog_data.base);
87    vk_free(&device->vk.alloc, variant);
88 }
89 
90 static void
91 destroy_pipeline_stage(struct v3dv_device *device,
92                        struct v3dv_pipeline_stage *p_stage,
93                        const VkAllocationCallbacks *pAllocator)
94 {
95    if (!p_stage)
96       return;
97 
98    ralloc_free(p_stage->nir);
99    vk_free2(&device->vk.alloc, pAllocator, p_stage);
100 }
101 
102 static void
103 pipeline_free_stages(struct v3dv_device *device,
104                      struct v3dv_pipeline *pipeline,
105                      const VkAllocationCallbacks *pAllocator)
106 {
107    assert(pipeline);
108 
109    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
110       destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator);
111       pipeline->stages[stage] = NULL;
112    }
113 }
114 
115 static void
116 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
117                       struct v3dv_device *device,
118                       const VkAllocationCallbacks *pAllocator)
119 {
120    if (!pipeline)
121       return;
122 
123    pipeline_free_stages(device, pipeline, pAllocator);
124 
125    if (pipeline->shared_data) {
126       v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
127       pipeline->shared_data = NULL;
128    }
129 
130    if (pipeline->spill.bo) {
131       assert(pipeline->spill.size_per_thread > 0);
132       v3dv_bo_free(device, pipeline->spill.bo);
133    }
134 
135    if (pipeline->default_attribute_values) {
136       v3dv_bo_free(device, pipeline->default_attribute_values);
137       pipeline->default_attribute_values = NULL;
138    }
139 
140    if (pipeline->executables.mem_ctx)
141       ralloc_free(pipeline->executables.mem_ctx);
142 
143    if (pipeline->layout)
144       v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator);
145 
146    vk_object_free(&device->vk, pAllocator, pipeline);
147 }
148 
149 VKAPI_ATTR void VKAPI_CALL
150 v3dv_DestroyPipeline(VkDevice _device,
151                      VkPipeline _pipeline,
152                      const VkAllocationCallbacks *pAllocator)
153 {
154    V3DV_FROM_HANDLE(v3dv_device, device, _device);
155    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
156 
157    if (!pipeline)
158       return;
159 
160    v3dv_destroy_pipeline(pipeline, device, pAllocator);
161 }
162 
163 static const struct spirv_to_nir_options default_spirv_options =  {
164    .caps = {
165       .device_group = true,
166       .float_controls = true,
167       .multiview = true,
168       .storage_8bit = true,
169       .storage_16bit = true,
170       .subgroup_ballot = true,
171       .subgroup_basic = true,
172       .subgroup_quad = true,
173       .subgroup_shuffle = true,
174       .subgroup_vote = true,
175       .variable_pointers = true,
176       .vk_memory_model = true,
177       .vk_memory_model_device_scope = true,
178       .physical_storage_buffer_address = true,
179       .workgroup_memory_explicit_layout = true,
180       .image_read_without_format = true,
181       .demote_to_helper_invocation = true,
182     },
183    .ubo_addr_format = nir_address_format_32bit_index_offset,
184    .ssbo_addr_format = nir_address_format_32bit_index_offset,
185    .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
186    .push_const_addr_format = nir_address_format_logical,
187    .shared_addr_format = nir_address_format_32bit_offset,
188 };
189 
190 const nir_shader_compiler_options v3dv_nir_options = {
191    .lower_uadd_sat = true,
192    .lower_usub_sat = true,
193    .lower_iadd_sat = true,
194    .lower_all_io_to_temps = true,
195    .lower_extract_byte = true,
196    .lower_extract_word = true,
197    .lower_insert_byte = true,
198    .lower_insert_word = true,
199    .lower_bitfield_insert = true,
200    .lower_bitfield_extract = true,
201    .lower_bitfield_reverse = true,
202    .lower_bit_count = true,
203    .lower_cs_local_id_to_index = true,
204    .lower_ffract = true,
205    .lower_fmod = true,
206    .lower_pack_unorm_2x16 = true,
207    .lower_pack_snorm_2x16 = true,
208    .lower_unpack_unorm_2x16 = true,
209    .lower_unpack_snorm_2x16 = true,
210    .lower_pack_unorm_4x8 = true,
211    .lower_pack_snorm_4x8 = true,
212    .lower_unpack_unorm_4x8 = true,
213    .lower_unpack_snorm_4x8 = true,
214    .lower_pack_half_2x16 = true,
215    .lower_unpack_half_2x16 = true,
216    .lower_pack_32_2x16 = true,
217    .lower_pack_32_2x16_split = true,
218    .lower_unpack_32_2x16_split = true,
219    .lower_mul_2x32_64 = true,
220    .lower_fdiv = true,
221    .lower_find_lsb = true,
222    .lower_ffma16 = true,
223    .lower_ffma32 = true,
224    .lower_ffma64 = true,
225    .lower_flrp32 = true,
226    .lower_fpow = true,
227    .lower_fsat = true,
228    .lower_fsqrt = true,
229    .lower_ifind_msb = true,
230    .lower_isign = true,
231    .lower_ldexp = true,
232    .lower_mul_high = true,
233    .lower_wpos_pntc = false,
234    .lower_to_scalar = true,
235    .lower_device_index_to_zero = true,
236    .lower_fquantize2f16 = true,
237    .has_fsub = true,
238    .has_isub = true,
239    .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
240                                    * needs to be supported */
241    .lower_interpolate_at = true,
242    .max_unroll_iterations = 16,
243    .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
244    .divergence_analysis_options =
245       nir_divergence_multiple_workgroup_per_compute_subgroup,
246 };
247 
248 const nir_shader_compiler_options *
249 v3dv_pipeline_get_nir_options(void)
250 {
251    return &v3dv_nir_options;
252 }
253 
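/* Looks up the YCbCr conversion state for an immutable sampler in the given
 * pipeline layout from its set, binding and array index. Returns NULL if the
 * binding has no immutable samplers or the sampler has no conversion attached.
 */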
254 static const struct vk_ycbcr_conversion_state *
255 lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set,
256                         uint32_t binding, uint32_t array_index)
257 {
258    struct v3dv_pipeline_layout *pipeline_layout =
259       (struct v3dv_pipeline_layout *) _pipeline_layout;
260 
261    assert(set < pipeline_layout->num_sets);
262    struct v3dv_descriptor_set_layout *set_layout =
263       pipeline_layout->set[set].layout;
264 
265    assert(binding < set_layout->binding_count);
266    struct v3dv_descriptor_set_binding_layout *bind_layout =
267       &set_layout->binding[binding];
268 
269    if (bind_layout->immutable_samplers_offset) {
270       const struct v3dv_sampler *immutable_samplers =
271          v3dv_immutable_samplers(set_layout, bind_layout);
272       const struct v3dv_sampler *sampler = &immutable_samplers[array_index];
273       return sampler->conversion ? &sampler->conversion->state : NULL;
274    } else {
275       return NULL;
276    }
277 }
278 
279 static void
280 preprocess_nir(nir_shader *nir)
281 {
282    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
283       .frag_coord = true,
284       .point_coord = true,
285    };
286    NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
287 
288    /* Vulkan uses the separate-shader linking model */
289    nir->info.separate_shader = true;
290 
291    /* Make sure we lower variable initializers on output variables so that
292     * nir_remove_dead_variables below sees the corresponding stores
293     */
294    NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
295 
296    if (nir->info.stage == MESA_SHADER_FRAGMENT)
297       NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
298    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
299       NIR_PASS(_, nir, nir_lower_input_attachments,
300                  &(nir_input_attachment_options) {
301                     .use_fragcoord_sysval = false,
302                        });
303    }
304 
305    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
306               nir_shader_get_entrypoint(nir), true, false);
307 
308    NIR_PASS(_, nir, nir_lower_system_values);
309 
310    NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
311 
312    NIR_PASS(_, nir, nir_normalize_cubemap_coords);
313 
314    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
315 
316    NIR_PASS(_, nir, nir_split_var_copies);
317    NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
318 
319    v3d_optimize_nir(NULL, nir);
320 
321    NIR_PASS(_, nir, nir_lower_explicit_io,
322             nir_var_mem_push_const,
323             nir_address_format_32bit_offset);
324 
325    NIR_PASS(_, nir, nir_lower_explicit_io,
326             nir_var_mem_ubo | nir_var_mem_ssbo,
327             nir_address_format_32bit_index_offset);
328 
329    NIR_PASS(_, nir, nir_lower_explicit_io,
330             nir_var_mem_global,
331             nir_address_format_2x32bit_global);
332 
333    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
334 
335    /* Lower a bunch of stuff */
336    NIR_PASS(_, nir, nir_lower_var_copies);
337 
338    NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
339 
340    NIR_PASS(_, nir, nir_lower_indirect_derefs,
341             nir_var_function_temp, 2);
342 
343    NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
344             nir_var_mem_ubo | nir_var_mem_ssbo,
345             nir_lower_direct_array_deref_of_vec_load);
346 
347    NIR_PASS(_, nir, nir_lower_frexp);
348 
349    /* Get rid of split copies */
350    v3d_optimize_nir(NULL, nir);
351 }
352 
353 static nir_shader *
354 shader_module_compile_to_nir(struct v3dv_device *device,
355                              struct v3dv_pipeline_stage *stage)
356 {
357    nir_shader *nir;
358    const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
359    gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage);
360 
361 
362    if (V3D_DBG(DUMP_SPIRV) && stage->module->nir == NULL)
363       v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
364 
365    /* vk_shader_module_to_nir also handles internal shaders, when module->nir
 366     * != NULL. It also calls nir_validate_shader in both cases, so we don't
367     * call it again here.
368     */
369    VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
370                                              gl_stage,
371                                              stage->entrypoint,
372                                              stage->spec_info,
373                                              &default_spirv_options,
374                                              nir_options,
375                                              NULL, &nir);
376    if (result != VK_SUCCESS)
377       return NULL;
378    assert(nir->info.stage == gl_stage);
379 
380    if (V3D_DBG(SHADERDB) && stage->module->nir == NULL) {
381       char sha1buf[41];
382       _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
383       nir->info.name = ralloc_strdup(nir, sha1buf);
384    }
385 
386    if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
387       fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
388               broadcom_shader_stage_name(stage->stage),
389               stage->program_id);
390       nir_print_shader(nir, stderr);
391       fprintf(stderr, "\n");
392    }
393 
394    preprocess_nir(nir);
395 
396    return nir;
397 }
398 
399 static int
400 type_size_vec4(const struct glsl_type *type, bool bindless)
401 {
402    return glsl_count_attribute_slots(type, false);
403 }
404 
405 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
406  * rethink.
407  */
408 static unsigned
409 descriptor_map_add(struct v3dv_descriptor_map *map,
410                    int set,
411                    int binding,
412                    int array_index,
413                    int array_size,
414                    int start_index,
415                    uint8_t return_size,
416                    uint8_t plane)
417 {
418    assert(array_index < array_size);
419    assert(return_size == 16 || return_size == 32);
420 
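   /* Starting at start_index, either find an existing entry for this
    * (set, binding, array_index, plane) tuple and reuse it, or stop at the
    * first unused slot and add a new entry there.
    */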
421    unsigned index = start_index;
422    for (; index < map->num_desc; index++) {
423       if (map->used[index] &&
424           set == map->set[index] &&
425           binding == map->binding[index] &&
426           array_index == map->array_index[index] &&
427           plane == map->plane[index]) {
428          assert(array_size == map->array_size[index]);
429          if (return_size != map->return_size[index]) {
 430             /* If the return_size is different, it means that the same sampler
 431              * was used for operations with different precision
 432              * requirements. In this case we need to ensure that we use the
433              * larger one.
434              */
435             map->return_size[index] = 32;
436          }
437          return index;
438       } else if (!map->used[index]) {
439          break;
440       }
441    }
442 
443    assert(index < DESCRIPTOR_MAP_SIZE);
444    assert(!map->used[index]);
445 
446    map->used[index] = true;
447    map->set[index] = set;
448    map->binding[index] = binding;
449    map->array_index[index] = array_index;
450    map->array_size[index] = array_size;
451    map->return_size[index] = return_size;
452    map->plane[index] = plane;
453    map->num_desc = MAX2(map->num_desc, index + 1);
454 
455    return index;
456 }
457 
458 struct lower_pipeline_layout_state {
459    struct v3dv_pipeline *pipeline;
460    const struct v3dv_pipeline_layout *layout;
461    bool needs_default_sampler_state;
462 };
463 
464 
465 static void
466 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
467                          struct lower_pipeline_layout_state *state)
468 {
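   /* We read push constants through the same uniform path as everything else,
    * so the lowering only needs to retarget the intrinsic; the offset source
    * is left untouched.
    */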
469    assert(instr->intrinsic == nir_intrinsic_load_push_constant);
470    instr->intrinsic = nir_intrinsic_load_uniform;
471 }
472 
473 static struct v3dv_descriptor_map*
474 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
475                             VkDescriptorType desc_type,
476                             gl_shader_stage gl_stage,
477                             bool is_sampler)
478 {
479    enum broadcom_shader_stage broadcom_stage =
480       gl_shader_stage_to_broadcom(gl_stage);
481 
482    assert(pipeline->shared_data &&
483           pipeline->shared_data->maps[broadcom_stage]);
484 
485    switch(desc_type) {
486    case VK_DESCRIPTOR_TYPE_SAMPLER:
487       return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
488    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
489    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
490    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
491    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
492    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
493       return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
494    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
495       return is_sampler ?
496          &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
497          &pipeline->shared_data->maps[broadcom_stage]->texture_map;
498    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
499    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
500    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
501       return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
502    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
503    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
504       return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
505    default:
506       unreachable("Descriptor type unknown or not having a descriptor map");
507    }
508 }
509 
510 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
 511  * can be used by the v3d_compiler */
512 static void
513 lower_vulkan_resource_index(nir_builder *b,
514                             nir_intrinsic_instr *instr,
515                             struct lower_pipeline_layout_state *state)
516 {
517    assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
518 
519    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
520 
521    unsigned set = nir_intrinsic_desc_set(instr);
522    unsigned binding = nir_intrinsic_binding(instr);
523    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
524    struct v3dv_descriptor_set_binding_layout *binding_layout =
525       &set_layout->binding[binding];
526    unsigned index = 0;
527 
528    switch (binding_layout->type) {
529    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
530    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
531    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
532    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
533    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
534       struct v3dv_descriptor_map *descriptor_map =
535          pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
536                                      b->shader->info.stage, false);
537 
538       if (!const_val)
539          unreachable("non-constant vulkan_resource_index array index");
540 
541       /* At compile-time we will need to know if we are processing a UBO load
542        * for an inline or a regular UBO so we can handle inline loads like
 543        * push constants. At the NIR level, however, the inline
544        * information is gone, so we rely on the index to make this distinction.
545        * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
546        * inline buffers. This means that at the descriptor map level
547        * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
548        * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
549        */
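      /* For illustration: if MAX_INLINE_UNIFORM_BUFFERS were 4, inline
       * uniform blocks would use map slots 0..3 and the first regular UBO
       * would be added at slot 4 (the actual value of the macro is defined
       * elsewhere in the driver).
       */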
550       uint32_t start_index = 0;
551       if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
552           binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
553          start_index += MAX_INLINE_UNIFORM_BUFFERS;
554       }
555 
556       index = descriptor_map_add(descriptor_map, set, binding,
557                                  const_val->u32,
558                                  binding_layout->array_size,
559                                  start_index,
560                                  32 /* return_size: doesn't really apply for this case */,
561                                  0);
562       break;
563    }
564 
565    default:
566       unreachable("unsupported descriptor type for vulkan_resource_index");
567       break;
568    }
569 
570    /* Since we use the deref pass, both vulkan_resource_index and
571     * vulkan_load_descriptor return a vec2 providing an index and
572     * offset. Our backend compiler only cares about the index part.
573     */
574    nir_def_rewrite_uses(&instr->def,
575                             nir_imm_ivec2(b, index, 0));
576    nir_instr_remove(&instr->instr);
577 }
578 
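/* Consumes the nir_tex_src_plane source (added by multi-plane/YCbCr lowering,
 * if any) so that each plane can get its own entry in the descriptor maps via
 * the plane argument of descriptor_map_add().
 */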
579 static uint8_t
580 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
581 {
582    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
583    if (plane_src_idx < 0)
584        return 0;
585 
586    uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src);
587    nir_tex_instr_remove_src(tex, plane_src_idx);
588    return plane;
589 }
590 
 591 /* Returns the return_size, so it can be used in the case where there is no
 592  * sampler object.
593  */
594 static uint8_t
595 lower_tex_src(nir_builder *b,
596               nir_tex_instr *instr,
597               unsigned src_idx,
598               struct lower_pipeline_layout_state *state)
599 {
600    nir_def *index = NULL;
601    unsigned base_index = 0;
602    unsigned array_elements = 1;
603    nir_tex_src *src = &instr->src[src_idx];
604    bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
605 
606    uint8_t plane = tex_instr_get_and_remove_plane_src(instr);
607 
 608    /* We first compute the offsets */
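   /* Illustrative example (hypothetical declaration): for
    *
    *    layout(set = 0, binding = 1) uniform sampler2D tex[4][2];
    *
    * accessed as tex[i][j], this walk flattens the deref chain into the flat
    * index i * 2 + j; if any index is dynamic, the result is clamped below to
    * array_elements - 1 (7 in this example).
    */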
609    nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
610    while (deref->deref_type != nir_deref_type_var) {
611       nir_deref_instr *parent =
612          nir_instr_as_deref(deref->parent.ssa->parent_instr);
613 
614       assert(deref->deref_type == nir_deref_type_array);
615 
616       if (nir_src_is_const(deref->arr.index) && index == NULL) {
617          /* We're still building a direct index */
618          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
619       } else {
620          if (index == NULL) {
621             /* We used to be direct but not anymore */
622             index = nir_imm_int(b, base_index);
623             base_index = 0;
624          }
625 
626          index = nir_iadd(b, index,
627                           nir_imul_imm(b, deref->arr.index.ssa,
628                                        array_elements));
629       }
630 
631       array_elements *= glsl_get_length(parent->type);
632 
633       deref = parent;
634    }
635 
636    if (index)
637       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
638 
 639    /* We have the offsets, so we apply them, rewriting the source or removing
 640     * it from the instruction if needed.
641     */
642    if (index) {
643       nir_src_rewrite(&src->src, index);
644 
645       src->src_type = is_sampler ?
646          nir_tex_src_sampler_offset :
647          nir_tex_src_texture_offset;
648    } else {
649       nir_tex_instr_remove_src(instr, src_idx);
650    }
651 
652    uint32_t set = deref->var->data.descriptor_set;
653    uint32_t binding = deref->var->data.binding;
654    /* FIXME: this is a really simplified check for the precision to be used
 655     * for the sampling. Right now we only check the precision of the variables
 656     * used in the operation itself, but there are other cases that we could use
 657     * to infer the precision requirement.
658     */
659    bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
660                             deref->var->data.precision == GLSL_PRECISION_LOW;
661    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
662    struct v3dv_descriptor_set_binding_layout *binding_layout =
663       &set_layout->binding[binding];
664 
665    /* For input attachments, the shader includes the attachment_idx. As we are
666     * treating them as a texture, we only want the base_index
667     */
668    uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
669       deref->var->data.index + base_index :
670       base_index;
671 
672    uint8_t return_size;
673    if (V3D_DBG(TMU_16BIT))
674       return_size = 16;
675    else  if (V3D_DBG(TMU_32BIT))
676       return_size = 32;
677    else
678       return_size = relaxed_precision ? 16 : 32;
679 
680    struct v3dv_descriptor_map *map =
681       pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
682                                   b->shader->info.stage, is_sampler);
683    int desc_index =
684       descriptor_map_add(map,
685                          deref->var->data.descriptor_set,
686                          deref->var->data.binding,
687                          array_index,
688                          binding_layout->array_size,
689                          0,
690                          return_size,
691                          plane);
692 
693    if (is_sampler)
694       instr->sampler_index = desc_index;
695    else
696       instr->texture_index = desc_index;
697 
698    return return_size;
699 }
700 
701 static bool
702 lower_sampler(nir_builder *b,
703               nir_tex_instr *instr,
704               struct lower_pipeline_layout_state *state)
705 {
706    uint8_t return_size = 0;
707 
708    int texture_idx =
709       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
710 
711    if (texture_idx >= 0)
712       return_size = lower_tex_src(b, instr, texture_idx, state);
713 
714    int sampler_idx =
715       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
716 
717    if (sampler_idx >= 0) {
718       assert(nir_tex_instr_need_sampler(instr));
719       lower_tex_src(b, instr, sampler_idx, state);
720    }
721 
722    if (texture_idx < 0 && sampler_idx < 0)
723       return false;
724 
 725    /* If the instruction doesn't have a sampler (e.g. txf) we use backend_flags
 726     * to bind a default sampler state to configure precision.
727     */
728    if (sampler_idx < 0) {
729       state->needs_default_sampler_state = true;
730       instr->backend_flags = return_size == 16 ?
731          V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
732    }
733 
734    return true;
735 }
736 
737 /* FIXME: really similar to lower_tex_src, perhaps refactor? */
738 static void
739 lower_image_deref(nir_builder *b,
740                   nir_intrinsic_instr *instr,
741                   struct lower_pipeline_layout_state *state)
742 {
743    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
744    nir_def *index = NULL;
745    unsigned array_elements = 1;
746    unsigned base_index = 0;
747 
748    while (deref->deref_type != nir_deref_type_var) {
749       nir_deref_instr *parent =
750          nir_instr_as_deref(deref->parent.ssa->parent_instr);
751 
752       assert(deref->deref_type == nir_deref_type_array);
753 
754       if (nir_src_is_const(deref->arr.index) && index == NULL) {
755          /* We're still building a direct index */
756          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
757       } else {
758          if (index == NULL) {
759             /* We used to be direct but not anymore */
760             index = nir_imm_int(b, base_index);
761             base_index = 0;
762          }
763 
764          index = nir_iadd(b, index,
765                           nir_imul_imm(b, deref->arr.index.ssa,
766                                        array_elements));
767       }
768 
769       array_elements *= glsl_get_length(parent->type);
770 
771       deref = parent;
772    }
773 
774    if (index)
775       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
776 
777    uint32_t set = deref->var->data.descriptor_set;
778    uint32_t binding = deref->var->data.binding;
779    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
780    struct v3dv_descriptor_set_binding_layout *binding_layout =
781       &set_layout->binding[binding];
782 
783    uint32_t array_index = deref->var->data.index + base_index;
784 
785    assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
786           binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
787 
788    struct v3dv_descriptor_map *map =
789       pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
790                                   b->shader->info.stage, false);
791 
792    int desc_index =
793       descriptor_map_add(map,
794                          deref->var->data.descriptor_set,
795                          deref->var->data.binding,
796                          array_index,
797                          binding_layout->array_size,
798                          0,
799                          32 /* return_size: doesn't apply for textures */,
800                          0);
801 
802    /* Note: we don't need to do anything here in relation to the precision and
803     * the output size because for images we can infer that info from the image
 804     * intrinsic, which includes the image format (see
805     * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
806     */
807 
808    index = nir_imm_int(b, desc_index);
809 
810    nir_rewrite_image_intrinsic(instr, index, false);
811 }
812 
813 static bool
814 lower_intrinsic(nir_builder *b,
815                 nir_intrinsic_instr *instr,
816                 struct lower_pipeline_layout_state *state)
817 {
818    switch (instr->intrinsic) {
819    case nir_intrinsic_load_push_constant:
820       lower_load_push_constant(b, instr, state);
821       return true;
822 
823    case nir_intrinsic_vulkan_resource_index:
824       lower_vulkan_resource_index(b, instr, state);
825       return true;
826 
827    case nir_intrinsic_load_vulkan_descriptor: {
828       /* Loading the descriptor happens as part of load/store instructions,
829        * so for us this is a no-op.
830        */
831       nir_def_rewrite_uses(&instr->def, instr->src[0].ssa);
832       nir_instr_remove(&instr->instr);
833       return true;
834    }
835 
836    case nir_intrinsic_image_deref_load:
837    case nir_intrinsic_image_deref_store:
838    case nir_intrinsic_image_deref_atomic:
839    case nir_intrinsic_image_deref_atomic_swap:
840    case nir_intrinsic_image_deref_size:
841    case nir_intrinsic_image_deref_samples:
842       lower_image_deref(b, instr, state);
843       return true;
844 
845    default:
846       return false;
847    }
848 }
849 
850 static bool
851 lower_pipeline_layout_cb(nir_builder *b,
852                          nir_instr *instr,
853                          void *_state)
854 {
855    bool progress = false;
856    struct lower_pipeline_layout_state *state = _state;
857 
858    b->cursor = nir_before_instr(instr);
859    switch (instr->type) {
860    case nir_instr_type_tex:
861       progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
862       break;
863    case nir_instr_type_intrinsic:
864       progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
865       break;
866    default:
867       break;
868    }
869 
870    return progress;
871 }
872 
873 static bool
874 lower_pipeline_layout_info(nir_shader *shader,
875                            struct v3dv_pipeline *pipeline,
876                            const struct v3dv_pipeline_layout *layout,
877                            bool *needs_default_sampler_state)
878 {
879    bool progress = false;
880 
881    struct lower_pipeline_layout_state state = {
882       .pipeline = pipeline,
883       .layout = layout,
884       .needs_default_sampler_state = false,
885    };
886 
887    progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
888                                            nir_metadata_block_index |
889                                            nir_metadata_dominance,
890                                            &state);
891 
892    *needs_default_sampler_state = state.needs_default_sampler_state;
893 
894    return progress;
895 }
896 
897 /* This flips gl_PointCoord.y to match Vulkan requirements */
898 static bool
899 lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
900 {
901    if (intr->intrinsic != nir_intrinsic_load_input)
902       return false;
903 
904    if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
905       return false;
906 
907    b->cursor = nir_after_instr(&intr->instr);
908    nir_def *result = &intr->def;
909    result =
910       nir_vector_insert_imm(b, result,
911                             nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
912    nir_def_rewrite_uses_after(&intr->def,
913                                   result, result->parent_instr);
914    return true;
915 }
916 
917 static bool
918 v3d_nir_lower_point_coord(nir_shader *s)
919 {
920    assert(s->info.stage == MESA_SHADER_FRAGMENT);
921    return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
922                                        nir_metadata_block_index |
923                                        nir_metadata_dominance, NULL);
924 }
925 
926 static void
927 lower_fs_io(nir_shader *nir)
928 {
929    /* Our backend doesn't handle array fragment shader outputs */
930    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
931    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
932 
933    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
934                                MESA_SHADER_FRAGMENT);
935 
936    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
937                                MESA_SHADER_FRAGMENT);
938 
939    NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
940             type_size_vec4, 0);
941 }
942 
943 static void
944 lower_gs_io(struct nir_shader *nir)
945 {
946    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
947 
948    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
949                                MESA_SHADER_GEOMETRY);
950 
951    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
952                                MESA_SHADER_GEOMETRY);
953 }
954 
955 static void
956 lower_vs_io(struct nir_shader *nir)
957 {
958    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
959 
960    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
961                                MESA_SHADER_VERTEX);
962 
963    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
964                                MESA_SHADER_VERTEX);
965 
966    /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
967     * overlaps with v3d_nir_lower_io. Need further research though.
968     */
969 }
970 
971 static void
972 shader_debug_output(const char *message, void *data)
973 {
974    /* FIXME: We probably don't want to debug anything extra here, and in fact
 975     * the compiler doesn't use this callback much, only as an alternative
 976     * way to dump the shaderdb stats, which you can already get using
 977     * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
978     * compiler to remove that callback.
979     */
980 }
981 
982 static void
983 pipeline_populate_v3d_key(struct v3d_key *key,
984                           const struct v3dv_pipeline_stage *p_stage,
985                           uint32_t ucp_enables)
986 {
987    assert(p_stage->pipeline->shared_data &&
988           p_stage->pipeline->shared_data->maps[p_stage->stage]);
989 
 990    /* The following values are default values used at pipeline creation time.
 991     * We use 32 bit as the default return size.
992     */
993    struct v3dv_descriptor_map *sampler_map =
994       &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
995    struct v3dv_descriptor_map *texture_map =
996       &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
997 
998    key->num_tex_used = texture_map->num_desc;
999    assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1000    for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1001       key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1002       key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1003       key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1004       key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1005    }
1006 
1007    key->num_samplers_used = sampler_map->num_desc;
1008    assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1009    for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1010         sampler_idx++) {
1011       key->sampler[sampler_idx].return_size =
1012          sampler_map->return_size[sampler_idx];
1013 
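      /* A 32-bit return uses one TMU return word per channel (up to 4), while
       * 16-bit (half-float) returns come back packed two channels per word,
       * hence the 4 vs 2 below.
       */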
1014       key->sampler[sampler_idx].return_channels =
1015          key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1016    }
1017 
1018    switch (p_stage->stage) {
1019    case BROADCOM_SHADER_VERTEX:
1020    case BROADCOM_SHADER_VERTEX_BIN:
1021       key->is_last_geometry_stage =
1022          p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL;
1023       break;
1024    case BROADCOM_SHADER_GEOMETRY:
1025    case BROADCOM_SHADER_GEOMETRY_BIN:
1026       /* FIXME: while we don't implement tessellation shaders */
1027       key->is_last_geometry_stage = true;
1028       break;
1029    case BROADCOM_SHADER_FRAGMENT:
1030    case BROADCOM_SHADER_COMPUTE:
1031       key->is_last_geometry_stage = false;
1032       break;
1033    default:
1034       unreachable("unsupported shader stage");
1035    }
1036 
1037    /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1038     * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1039     * takes care of adding a single compact array variable at
1040     * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1041     *
1042     * The only lowering we are interested is specific to the fragment shader,
1043     * where we want to emit discards to honor writes to gl_ClipDistance[] in
1044     * previous stages. This is done via nir_lower_clip_fs() so we only set up
1045     * the ucp enable mask for that stage.
1046     */
1047    key->ucp_enables = ucp_enables;
1048 
1049    const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled =
1050       VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1051 
1052    const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled =
1053       VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;
1054 
1055    key->robust_uniform_access =
1056       p_stage->robustness.uniform_buffers == robust_buffer_enabled;
1057    key->robust_storage_access =
1058       p_stage->robustness.storage_buffers == robust_buffer_enabled;
1059    key->robust_image_access =
1060       p_stage->robustness.images == robust_image_enabled;
1061 }
1062 
1063 /* FIXME: anv maps to the hw primitive type. Perhaps eventually we should do
1064  * the same. For now we use the mesa prim_mode, which is what v3d already uses.
1065  */
1066 static const enum mesa_prim vk_to_mesa_prim[] = {
1067    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
1068    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
1069    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
1070    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
1071    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
1072    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
1073    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
1074    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
1075    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
1076    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
1077 };
1078 
1079 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1080    [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1081    [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1082    [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1083    [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1084    [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1085    [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1086    [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1087    [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1088    [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1089    [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1090    [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1091    [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1092    [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1093    [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1094    [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1095    [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1096 };
1097 
1098 static void
1099 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1100                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1101                              const struct v3dv_pipeline_stage *p_stage,
1102                              bool has_geometry_shader,
1103                              uint32_t ucp_enables)
1104 {
1105    assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1106 
1107    memset(key, 0, sizeof(*key));
1108 
1109    struct v3dv_device *device = p_stage->pipeline->device;
1110    assert(device);
1111 
1112    pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables);
1113 
1114    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1115       pCreateInfo->pInputAssemblyState;
1116    uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1117 
1118    key->is_points = (topology == MESA_PRIM_POINTS);
1119    key->is_lines = (topology >= MESA_PRIM_LINES &&
1120                     topology <= MESA_PRIM_LINE_STRIP);
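   /* Note this relies on the line topologies (LINES, LINE_LOOP, LINE_STRIP)
    * being contiguous in the mesa_prim enumeration.
    */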
1121 
1122    if (key->is_points) {
1123       /* This mask represents state for GL_ARB_point_sprite which is not
1124        * relevant to Vulkan.
1125        */
1126       key->point_sprite_mask = 0;
1127 
1128       /* Vulkan mandates upper left. */
1129       key->point_coord_upper_left = true;
1130    }
1131 
1132    key->has_gs = has_geometry_shader;
1133 
1134    const VkPipelineColorBlendStateCreateInfo *cb_info =
1135       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1136       pCreateInfo->pColorBlendState : NULL;
1137 
1138    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1139                        vk_to_pipe_logicop[cb_info->logicOp] :
1140                        PIPE_LOGICOP_COPY;
1141 
1142    const bool raster_enabled =
1143       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1144 
1145    /* Multisample rasterization state must be ignored if rasterization
1146     * is disabled.
1147     */
1148    const VkPipelineMultisampleStateCreateInfo *ms_info =
1149       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1150    if (ms_info) {
1151       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1152              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1153       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1154 
1155       if (key->msaa)
1156          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1157 
1158       key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1159    }
1160 
1161    /* This is intended for V3D versions before 4.1, otherwise we just use the
1162     * tile buffer load/store swap R/B bit.
1163     */
1164    key->swap_color_rb = 0;
1165 
1166    const struct v3dv_render_pass *pass =
1167       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1168    const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1169    for (uint32_t i = 0; i < subpass->color_count; i++) {
1170       const uint32_t att_idx = subpass->color_attachments[i].attachment;
1171       if (att_idx == VK_ATTACHMENT_UNUSED)
1172          continue;
1173 
1174       key->cbufs |= 1 << i;
1175 
1176       VkFormat fb_format = pass->attachments[att_idx].desc.format;
1177       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1178 
1179       /* If logic operations are enabled then we might emit color reads and we
1180        * need to know the color buffer format and swizzle for that
1181        *
1182        */
1183       if (key->logicop_func != PIPE_LOGICOP_COPY) {
1184          /* Framebuffer formats should be single plane */
1185          assert(vk_format_get_plane_count(fb_format) == 1);
1186          key->color_fmt[i].format = fb_pipe_format;
1187          memcpy(key->color_fmt[i].swizzle,
1188                 v3dv_get_format_swizzle(p_stage->pipeline->device,
1189                                         fb_format,
1190                                         0),
1191                 sizeof(key->color_fmt[i].swizzle));
1192       }
1193 
1194       const struct util_format_description *desc =
1195          vk_format_description(fb_format);
1196 
1197       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1198           desc->channel[0].size == 32) {
1199          key->f32_color_rb |= 1 << i;
1200       }
1201 
1202       if (p_stage->nir->info.fs.untyped_color_outputs) {
1203          if (util_format_is_pure_uint(fb_pipe_format))
1204             key->uint_color_rb |= 1 << i;
1205          else if (util_format_is_pure_sint(fb_pipe_format))
1206             key->int_color_rb |= 1 << i;
1207       }
1208    }
1209 }
1210 
1211 static void
1212 setup_stage_outputs_from_next_stage_inputs(
1213    uint8_t next_stage_num_inputs,
1214    struct v3d_varying_slot *next_stage_input_slots,
1215    uint8_t *num_used_outputs,
1216    struct v3d_varying_slot *used_output_slots,
1217    uint32_t size_of_used_output_slots)
1218 {
1219    *num_used_outputs = next_stage_num_inputs;
1220    memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1221 }
1222 
1223 static void
1224 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1225                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1226                              const struct v3dv_pipeline_stage *p_stage)
1227 {
1228    assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1229           p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1230 
1231    struct v3dv_device *device = p_stage->pipeline->device;
1232    assert(device);
1233 
1234    memset(key, 0, sizeof(*key));
1235 
1236    pipeline_populate_v3d_key(&key->base, p_stage, 0);
1237 
1238    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1239 
1240    key->per_vertex_point_size =
1241       p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1242 
1243    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1244 
1245    assert(key->base.is_last_geometry_stage);
1246    if (key->is_coord) {
1247       /* Output varyings in the last binning shader are only used for transform
1248        * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1249        */
1250       key->num_used_outputs = 0;
1251    } else {
1252       struct v3dv_shader_variant *fs_variant =
1253          pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1254 
1255       STATIC_ASSERT(sizeof(key->used_outputs) ==
1256                     sizeof(fs_variant->prog_data.fs->input_slots));
1257 
1258       setup_stage_outputs_from_next_stage_inputs(
1259          fs_variant->prog_data.fs->num_inputs,
1260          fs_variant->prog_data.fs->input_slots,
1261          &key->num_used_outputs,
1262          key->used_outputs,
1263          sizeof(key->used_outputs));
1264    }
1265 }
1266 
1267 static void
1268 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1269                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1270                              const struct v3dv_pipeline_stage *p_stage)
1271 {
1272    assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1273           p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1274 
1275    struct v3dv_device *device = p_stage->pipeline->device;
1276    assert(device);
1277 
1278    memset(key, 0, sizeof(*key));
1279    pipeline_populate_v3d_key(&key->base, p_stage, 0);
1280 
1281    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1282 
1283    /* Vulkan specifies a point size per vertex, so this is true if the
1284     * primitives are points (like on ES2).
1285     */
1286    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1287       pCreateInfo->pInputAssemblyState;
1288    uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1289 
1290    /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1291     * MESA_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1292    key->per_vertex_point_size = (topology == MESA_PRIM_POINTS);
1293 
1294    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1295 
1296    if (key->is_coord) { /* Binning VS*/
1297       if (key->base.is_last_geometry_stage) {
1298          /* Output varyings in the last binning shader are only used for
1299           * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1300           * supported.
1301           */
1302          key->num_used_outputs = 0;
1303       } else {
1304          /* Linking against GS binning program */
1305          assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]);
1306          struct v3dv_shader_variant *gs_bin_variant =
1307             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1308 
1309          STATIC_ASSERT(sizeof(key->used_outputs) ==
1310                        sizeof(gs_bin_variant->prog_data.gs->input_slots));
1311 
1312          setup_stage_outputs_from_next_stage_inputs(
1313             gs_bin_variant->prog_data.gs->num_inputs,
1314             gs_bin_variant->prog_data.gs->input_slots,
1315             &key->num_used_outputs,
1316             key->used_outputs,
1317             sizeof(key->used_outputs));
1318       }
1319    } else { /* Render VS */
1320       if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
1321          /* Linking against GS render program */
1322          struct v3dv_shader_variant *gs_variant =
1323             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1324 
1325          STATIC_ASSERT(sizeof(key->used_outputs) ==
1326                        sizeof(gs_variant->prog_data.gs->input_slots));
1327 
1328          setup_stage_outputs_from_next_stage_inputs(
1329             gs_variant->prog_data.gs->num_inputs,
1330             gs_variant->prog_data.gs->input_slots,
1331             &key->num_used_outputs,
1332             key->used_outputs,
1333             sizeof(key->used_outputs));
1334       } else {
1335          /* Linking against FS program */
1336          struct v3dv_shader_variant *fs_variant =
1337             pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1338 
1339          STATIC_ASSERT(sizeof(key->used_outputs) ==
1340                        sizeof(fs_variant->prog_data.fs->input_slots));
1341 
1342          setup_stage_outputs_from_next_stage_inputs(
1343             fs_variant->prog_data.fs->num_inputs,
1344             fs_variant->prog_data.fs->input_slots,
1345             &key->num_used_outputs,
1346             key->used_outputs,
1347             sizeof(key->used_outputs));
1348       }
1349    }
1350 
1351    const VkPipelineVertexInputStateCreateInfo *vi_info =
1352       pCreateInfo->pVertexInputState;
1353    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1354       const VkVertexInputAttributeDescription *desc =
1355          &vi_info->pVertexAttributeDescriptions[i];
1356       assert(desc->location < MAX_VERTEX_ATTRIBS);
1357       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
1358           desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
1359          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1360       }
1361    }
1362 }
1363 
1364 /**
1365  * Creates the initial form of the pipeline stage for a binning shader by
1366  * cloning the render shader and flagging it as a coordinate shader.
1367  *
1368  * Returns NULL if it was not able to allocate the object, so it should be
1369  * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1370  */
1371 static struct v3dv_pipeline_stage *
1372 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1373                               const VkAllocationCallbacks *pAllocator)
1374 {
1375    struct v3dv_device *device = src->pipeline->device;
1376 
1377    struct v3dv_pipeline_stage *p_stage =
1378       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1379                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1380 
1381    if (p_stage == NULL)
1382       return NULL;
1383 
1384    assert(src->stage == BROADCOM_SHADER_VERTEX ||
1385           src->stage == BROADCOM_SHADER_GEOMETRY);
1386 
1387    enum broadcom_shader_stage bin_stage =
1388       src->stage == BROADCOM_SHADER_VERTEX ?
1389          BROADCOM_SHADER_VERTEX_BIN :
1390          BROADCOM_SHADER_GEOMETRY_BIN;
1391 
1392    p_stage->pipeline = src->pipeline;
1393    p_stage->stage = bin_stage;
1394    p_stage->entrypoint = src->entrypoint;
1395    p_stage->module = src->module;
1396    /* For binning shaders we will clone the NIR code from the corresponding
1397     * render shader later, when we call pipeline_compile_xxx_shader. This way
1398     * we only have to run the relevant NIR lowerings once, on the render shaders.
1399     */
1400    p_stage->nir = NULL;
1401    p_stage->program_id = src->program_id;
1402    p_stage->spec_info = src->spec_info;
1403    p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1404    p_stage->robustness = src->robustness;
1405    memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1406 
1407    return p_stage;
1408 }
1409 
1410 /*
1411  * Some pipeline creation flags indicate that the QPU assembly will be needed
1412  * later to gather further info. In that case we keep the qpu_insts around
1413  * instead of mapping/unmapping the BO later.
1414  */
1415 static bool
1416 pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
1417 {
1418    return pipeline->flags &
1419       (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
1420        VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
1421 }
1422 
1423 /**
1424  * Returns false if it was not able to allocate or map the assembly bo memory.
1425  */
1426 static bool
1427 upload_assembly(struct v3dv_pipeline *pipeline)
1428 {
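   /* Compute the total assembly size for all stages so that every variant can
    * share a single BO.
    */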
1429    uint32_t total_size = 0;
1430    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1431       struct v3dv_shader_variant *variant =
1432          pipeline->shared_data->variants[stage];
1433 
1434       if (variant != NULL)
1435          total_size += variant->qpu_insts_size;
1436    }
1437 
1438    struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1439                                       "pipeline shader assembly", true);
1440    if (!bo) {
1441       fprintf(stderr, "failed to allocate memory for shader\n");
1442       return false;
1443    }
1444 
1445    bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1446    if (!ok) {
1447       fprintf(stderr, "failed to map source shader buffer\n");
1448       return false;
1449    }
1450 
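   /* Copy each variant's QPU instructions into the shared BO, recording its
    * offset, and drop the CPU-side copy unless the pipeline needs to keep it
    * around (e.g. to report executable statistics or representations).
    */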
1451    uint32_t offset = 0;
1452    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1453       struct v3dv_shader_variant *variant =
1454          pipeline->shared_data->variants[stage];
1455 
1456       if (variant != NULL) {
1457          variant->assembly_offset = offset;
1458 
1459          memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1460          offset += variant->qpu_insts_size;
1461 
1462          if (!pipeline_keep_qpu(pipeline)) {
1463             free(variant->qpu_insts);
1464             variant->qpu_insts = NULL;
1465          }
1466       }
1467    }
1468    assert(total_size == offset);
1469 
1470    pipeline->shared_data->assembly_bo = bo;
1471 
1472    return true;
1473 }
1474 
1475 static void
1476 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1477                        struct v3dv_pipeline_key *key,
1478                        unsigned char *sha1_out)
1479 {
1480    struct mesa_sha1 ctx;
1481    _mesa_sha1_init(&ctx);
1482 
1483    if (pipeline->layout) {
1484       _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1485                         sizeof(pipeline->layout->sha1));
1486    }
1487 
1488    /* We need to include all shader stages in the sha1 key as linking may
1489     * modify the shader code in any stage. An alternative would be to use the
1490     * serialized NIR, but that seems like overkill.
1491     */
1492    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1493       if (broadcom_shader_stage_is_binning(stage))
1494          continue;
1495 
1496       struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
1497       if (p_stage == NULL)
1498          continue;
1499 
1500       assert(stage != BROADCOM_SHADER_COMPUTE);
1501 
1502       _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1503    }
1504 
1505    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1506 
1507    _mesa_sha1_final(&ctx, sha1_out);
1508 }
1509 
1510 static void
1511 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1512                       struct v3dv_pipeline_key *key,
1513                       unsigned char *sha1_out)
1514 {
1515    struct mesa_sha1 ctx;
1516    _mesa_sha1_init(&ctx);
1517 
1518    if (pipeline->layout) {
1519       _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1520                         sizeof(pipeline->layout->sha1));
1521    }
1522 
1523    struct v3dv_pipeline_stage *p_stage =
1524       pipeline->stages[BROADCOM_SHADER_COMPUTE];
1525 
1526    _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1527 
1528    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1529 
1530    _mesa_sha1_final(&ctx, sha1_out);
1531 }
1532 
1533 /* Checks that the pipeline has enough spill size for any of its
1534  * variants.
1535  */
1536 static void
1537 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1538 {
1539    uint32_t max_spill_size = 0;
1540 
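   /* Find the largest per-thread spill size required by any stage variant. */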
1541    for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1542       struct v3dv_shader_variant *variant =
1543          pipeline->shared_data->variants[stage];
1544 
1545       if (variant != NULL) {
1546          max_spill_size = MAX2(variant->prog_data.base->spill_size,
1547                                max_spill_size);
1548       }
1549    }
1550 
1551    if (max_spill_size > 0) {
1552       struct v3dv_device *device = pipeline->device;
1553 
1554       /* The TIDX register we use for choosing the area to access
1555        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1556        * Even at the minimum thread count in a particular shader, that
1557        * means we still have to allocate spill space for 4 threads per QPU.
1558        */
1559       const uint32_t total_spill_size =
1560          4 * device->devinfo.qpu_count * max_spill_size;
1561       if (pipeline->spill.bo) {
1562          assert(pipeline->spill.size_per_thread > 0);
1563          v3dv_bo_free(device, pipeline->spill.bo);
1564       }
1565       pipeline->spill.bo =
1566          v3dv_bo_alloc(device, total_spill_size, "spill", true);
1567       pipeline->spill.size_per_thread = max_spill_size;
1568    }
1569 }
1570 
1571 /**
1572  * Creates a new shader variant. Note that prog_data is not const, so it is
1573  * assumed that the caller will provide a pointer that the shader_variant
1574  * will own.
1575  *
1576  * Creation doesn't include allocating a BO to store the contents of
1577  * qpu_insts, as we will try to share the same BO for several shader
1578  * variants. Also note that qpu_insts being NULL is valid, for example when
1579  * creating the shader_variants from the cache, so we can upload the
1580  * assembly of all the shader stages at once.
1581  */
1582 struct v3dv_shader_variant *
1583 v3dv_shader_variant_create(struct v3dv_device *device,
1584                            enum broadcom_shader_stage stage,
1585                            struct v3d_prog_data *prog_data,
1586                            uint32_t prog_data_size,
1587                            uint32_t assembly_offset,
1588                            uint64_t *qpu_insts,
1589                            uint32_t qpu_insts_size,
1590                            VkResult *out_vk_result)
1591 {
1592    struct v3dv_shader_variant *variant =
1593       vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1594                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1595 
1596    if (variant == NULL) {
1597       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1598       return NULL;
1599    }
1600 
1601    variant->stage = stage;
1602    variant->prog_data_size = prog_data_size;
1603    variant->prog_data.base = prog_data;
1604 
1605    variant->assembly_offset = assembly_offset;
1606    variant->qpu_insts_size = qpu_insts_size;
1607    variant->qpu_insts = qpu_insts;
1608 
1609    *out_vk_result = VK_SUCCESS;
1610 
1611    return variant;
1612 }
1613 
1614 /* For a given key, it returns the compiled version of the shader.  Returns a
1615  * new reference to the shader_variant to the caller, or NULL.
1616  *
1617  * If the method returns NULL it means that something went wrong:
1618  *   * Not enough memory: this is one of the possible outcomes defined by
1619  *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1620  *   * Compilation error: hypothetically this shouldn't happen, as the spec
1621  *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1622  *     any compilation failure is a driver bug. In practice, something as
1623  *     common as failing to register allocate can lead to a compilation
1624  *     failure. In that case the only option (for any driver) is
1625  *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1626  *     error.
1627  */
1628 static struct v3dv_shader_variant *
1629 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1630                                 struct v3d_key *key,
1631                                 size_t key_size,
1632                                 const VkAllocationCallbacks *pAllocator,
1633                                 VkResult *out_vk_result)
1634 {
1635    int64_t stage_start = os_time_get_nano();
1636 
1637    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1638    struct v3dv_physical_device *physical_device = pipeline->device->pdevice;
1639    const struct v3d_compiler *compiler = physical_device->compiler;
1640    gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage);
1641 
1642    if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
1643       fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1644               broadcom_shader_stage_name(p_stage->stage),
1645               p_stage->program_id);
1646       nir_print_shader(p_stage->nir, stderr);
1647       fprintf(stderr, "\n");
1648    }
1649 
1650    uint64_t *qpu_insts;
1651    uint32_t qpu_insts_size;
1652    struct v3d_prog_data *prog_data;
1653    uint32_t prog_data_size = v3d_prog_data_size(gl_stage);
1654 
1655    qpu_insts = v3d_compile(compiler,
1656                            key, &prog_data,
1657                            p_stage->nir,
1658                            shader_debug_output, NULL,
1659                            p_stage->program_id, 0,
1660                            &qpu_insts_size);
1661 
1662    struct v3dv_shader_variant *variant = NULL;
1663 
1664    if (!qpu_insts) {
1665       fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1666               broadcom_shader_stage_name(p_stage->stage),
1667               p_stage->program_id);
1668       *out_vk_result = VK_ERROR_UNKNOWN;
1669    } else {
1670       variant =
1671          v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1672                                     prog_data, prog_data_size,
1673                                     0, /* assembly_offset, no final value yet */
1674                                     qpu_insts, qpu_insts_size,
1675                                     out_vk_result);
1676    }
1677    /* At this point we no longer need the NIR shader, but we free all the
1678     * temporary p_stage structs used during pipeline creation when we finish
1679     * it, so let's not worry about freeing the NIR here.
1680     */
1681 
1682    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1683 
1684    return variant;
1685 }
1686 
1687 static void
1688 link_shaders(nir_shader *producer, nir_shader *consumer)
1689 {
1690    assert(producer);
1691    assert(consumer);
1692 
1693    if (producer->options->lower_to_scalar) {
1694       NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1695       NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1696    }
1697 
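   /* Split arrayed I/O variables into individual elements so the linking
    * optimizations below can operate per location.
    */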
1698    nir_lower_io_arrays_to_elements(producer, consumer);
1699 
1700    v3d_optimize_nir(NULL, producer);
1701    v3d_optimize_nir(NULL, consumer);
1702 
1703    if (nir_link_opt_varyings(producer, consumer))
1704       v3d_optimize_nir(NULL, consumer);
1705 
1706    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1707    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1708 
1709    if (nir_remove_unused_varyings(producer, consumer)) {
1710       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1711       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1712 
1713       v3d_optimize_nir(NULL, producer);
1714       v3d_optimize_nir(NULL, consumer);
1715 
1716       /* Optimizations can cause varyings to become unused.
1717        * nir_compact_varyings() depends on all dead varyings being removed so
1718        * we need to call nir_remove_dead_variables() again here.
1719        */
1720       NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1721       NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1722    }
1723 }
1724 
1725 static void
1726 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1727                    struct v3dv_pipeline_stage *p_stage,
1728                    struct v3dv_pipeline_layout *layout)
1729 {
1730    int64_t stage_start = os_time_get_nano();
1731 
1732    assert(pipeline->shared_data &&
1733           pipeline->shared_data->maps[p_stage->stage]);
1734 
1735    NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex,
1736               lookup_ycbcr_conversion, layout);
1737 
1738    nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1739 
1740    /* We add this because we need a valid sampler for nir_lower_tex to do
1741     * unpacking of the texture operation result, even for the case where there
1742     * is no sampler state.
1743     *
1744     * We add two of those, one for the case where we need a 16-bit return
1745     * size, and another for the case where we need a 32-bit return size.
1746     */
1747    struct v3dv_descriptor_maps *maps =
1748       pipeline->shared_data->maps[p_stage->stage];
1749 
1750    UNUSED unsigned index;
1751    index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0);
1752    assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1753 
1754    index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0);
1755    assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1756 
1757    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1758    bool needs_default_sampler_state = false;
1759    NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1760             &needs_default_sampler_state);
1761 
1762    /* If in the end we didn't need to use the default sampler states and the
1763     * shader doesn't need any other samplers, get rid of them so we can
1764     * recognize that this program doesn't use any samplers at all.
1765     */
1766    if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1767       maps->sampler_map.num_desc = 0;
1768 
1769    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1770 }
1771 
1772 /**
1773  * The SPIR-V compiler will insert a sized compact array for
1774  * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1775  * where the size of the array determines the number of active clip planes.
1776  */
1777 static uint32_t
1778 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1779 {
1780    assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1781    const nir_shader *shader = p_stage->nir;
1782    assert(shader);
1783 
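   /* The length of the compact clip-distance array gives the number of
    * enabled user clip planes; turn it into a bitmask.
    */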
1784    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1785       if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1786          assert(var->data.compact);
1787          return (1 << glsl_get_length(var->type)) - 1;
1788       }
1789    }
1790    return 0;
1791 }
1792 
1793 static nir_shader *
1794 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1795                        struct v3dv_pipeline *pipeline,
1796                        struct v3dv_pipeline_cache *cache)
1797 {
1798    int64_t stage_start = os_time_get_nano();
1799 
1800    nir_shader *nir = NULL;
1801 
1802    nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1803                                             &v3dv_nir_options,
1804                                             p_stage->shader_sha1);
1805 
1806    if (nir) {
1807       assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1808 
1809       /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1810        * creation, so the cache hit is not recorded in the pipeline feedback
1811        * flags.
1812        */
1813 
1814       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1815 
1816       return nir;
1817    }
1818 
1819    nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1820 
1821    if (nir) {
1822       struct v3dv_pipeline_cache *default_cache =
1823          &pipeline->device->default_pipeline_cache;
1824 
1825       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1826                                      p_stage->shader_sha1);
1827 
1828       /* Ensure that the variant is in the default cache, as the cmd_buffer
1829        * could need to change the current variant.
1830        */
1831       if (default_cache != cache) {
1832          v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1833                                         p_stage->shader_sha1);
1834       }
1835 
1836       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1837 
1838       return nir;
1839    }
1840 
1841    /* FIXME: this shouldn't happen, raise error? */
1842    return NULL;
1843 }
1844 
1845 static VkResult
1846 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1847                                const VkAllocationCallbacks *pAllocator,
1848                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1849 {
1850    struct v3dv_pipeline_stage *p_stage_vs =
1851       pipeline->stages[BROADCOM_SHADER_VERTEX];
1852    struct v3dv_pipeline_stage *p_stage_vs_bin =
1853       pipeline->stages[BROADCOM_SHADER_VERTEX_BIN];
1854 
1855    assert(p_stage_vs_bin != NULL);
1856    if (p_stage_vs_bin->nir == NULL) {
1857       assert(p_stage_vs->nir);
1858       p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir);
1859    }
1860 
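   /* Compile the render VS first, then reuse the key structure for the
    * binning (coordinate) variant.
    */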
1861    VkResult vk_result;
1862    struct v3d_vs_key key;
1863    pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs);
1864    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1865       pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key),
1866                                       pAllocator, &vk_result);
1867    if (vk_result != VK_SUCCESS)
1868       return vk_result;
1869 
1870    pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin);
1871    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1872       pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key),
1873                                       pAllocator, &vk_result);
1874 
1875    return vk_result;
1876 }
1877 
1878 static VkResult
1879 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1880                                  const VkAllocationCallbacks *pAllocator,
1881                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1882 {
1883    struct v3dv_pipeline_stage *p_stage_gs =
1884       pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1885    struct v3dv_pipeline_stage *p_stage_gs_bin =
1886       pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN];
1887 
1888    assert(p_stage_gs);
1889    assert(p_stage_gs_bin != NULL);
1890    if (p_stage_gs_bin->nir == NULL) {
1891       assert(p_stage_gs->nir);
1892       p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir);
1893    }
1894 
1895    VkResult vk_result;
1896    struct v3d_gs_key key;
1897    pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs);
1898    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1899       pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key),
1900                                       pAllocator, &vk_result);
1901    if (vk_result != VK_SUCCESS)
1902       return vk_result;
1903 
1904    pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin);
1905    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1906       pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key),
1907                                       pAllocator, &vk_result);
1908 
1909    return vk_result;
1910 }
1911 
1912 static VkResult
1913 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1914                                  const VkAllocationCallbacks *pAllocator,
1915                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1916 {
1917    struct v3dv_pipeline_stage *p_stage_vs =
1918       pipeline->stages[BROADCOM_SHADER_VERTEX];
1919    struct v3dv_pipeline_stage *p_stage_fs =
1920       pipeline->stages[BROADCOM_SHADER_FRAGMENT];
1921    struct v3dv_pipeline_stage *p_stage_gs =
1922       pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1923 
1924    struct v3d_fs_key key;
1925    pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage_fs,
1926                                 p_stage_gs != NULL,
1927                                 get_ucp_enable_mask(p_stage_vs));
1928 
1929    if (key.is_points) {
1930       assert(key.point_coord_upper_left);
1931       NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
1932    }
1933 
1934    VkResult vk_result;
1935    pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1936       pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
1937                                       pAllocator, &vk_result);
1938 
1939    return vk_result;
1940 }
1941 
1942 static void
1943 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1944                                struct v3dv_pipeline_key *key,
1945                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1946 {
1947    struct v3dv_device *device = pipeline->device;
1948    assert(device);
1949 
1950    memset(key, 0, sizeof(*key));
1951 
1952    const bool raster_enabled =
1953       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1954 
1955    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1956       pCreateInfo->pInputAssemblyState;
1957    key->topology = vk_to_mesa_prim[ia_info->topology];
1958 
1959    const VkPipelineColorBlendStateCreateInfo *cb_info =
1960       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1961 
1962    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1963       vk_to_pipe_logicop[cb_info->logicOp] :
1964       PIPE_LOGICOP_COPY;
1965 
1966    /* Multisample rasterization state must be ignored if rasterization
1967     * is disabled.
1968     */
1969    const VkPipelineMultisampleStateCreateInfo *ms_info =
1970       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1971    if (ms_info) {
1972       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1973              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1974       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1975 
1976       if (key->msaa)
1977          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1978 
1979       key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1980    }
1981 
1982    const struct v3dv_render_pass *pass =
1983       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1984    const struct v3dv_subpass *subpass = pipeline->subpass;
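   /* Record which color attachments are used and note any using 32-bit float
    * formats, which affects how the fragment shader writes its color outputs.
    */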
1985    for (uint32_t i = 0; i < subpass->color_count; i++) {
1986       const uint32_t att_idx = subpass->color_attachments[i].attachment;
1987       if (att_idx == VK_ATTACHMENT_UNUSED)
1988          continue;
1989 
1990       key->cbufs |= 1 << i;
1991 
1992       VkFormat fb_format = pass->attachments[att_idx].desc.format;
1993       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1994 
1995       /* If logic operations are enabled then we might emit color reads and we
1996        * need to know the color buffer format and swizzle for that
1997        */
1998       if (key->logicop_func != PIPE_LOGICOP_COPY) {
1999          /* Framebuffer formats should be single plane */
2000          assert(vk_format_get_plane_count(fb_format) == 1);
2001          key->color_fmt[i].format = fb_pipe_format;
2002          memcpy(key->color_fmt[i].swizzle,
2003                 v3dv_get_format_swizzle(pipeline->device, fb_format, 0),
2004                 sizeof(key->color_fmt[i].swizzle));
2005       }
2006 
2007       const struct util_format_description *desc =
2008          vk_format_description(fb_format);
2009 
2010       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2011           desc->channel[0].size == 32) {
2012          key->f32_color_rb |= 1 << i;
2013       }
2014    }
2015 
2016    const VkPipelineVertexInputStateCreateInfo *vi_info =
2017       pCreateInfo->pVertexInputState;
2018    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2019       const VkVertexInputAttributeDescription *desc =
2020          &vi_info->pVertexAttributeDescriptions[i];
2021       assert(desc->location < MAX_VERTEX_ATTRIBS);
2022       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
2023           desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
2024          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2025       }
2026    }
2027 
2028    assert(pipeline->subpass);
2029    key->has_multiview = pipeline->subpass->view_mask != 0;
2030 }
2031 
2032 static void
2033 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2034                               struct v3dv_pipeline_key *key,
2035                               const VkComputePipelineCreateInfo *pCreateInfo)
2036 {
2037    struct v3dv_device *device = pipeline->device;
2038    assert(device);
2039 
2040    /* We use the same pipeline key for graphics and compute, but we don't need
2041     * to add a field to flag compute keys because this key is not used alone
2042     * to search in the cache, we also use the SPIR-V or the serialized NIR for
2043     * example, which already flags compute shaders.
2044     */
2045    memset(key, 0, sizeof(*key));
2046 }
2047 
2048 static struct v3dv_pipeline_shared_data *
2049 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2050                                     struct v3dv_pipeline *pipeline,
2051                                     bool is_graphics_pipeline)
2052 {
2053    /* We create new_entry using the device alloc. Right now shared_data is
2054     * referenced and unreferenced by both the pipeline and the pipeline
2055     * cache, so we can't ensure that the cache or pipeline alloc will be
2056     * available on the last unref.
2057     */
2058    struct v3dv_pipeline_shared_data *new_entry =
2059       vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2060                  sizeof(struct v3dv_pipeline_shared_data), 8,
2061                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2062 
2063    if (new_entry == NULL)
2064       return NULL;
2065 
2066    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2067       /* We don't need specific descriptor maps for binning stages; we use
2068        * the map of the corresponding render stage.
2069        */
2070       if (broadcom_shader_stage_is_binning(stage))
2071          continue;
2072 
2073       if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2074           (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2075          continue;
2076       }
2077 
2078       if (stage == BROADCOM_SHADER_GEOMETRY &&
2079           !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
2080          /* We always inject a custom GS if we have multiview */
2081          if (!pipeline->subpass->view_mask)
2082             continue;
2083       }
2084 
2085       struct v3dv_descriptor_maps *new_maps =
2086          vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2087                     sizeof(struct v3dv_descriptor_maps), 8,
2088                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2089 
2090       if (new_maps == NULL)
2091          goto fail;
2092 
2093       new_entry->maps[stage] = new_maps;
2094    }
2095 
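   /* Binning stages share the descriptor maps of their corresponding render
    * stages.
    */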
2096    new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2097       new_entry->maps[BROADCOM_SHADER_VERTEX];
2098 
2099    new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2100       new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2101 
2102    new_entry->ref_cnt = 1;
2103    memcpy(new_entry->sha1_key, sha1_key, 20);
2104 
2105    return new_entry;
2106 
2107 fail:
2108    if (new_entry != NULL) {
2109       for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2110          if (new_entry->maps[stage] != NULL)
2111             vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2112       }
2113    }
2114 
2115    vk_free(&pipeline->device->vk.alloc, new_entry);
2116 
2117    return NULL;
2118 }
2119 
2120 static void
2121 write_creation_feedback(struct v3dv_pipeline *pipeline,
2122                         const void *next,
2123                         const VkPipelineCreationFeedback *pipeline_feedback,
2124                         uint32_t stage_count,
2125                         const VkPipelineShaderStageCreateInfo *stages)
2126 {
2127    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2128       vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2129 
2130    if (create_feedback) {
2131       typed_memcpy(create_feedback->pPipelineCreationFeedback,
2132              pipeline_feedback,
2133              1);
2134 
2135       const uint32_t feedback_stage_count =
2136          create_feedback->pipelineStageCreationFeedbackCount;
2137       assert(feedback_stage_count <= stage_count);
2138 
2139       for (uint32_t i = 0; i < feedback_stage_count; i++) {
2140          gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2141          enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s);
2142 
2143          create_feedback->pPipelineStageCreationFeedbacks[i] =
2144             pipeline->stages[bs]->feedback;
2145 
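         /* Binning shaders are not visible at the API level, so fold their
          * compile time into the feedback of the corresponding render stage.
          */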
2146          if (broadcom_shader_stage_is_render_with_binning(bs)) {
2147             enum broadcom_shader_stage bs_bin =
2148                broadcom_binning_shader_stage_for_render_stage(bs);
2149             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2150                pipeline->stages[bs_bin]->feedback.duration;
2151          }
2152       }
2153    }
2154 }
2155 
2156 static enum mesa_prim
2157 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2158 {
2159    switch (pipeline->topology) {
2160    case MESA_PRIM_POINTS:
2161       return MESA_PRIM_POINTS;
2162    case MESA_PRIM_LINES:
2163    case MESA_PRIM_LINE_STRIP:
2164       return MESA_PRIM_LINES;
2165    case MESA_PRIM_TRIANGLES:
2166    case MESA_PRIM_TRIANGLE_STRIP:
2167    case MESA_PRIM_TRIANGLE_FAN:
2168       return MESA_PRIM_TRIANGLES;
2169    default:
2170       /* Since we don't allow GS with multiview, we can only see non-adjacency
2171        * primitives.
2172        */
2173       unreachable("Unexpected pipeline primitive type");
2174    }
2175 }
2176 
2177 static enum mesa_prim
2178 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2179 {
2180    switch (pipeline->topology) {
2181    case MESA_PRIM_POINTS:
2182       return MESA_PRIM_POINTS;
2183    case MESA_PRIM_LINES:
2184    case MESA_PRIM_LINE_STRIP:
2185       return MESA_PRIM_LINE_STRIP;
2186    case MESA_PRIM_TRIANGLES:
2187    case MESA_PRIM_TRIANGLE_STRIP:
2188    case MESA_PRIM_TRIANGLE_FAN:
2189       return MESA_PRIM_TRIANGLE_STRIP;
2190    default:
2191       /* Since we don't allow GS with multiview, we can only see non-adjacency
2192        * primitives.
2193        */
2194       unreachable("Unexpected pipeline primitive type");
2195    }
2196 }
2197 
2198 static bool
2199 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2200                           struct v3dv_pipeline_cache *cache,
2201                           const VkAllocationCallbacks *pAllocator)
2202 {
2203    /* Create the passthrough GS from the VS output interface */
2204    struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2205    p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2206    nir_shader *vs_nir = p_stage_vs->nir;
2207 
2208    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2209    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2210                                                   "multiview broadcast gs");
2211    nir_shader *nir = b.shader;
2212    nir->info.inputs_read = vs_nir->info.outputs_written;
2213    nir->info.outputs_written = vs_nir->info.outputs_written |
2214                                (1ull << VARYING_SLOT_LAYER);
2215 
2216    uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology);
2217    nir->info.gs.input_primitive =
2218       multiview_gs_input_primitive_from_pipeline(pipeline);
2219    nir->info.gs.output_primitive =
2220       multiview_gs_output_primitive_from_pipeline(pipeline);
2221    nir->info.gs.vertices_in = vertex_count;
2222    nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2223    nir->info.gs.invocations = 1;
2224    nir->info.gs.active_stream_mask = 0x1;
2225 
2226    /* Make a list of GS input/output variables from the VS outputs */
2227    nir_variable *in_vars[100];
2228    nir_variable *out_vars[100];
2229    uint32_t var_count = 0;
2230    nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2231       char name[8];
2232       snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2233 
2234       in_vars[var_count] =
2235          nir_variable_create(nir, nir_var_shader_in,
2236                              glsl_array_type(out_vs_var->type, vertex_count, 0),
2237                              name);
2238       in_vars[var_count]->data.location = out_vs_var->data.location;
2239       in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2240       in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2241 
2242       snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2243       out_vars[var_count] =
2244          nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2245       out_vars[var_count]->data.location = out_vs_var->data.location;
2246       out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2247 
2248       var_count++;
2249    }
2250 
2251    /* Add the gl_Layer output variable */
2252    nir_variable *out_layer =
2253       nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2254                           "out_Layer");
2255    out_layer->data.location = VARYING_SLOT_LAYER;
2256 
2257    /* Get the view index value that we will write to gl_Layer */
2258    nir_def *layer =
2259       nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2260 
2261    /* Emit all output vertices */
2262    for (uint32_t vi = 0; vi < vertex_count; vi++) {
2263       /* Emit all output varyings */
2264       for (uint32_t i = 0; i < var_count; i++) {
2265          nir_deref_instr *in_value =
2266             nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2267          nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2268       }
2269 
2270       /* Emit gl_Layer write */
2271       nir_store_var(&b, out_layer, layer, 0x1);
2272 
2273       nir_emit_vertex(&b, 0);
2274    }
2275    nir_end_primitive(&b, 0);
2276 
2277    /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2278     * with what we expect from SPIR-V modules.
2279     */
2280    preprocess_nir(nir);
2281 
2282    /* Attach the geometry shader to the pipeline */
2283    struct v3dv_device *device = pipeline->device;
2284    struct v3dv_physical_device *physical_device = device->pdevice;
2285 
2286    struct v3dv_pipeline_stage *p_stage =
2287       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2288                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2289 
2290    if (p_stage == NULL) {
2291       ralloc_free(nir);
2292       return false;
2293    }
2294 
2295    p_stage->pipeline = pipeline;
2296    p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2297    p_stage->entrypoint = "main";
2298    p_stage->module = 0;
2299    p_stage->nir = nir;
2300    pipeline_compute_sha1_from_nir(p_stage);
2301    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2302    p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;
2303 
2304    pipeline->has_gs = true;
2305    pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
2306    pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
2307 
2308    pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
2309       pipeline_stage_create_binning(p_stage, pAllocator);
2310    if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
2311       return false;
2312 
2313    return true;
2314 }
2315 
2316 static void
2317 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2318 {
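   /* The pipeline uses buffer device address if any of its shader variants
    * accesses a global address.
    */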
2319    for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2320       struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2321       if (variant && variant->prog_data.base->has_global_address) {
2322          pipeline->uses_buffer_device_address = true;
2323          return;
2324       }
2325    }
2326 
2327    pipeline->uses_buffer_device_address = false;
2328 }
2329 
2330 /*
2331  * Compiles a pipeline. Note that it also allocates internal objects, but if
2332  * some allocations succeed while others fail, the method does not free the
2333  * successful ones.
2334  *
2335  * This is done to simplify the code: in that case we just call the pipeline
2336  * destroy method, which handles freeing the internal objects that were
2337  * allocated. We just need to be careful to set to NULL the objects that
2338  * were not allocated.
2339  */
2340 static VkResult
2341 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2342                           struct v3dv_pipeline_cache *cache,
2343                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
2344                           const VkAllocationCallbacks *pAllocator)
2345 {
2346    VkPipelineCreationFeedback pipeline_feedback = {
2347       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2348    };
2349    int64_t pipeline_start = os_time_get_nano();
2350 
2351    struct v3dv_device *device = pipeline->device;
2352    struct v3dv_physical_device *physical_device = device->pdevice;
2353 
2354    /* First pass to get some common info from the shader, and create the
2355     * individual pipeline_stage objects
2356     */
2357    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2358       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2359       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2360 
2361       struct v3dv_pipeline_stage *p_stage =
2362          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2363                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2364 
2365       if (p_stage == NULL)
2366          return VK_ERROR_OUT_OF_HOST_MEMORY;
2367 
2368       p_stage->program_id =
2369          p_atomic_inc_return(&physical_device->next_program_id);
2370 
2371       enum broadcom_shader_stage broadcom_stage =
2372          gl_shader_stage_to_broadcom(stage);
2373 
2374       p_stage->pipeline = pipeline;
2375       p_stage->stage = broadcom_stage;
2376       p_stage->entrypoint = sinfo->pName;
2377       p_stage->module = vk_shader_module_from_handle(sinfo->module);
2378       p_stage->spec_info = sinfo->pSpecializationInfo;
2379 
2380       vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2381                                         pCreateInfo->pNext, sinfo->pNext);
2382 
2383       vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i],
2384                                     &p_stage->robustness,
2385                                     p_stage->shader_sha1);
2386 
2387       pipeline->active_stages |= sinfo->stage;
2388 
2389       /* We will try to get the compiled shader variant directly, so let's not
2390        * worry about getting the nir shader for now.
2391        */
2392       p_stage->nir = NULL;
2393       pipeline->stages[broadcom_stage] = p_stage;
2394       if (broadcom_stage == BROADCOM_SHADER_GEOMETRY)
2395          pipeline->has_gs = true;
2396 
2397       if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) {
2398          enum broadcom_shader_stage broadcom_stage_bin =
2399             broadcom_binning_shader_stage_for_render_stage(broadcom_stage);
2400 
2401          pipeline->stages[broadcom_stage_bin] =
2402             pipeline_stage_create_binning(p_stage, pAllocator);
2403 
2404          if (pipeline->stages[broadcom_stage_bin] == NULL)
2405             return VK_ERROR_OUT_OF_HOST_MEMORY;
2406       }
2407    }
2408 
2409    /* Add a no-op fragment shader if needed */
2410    if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) {
2411       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2412                                                      &v3dv_nir_options,
2413                                                      "noop_fs");
2414 
2415       struct v3dv_pipeline_stage *p_stage =
2416          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2417                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2418 
2419       if (p_stage == NULL)
2420          return VK_ERROR_OUT_OF_HOST_MEMORY;
2421 
2422       p_stage->pipeline = pipeline;
2423       p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2424       p_stage->entrypoint = "main";
2425       p_stage->module = 0;
2426       p_stage->nir = b.shader;
2427       vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2428                                         NULL, NULL);
2429       pipeline_compute_sha1_from_nir(p_stage);
2430       p_stage->program_id =
2431          p_atomic_inc_return(&physical_device->next_program_id);
2432 
2433       pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage;
2434       pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2435    }
2436 
2437    /* If multiview is enabled, we inject a custom passthrough geometry shader
2438     * to broadcast draw calls to the appropriate views.
2439     */
2440    assert(!pipeline->subpass->view_mask ||
2441           (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));
2442    if (pipeline->subpass->view_mask) {
2443       if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2444          return VK_ERROR_OUT_OF_HOST_MEMORY;
2445    }
2446 
2447    /* First we try to get the variants from the pipeline cache (unless we are
2448     * required to capture internal representations, since in that case we need
2449     * to compile).
2450     */
2451    bool needs_executable_info =
2452       pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2453    if (!needs_executable_info) {
2454       struct v3dv_pipeline_key pipeline_key;
2455       pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2456       pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2457 
2458       bool cache_hit = false;
2459 
2460       pipeline->shared_data =
2461          v3dv_pipeline_cache_search_for_pipeline(cache,
2462                                                  pipeline->sha1,
2463                                                  &cache_hit);
2464 
2465       if (pipeline->shared_data != NULL) {
2466          /* A correct pipeline must have at least a VS and FS */
2467          assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2468          assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2469          assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2470          assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2471                 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2472          assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2473                 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2474 
2475          if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2476             pipeline_feedback.flags |=
2477                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2478 
2479          goto success;
2480       }
2481    }
2482 
2483    if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2484       return VK_PIPELINE_COMPILE_REQUIRED;
2485 
2486    /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2487     * shader or the pipeline cache) and compile.
2488     */
2489    pipeline->shared_data =
2490       v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2491    if (!pipeline->shared_data)
2492       return VK_ERROR_OUT_OF_HOST_MEMORY;
2493 
2494    struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2495    struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT];
2496    struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY];
2497 
2498    p_stage_vs->feedback.flags |=
2499       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2500    if (p_stage_gs)
2501       p_stage_gs->feedback.flags |=
2502          VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2503    p_stage_fs->feedback.flags |=
2504       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2505 
2506    if (!p_stage_vs->nir)
2507       p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2508    if (p_stage_gs && !p_stage_gs->nir)
2509       p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache);
2510    if (!p_stage_fs->nir)
2511       p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache);
2512 
2513    /* Linking + pipeline lowerings */
2514    if (p_stage_gs) {
2515       link_shaders(p_stage_gs->nir, p_stage_fs->nir);
2516       link_shaders(p_stage_vs->nir, p_stage_gs->nir);
2517    } else {
2518       link_shaders(p_stage_vs->nir, p_stage_fs->nir);
2519    }
2520 
2521    pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout);
2522    lower_fs_io(p_stage_fs->nir);
2523 
2524    if (p_stage_gs) {
2525       pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout);
2526       lower_gs_io(p_stage_gs->nir);
2527    }
2528 
2529    pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout);
2530    lower_vs_io(p_stage_vs->nir);
2531 
2532    /* Compiling to vir */
2533    VkResult vk_result;
2534 
2535    /* We should have got all the variants or no variants from the cache */
2536    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2537    vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2538    if (vk_result != VK_SUCCESS)
2539       return vk_result;
2540 
2541    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2542           !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2543 
2544    if (p_stage_gs) {
2545       vk_result =
2546          pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2547       if (vk_result != VK_SUCCESS)
2548          return vk_result;
2549    }
2550 
2551    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2552           !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2553 
2554    vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2555    if (vk_result != VK_SUCCESS)
2556       return vk_result;
2557 
2558    if (!upload_assembly(pipeline))
2559       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2560 
2561    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2562 
2563  success:
2564 
2565    pipeline_check_buffer_device_address(pipeline);
2566 
2567    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2568    write_creation_feedback(pipeline,
2569                            pCreateInfo->pNext,
2570                            &pipeline_feedback,
2571                            pCreateInfo->stageCount,
2572                            pCreateInfo->pStages);
2573 
2574    /* Since we have the variants in the pipeline shared data we can now free
2575     * the pipeline stages.
2576     */
2577    if (!needs_executable_info)
2578       pipeline_free_stages(device, pipeline, pAllocator);
2579 
2580    pipeline_check_spill_size(pipeline);
2581 
2582    return compute_vpm_config(pipeline);
2583 }
2584 
2585 static VkResult
2586 compute_vpm_config(struct v3dv_pipeline *pipeline)
2587 {
2588    struct v3dv_shader_variant *vs_variant =
2589       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2590    struct v3dv_shader_variant *vs_bin_variant =
2591       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2592    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2593    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2594 
2595    struct v3d_gs_prog_data *gs = NULL;
2596    struct v3d_gs_prog_data *gs_bin = NULL;
2597    if (pipeline->has_gs) {
2598       struct v3dv_shader_variant *gs_variant =
2599          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2600       struct v3dv_shader_variant *gs_bin_variant =
2601          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2602       gs = gs_variant->prog_data.gs;
2603       gs_bin = gs_bin_variant->prog_data.gs;
2604    }
2605 
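   /* Let the shared compiler code work out the VPM configuration for both the
    * binning and render passes.
    */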
2606    if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2607                                vs_bin, vs, gs_bin, gs,
2608                                &pipeline->vpm_cfg_bin,
2609                                &pipeline->vpm_cfg)) {
2610       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2611    }
2612 
2613    return VK_SUCCESS;
2614 }
2615 
2616 static unsigned
2617 v3dv_dynamic_state_mask(VkDynamicState state)
2618 {
2619    switch(state) {
2620    case VK_DYNAMIC_STATE_VIEWPORT:
2621       return V3DV_DYNAMIC_VIEWPORT;
2622    case VK_DYNAMIC_STATE_SCISSOR:
2623       return V3DV_DYNAMIC_SCISSOR;
2624    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2625       return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2626    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2627       return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2628    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2629       return V3DV_DYNAMIC_STENCIL_REFERENCE;
2630    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2631       return V3DV_DYNAMIC_BLEND_CONSTANTS;
2632    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2633       return V3DV_DYNAMIC_DEPTH_BIAS;
2634    case VK_DYNAMIC_STATE_LINE_WIDTH:
2635       return V3DV_DYNAMIC_LINE_WIDTH;
2636    case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2637       return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2638    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2639       return V3DV_DYNAMIC_DEPTH_BOUNDS;
2640 
2641    default:
2642       unreachable("Unhandled dynamic state");
2643    }
2644 }
2645 
2646 static void
2647 pipeline_init_dynamic_state(
2648    struct v3dv_pipeline *pipeline,
2649    const VkPipelineDynamicStateCreateInfo *pDynamicState,
2650    const VkPipelineViewportStateCreateInfo *pViewportState,
2651    const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2652    const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2653    const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2654    const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2655 {
2656    /* Initialize to default values */
2657    const struct v3d_device_info *devinfo = &pipeline->device->devinfo;
2658    struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2659    memset(dynamic, 0, sizeof(*dynamic));
2660    dynamic->stencil_compare_mask.front = ~0;
2661    dynamic->stencil_compare_mask.back = ~0;
2662    dynamic->stencil_write_mask.front = ~0;
2663    dynamic->stencil_write_mask.back = ~0;
2664    dynamic->line_width = 1.0f;
2665    dynamic->color_write_enable =
2666       (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1;
2667    dynamic->depth_bounds.max = 1.0f;
2668 
2669    /* Create a mask of enabled dynamic states */
2670    uint32_t dynamic_states = 0;
2671    if (pDynamicState) {
2672       uint32_t count = pDynamicState->dynamicStateCount;
2673       for (uint32_t s = 0; s < count; s++) {
2674          dynamic_states |=
2675             v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2676       }
2677    }
2678 
2679    /* For any pipeline states that are not dynamic, set the dynamic state
2680     * from the static pipeline state.
2681     */
2682    if (pViewportState) {
2683       if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2684          dynamic->viewport.count = pViewportState->viewportCount;
2685          typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2686                       pViewportState->viewportCount);
2687 
2688          for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2689             v3dv_X(pipeline->device, viewport_compute_xform)
2690                (&dynamic->viewport.viewports[i],
2691                 dynamic->viewport.scale[i],
2692                 dynamic->viewport.translate[i]);
2693          }
2694       }
2695 
2696       if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2697          dynamic->scissor.count = pViewportState->scissorCount;
2698          typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2699                       pViewportState->scissorCount);
2700       }
2701    }
2702 
2703    if (pDepthStencilState) {
2704       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2705          dynamic->stencil_compare_mask.front =
2706             pDepthStencilState->front.compareMask;
2707          dynamic->stencil_compare_mask.back =
2708             pDepthStencilState->back.compareMask;
2709       }
2710 
2711       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2712          dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2713          dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2714       }
2715 
2716       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2717          dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2718          dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2719       }
2720 
2721       if (!(dynamic_states & V3DV_DYNAMIC_DEPTH_BOUNDS)) {
2722          dynamic->depth_bounds.min = pDepthStencilState->minDepthBounds;
2723          dynamic->depth_bounds.max = pDepthStencilState->maxDepthBounds;
2724       }
2725    }
2726 
2727    if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2728       memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2729              sizeof(dynamic->blend_constants));
2730    }
2731 
2732    if (pRasterizationState) {
2733       if (pRasterizationState->depthBiasEnable &&
2734           !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2735          dynamic->depth_bias.constant_factor =
2736             pRasterizationState->depthBiasConstantFactor;
2737          dynamic->depth_bias.depth_bias_clamp =
2738             pRasterizationState->depthBiasClamp;
2739          dynamic->depth_bias.slope_factor =
2740             pRasterizationState->depthBiasSlopeFactor;
2741       }
2742       if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2743          dynamic->line_width = pRasterizationState->lineWidth;
2744    }
2745 
2746    if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2747       dynamic->color_write_enable = 0;
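      /* Expand each per-attachment VkBool32 into its 4-bit RGBA group, e.g.
       * enabling only attachment 1 sets bits 4..7.
       */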
2748       for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2749          dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2750    }
2751 
2752    pipeline->dynamic_state.mask = dynamic_states;
2753 }
2754 
2755 static bool
2756 stencil_op_is_no_op(const VkStencilOpState *stencil)
2757 {
2758    return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2759           stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2760 }
2761 
2762 static void
2763 enable_depth_bias(struct v3dv_pipeline *pipeline,
2764                   const VkPipelineRasterizationStateCreateInfo *rs_info)
2765 {
2766    pipeline->depth_bias.enabled = false;
2767    pipeline->depth_bias.is_z16 = false;
2768 
2769    if (!rs_info || !rs_info->depthBiasEnable)
2770       return;
2771 
2772    /* Check the depth/stencil attachment description for the subpass used with
2773     * this pipeline.
2774     */
2775    assert(pipeline->pass && pipeline->subpass);
2776    struct v3dv_render_pass *pass = pipeline->pass;
2777    struct v3dv_subpass *subpass = pipeline->subpass;
2778 
2779    if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2780       return;
2781 
2782    assert(subpass->ds_attachment.attachment < pass->attachment_count);
2783    struct v3dv_render_pass_attachment *att =
2784       &pass->attachments[subpass->ds_attachment.attachment];
2785 
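   /* Depth bias units depend on the depth buffer format, so note whether the
    * attachment is 16-bit (D16_UNORM) so the constant factor can be scaled
    * accordingly when the depth bias state is emitted.
    */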
2786    if (att->desc.format == VK_FORMAT_D16_UNORM)
2787       pipeline->depth_bias.is_z16 = true;
2788 
2789    pipeline->depth_bias.enabled = true;
2790 }
2791 
2792 static void
2793 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2794                       const VkPipelineDepthStencilStateCreateInfo *ds_info)
2795 {
2796    if (!ds_info || !ds_info->depthTestEnable) {
2797       pipeline->ez_state = V3D_EZ_DISABLED;
2798       return;
2799    }
2800 
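   /* Choose an early-Z (EZ) direction compatible with the depth compare op:
    * LESS/LESS_OR_EQUAL and GREATER/GREATER_OR_EQUAL each select a fixed
    * direction, NEVER/EQUAL leave it undecided, and any other op disables EZ
    * and flags the test as incompatible.
    */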
2801    switch (ds_info->depthCompareOp) {
2802    case VK_COMPARE_OP_LESS:
2803    case VK_COMPARE_OP_LESS_OR_EQUAL:
2804       pipeline->ez_state = V3D_EZ_LT_LE;
2805       break;
2806    case VK_COMPARE_OP_GREATER:
2807    case VK_COMPARE_OP_GREATER_OR_EQUAL:
2808       pipeline->ez_state = V3D_EZ_GT_GE;
2809       break;
2810    case VK_COMPARE_OP_NEVER:
2811    case VK_COMPARE_OP_EQUAL:
2812       pipeline->ez_state = V3D_EZ_UNDECIDED;
2813       break;
2814    default:
2815       pipeline->ez_state = V3D_EZ_DISABLED;
2816       pipeline->incompatible_ez_test = true;
2817       break;
2818    }
2819 
2820    /* If stencil is enabled and is not a no-op, we need to disable EZ */
2821    if (ds_info->stencilTestEnable &&
2822        (!stencil_op_is_no_op(&ds_info->front) ||
2823         !stencil_op_is_no_op(&ds_info->back))) {
2824          pipeline->ez_state = V3D_EZ_DISABLED;
2825    }
2826 
2827    /* If the FS writes Z, then it may update against the chosen EZ direction */
2828    struct v3dv_shader_variant *fs_variant =
2829       pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2830    if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2831        !fs_variant->prog_data.fs->writes_z_from_fep) {
2832       pipeline->ez_state = V3D_EZ_DISABLED;
2833    }
2834 }
2835 
2836 static void
2837 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2838                          const VkPipelineMultisampleStateCreateInfo *ms_info)
2839 {
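   /* Start with all samples enabled; with V3D_MAX_SAMPLES == 4 this is 0xf. */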
2840    pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2841 
2842    /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2843     * requires this to be 0xf or 0x0 if using a single sample.
2844     */
2845    if (ms_info && ms_info->pSampleMask &&
2846        ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2847       pipeline->sample_mask &= ms_info->pSampleMask[0];
2848    }
2849 }
2850 
2851 static void
2852 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2853                                  const VkPipelineMultisampleStateCreateInfo *ms_info)
2854 {
2855    pipeline->sample_rate_shading =
2856       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2857       ms_info->sampleShadingEnable;
2858 }
2859 
2860 static VkResult
2861 pipeline_init(struct v3dv_pipeline *pipeline,
2862               struct v3dv_device *device,
2863               struct v3dv_pipeline_cache *cache,
2864               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2865               const VkAllocationCallbacks *pAllocator)
2866 {
2867    VkResult result = VK_SUCCESS;
2868 
2869    pipeline->device = device;
2870    pipeline->flags = pCreateInfo->flags;
2871 
2872    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2873    pipeline->layout = layout;
2874    v3dv_pipeline_layout_ref(pipeline->layout);
2875 
2876    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2877    assert(pCreateInfo->subpass < render_pass->subpass_count);
2878    pipeline->pass = render_pass;
2879    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2880 
2881    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2882       pCreateInfo->pInputAssemblyState;
2883    pipeline->topology = vk_to_mesa_prim[ia_info->topology];
2884 
2885    /* If rasterization is not enabled, the viewport, multisample, depth/stencil
2886     * and color blend state must be ignored.
2887     */
2888    const bool raster_enabled =
2889       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2890 
2891    const VkPipelineViewportStateCreateInfo *vp_info =
2892       raster_enabled ? pCreateInfo->pViewportState : NULL;
2893 
2894    const VkPipelineDepthStencilStateCreateInfo *ds_info =
2895       raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2896 
2897    const VkPipelineRasterizationStateCreateInfo *rs_info =
2898       raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2899 
2900    const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2901       rs_info ? vk_find_struct_const(
2902          rs_info->pNext,
2903          PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2904             NULL;
2905 
2906    const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2907       rs_info ? vk_find_struct_const(
2908          rs_info->pNext,
2909          PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2910             NULL;
2911 
2912    const VkPipelineColorBlendStateCreateInfo *cb_info =
2913       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2914 
2915    const VkPipelineMultisampleStateCreateInfo *ms_info =
2916       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2917 
2918    const VkPipelineColorWriteCreateInfoEXT *cw_info =
2919       cb_info ? vk_find_struct_const(cb_info->pNext,
2920                                      PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2921                 NULL;
2922 
2923    if (vp_info) {
2924       const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
2925          vk_find_struct_const(vp_info->pNext,
2926                               PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT);
2927       if (depth_clip_control)
2928          pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne;
2929    }
2930 
2931    pipeline_init_dynamic_state(pipeline,
2932                                pCreateInfo->pDynamicState,
2933                                vp_info, ds_info, cb_info, rs_info, cw_info);
2934 
2935    /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2936     * feature and it shouldn't be used by any pipeline.
2937     */
2938    assert(device->devinfo.ver >= 71 ||
2939           !ds_info || !ds_info->depthBoundsTestEnable);
2940    pipeline->depth_bounds_test_enabled = ds_info && ds_info->depthBoundsTestEnable;
2941 
2942    enable_depth_bias(pipeline, rs_info);
2943 
2944    v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2945                                        rs_info, pv_info, ls_info,
2946                                        ms_info);
2947 
2948    pipeline_set_sample_mask(pipeline, ms_info);
2949    pipeline_set_sample_rate_shading(pipeline, ms_info);
2950 
2951    pipeline->primitive_restart =
2952       pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2953 
2954    result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2955 
2956    if (result != VK_SUCCESS) {
2957       /* The caller will destroy the pipeline and we didn't allocate any extra
2958        * data, so there is nothing else to clean up here.
2959        */
2960       return result;
2961    }
2962 
2963    const VkPipelineVertexInputStateCreateInfo *vi_info =
2964       pCreateInfo->pVertexInputState;
2965 
2966    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
2967       vk_find_struct_const(vi_info->pNext,
2968                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2969 
2970    v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
2971 
2972    if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
2973       pipeline->default_attribute_values =
2974          v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
2975 
2976       if (!pipeline->default_attribute_values)
2977          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2978    } else {
2979       pipeline->default_attribute_values = NULL;
2980    }
2981 
2982    /* This must be done after the pipeline has been compiled */
2983    pipeline_set_ez_state(pipeline, ds_info);
2984 
2985    return result;
2986 }
2987 
2988 static VkResult
2989 graphics_pipeline_create(VkDevice _device,
2990                          VkPipelineCache _cache,
2991                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
2992                          const VkAllocationCallbacks *pAllocator,
2993                          VkPipeline *pPipeline)
2994 {
2995    V3DV_FROM_HANDLE(v3dv_device, device, _device);
2996    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2997 
2998    struct v3dv_pipeline *pipeline;
2999    VkResult result;
3000 
3001    /* Use the default pipeline cache if none is specified */
3002    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3003       cache = &device->default_pipeline_cache;
3004 
3005    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3006                                VK_OBJECT_TYPE_PIPELINE);
3007 
3008    if (pipeline == NULL)
3009       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3010 
3011    result = pipeline_init(pipeline, device, cache,
3012                           pCreateInfo,
3013                           pAllocator);
3014 
3015    if (result != VK_SUCCESS) {
3016       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3017       if (result == VK_PIPELINE_COMPILE_REQUIRED)
3018          *pPipeline = VK_NULL_HANDLE;
3019       return result;
3020    }
3021 
3022    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3023 
3024    return VK_SUCCESS;
3025 }
3026 
3027 VKAPI_ATTR VkResult VKAPI_CALL
3028 v3dv_CreateGraphicsPipelines(VkDevice _device,
3029                              VkPipelineCache pipelineCache,
3030                              uint32_t count,
3031                              const VkGraphicsPipelineCreateInfo *pCreateInfos,
3032                              const VkAllocationCallbacks *pAllocator,
3033                              VkPipeline *pPipelines)
3034 {
3035    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3036    VkResult result = VK_SUCCESS;
3037 
3038    if (V3D_DBG(SHADERS))
3039       mtx_lock(&device->pdevice->mutex);
3040 
3041    uint32_t i = 0;
3042    for (; i < count; i++) {
3043       VkResult local_result;
3044 
3045       local_result = graphics_pipeline_create(_device,
3046                                               pipelineCache,
3047                                               &pCreateInfos[i],
3048                                               pAllocator,
3049                                               &pPipelines[i]);
3050 
3051       if (local_result != VK_SUCCESS) {
3052          result = local_result;
3053          pPipelines[i] = VK_NULL_HANDLE;
3054 
3055          if (pCreateInfos[i].flags &
3056              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3057             break;
3058       }
3059    }
3060 
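   /* Per the spec, any entries we did not attempt after an early failure must
    * still be set to VK_NULL_HANDLE.
    */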
3061    for (; i < count; i++)
3062       pPipelines[i] = VK_NULL_HANDLE;
3063 
3064    if (V3D_DBG(SHADERS))
3065       mtx_unlock(&device->pdevice->mutex);
3066 
3067    return result;
3068 }
3069 
3070 static void
3071 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3072 {
3073    assert(glsl_type_is_vector_or_scalar(type));
3074 
3075    uint32_t comp_size = glsl_type_is_boolean(type)
3076       ? 4 : glsl_get_bit_size(type) / 8;
3077    unsigned length = glsl_get_vector_elements(type);
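   /* The size is the packed vector size, while the alignment pads 3-component
    * vectors to 4 components, e.g. a vec3 of 32-bit components gets 16-byte
    * alignment.
    */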
3078    *size = comp_size * length;
3079    *align = comp_size * (length == 3 ? 4 : length);
3080 }
3081 
3082 static void
3083 lower_compute(struct nir_shader *nir)
3084 {
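   /* Assign explicit offsets to shared (workgroup) memory variables unless the
    * shader already provides an explicit layout, then lower shared-memory
    * access to 32-bit offset addressing and lower compute system values.
    */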
3085    if (!nir->info.shared_memory_explicit_layout) {
3086       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3087                nir_var_mem_shared, shared_type_info);
3088    }
3089 
3090    NIR_PASS(_, nir, nir_lower_explicit_io,
3091             nir_var_mem_shared, nir_address_format_32bit_offset);
3092 
3093    struct nir_lower_compute_system_values_options sysval_options = {
3094       .has_base_workgroup_id = true,
3095    };
3096    NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
3097 }
3098 
3099 static VkResult
3100 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3101                          struct v3dv_pipeline_cache *cache,
3102                          const VkComputePipelineCreateInfo *info,
3103                          const VkAllocationCallbacks *alloc)
3104 {
3105    VkPipelineCreationFeedback pipeline_feedback = {
3106       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3107    };
3108    int64_t pipeline_start = os_time_get_nano();
3109 
3110    struct v3dv_device *device = pipeline->device;
3111    struct v3dv_physical_device *physical_device = device->pdevice;
3112 
3113    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3114    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3115 
3116    struct v3dv_pipeline_stage *p_stage =
3117       vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3118                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3119    if (!p_stage)
3120       return VK_ERROR_OUT_OF_HOST_MEMORY;
3121 
3122    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3123    p_stage->pipeline = pipeline;
3124    p_stage->stage = gl_shader_stage_to_broadcom(stage);
3125    p_stage->entrypoint = sinfo->pName;
3126    p_stage->module = vk_shader_module_from_handle(sinfo->module);
3127    p_stage->spec_info = sinfo->pSpecializationInfo;
3128    p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3129 
3130    vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
3131                                      info->pNext, sinfo->pNext);
3132 
3133    vk_pipeline_hash_shader_stage(&info->stage,
3134                                  &p_stage->robustness,
3135                                  p_stage->shader_sha1);
3136 
3137    p_stage->nir = NULL;
3138 
3139    pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage;
3140    pipeline->active_stages |= sinfo->stage;
3141 
3142    /* First we try to get the variants from the pipeline cache (unless we are
3143     * required to capture internal representations, since in that case we need
3144     * to compile).
3145     */
3146    bool needs_executable_info =
3147       info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3148    if (!needs_executable_info) {
3149       struct v3dv_pipeline_key pipeline_key;
3150       pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3151       pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3152 
3153       bool cache_hit = false;
3154       pipeline->shared_data =
3155          v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3156 
3157       if (pipeline->shared_data != NULL) {
3158          assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3159          if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3160             pipeline_feedback.flags |=
3161                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3162 
3163          goto success;
3164       }
3165    }
3166 
3167    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3168       return VK_PIPELINE_COMPILE_REQUIRED;
3169 
3170    pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3171                                                                pipeline,
3172                                                                false);
3173    if (!pipeline->shared_data)
3174       return VK_ERROR_OUT_OF_HOST_MEMORY;
3175 
3176    p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3177 
3178    /* If not found on cache, compile it */
3179    p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3180    assert(p_stage->nir);
3181 
3182    v3d_optimize_nir(NULL, p_stage->nir);
3183    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3184    lower_compute(p_stage->nir);
3185 
3186    VkResult result = VK_SUCCESS;
3187 
3188    struct v3d_key key;
3189    memset(&key, 0, sizeof(key));
3190    pipeline_populate_v3d_key(&key, p_stage, 0);
3191    pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3192       pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3193                                       alloc, &result);
3194 
3195    if (result != VK_SUCCESS)
3196       return result;
3197 
3198    if (!upload_assembly(pipeline))
3199       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3200 
3201    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3202 
3203 success:
3204 
3205    pipeline_check_buffer_device_address(pipeline);
3206 
3207    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3208    write_creation_feedback(pipeline,
3209                            info->pNext,
3210                            &pipeline_feedback,
3211                            1,
3212                            &info->stage);
3213 
3214    /* Since the variants are now stored in pipeline->shared_data, we no longer
3215     * need the pipeline stages after compiling.
3216     */
3217    if (!needs_executable_info)
3218       pipeline_free_stages(device, pipeline, alloc);
3219 
3220    pipeline_check_spill_size(pipeline);
3221 
3222    return VK_SUCCESS;
3223 }
3224 
3225 static VkResult
3226 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3227                       struct v3dv_device *device,
3228                       struct v3dv_pipeline_cache *cache,
3229                       const VkComputePipelineCreateInfo *info,
3230                       const VkAllocationCallbacks *alloc)
3231 {
3232    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3233 
3234    pipeline->device = device;
3235    pipeline->layout = layout;
3236    v3dv_pipeline_layout_ref(pipeline->layout);
3237 
3238    VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3239    if (result != VK_SUCCESS)
3240       return result;
3241 
3242    return result;
3243 }
3244 
3245 static VkResult
3246 compute_pipeline_create(VkDevice _device,
3247                          VkPipelineCache _cache,
3248                          const VkComputePipelineCreateInfo *pCreateInfo,
3249                          const VkAllocationCallbacks *pAllocator,
3250                          VkPipeline *pPipeline)
3251 {
3252    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3253    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3254 
3255    struct v3dv_pipeline *pipeline;
3256    VkResult result;
3257 
3258    /* Use the default pipeline cache if none is specified */
3259    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3260       cache = &device->default_pipeline_cache;
3261 
3262    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3263                                VK_OBJECT_TYPE_PIPELINE);
3264    if (pipeline == NULL)
3265       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3266 
3267    result = compute_pipeline_init(pipeline, device, cache,
3268                                   pCreateInfo, pAllocator);
3269    if (result != VK_SUCCESS) {
3270       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3271       if (result == VK_PIPELINE_COMPILE_REQUIRED)
3272          *pPipeline = VK_NULL_HANDLE;
3273       return result;
3274    }
3275 
3276    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3277 
3278    return VK_SUCCESS;
3279 }
3280 
3281 VKAPI_ATTR VkResult VKAPI_CALL
3282 v3dv_CreateComputePipelines(VkDevice _device,
3283                             VkPipelineCache pipelineCache,
3284                             uint32_t createInfoCount,
3285                             const VkComputePipelineCreateInfo *pCreateInfos,
3286                             const VkAllocationCallbacks *pAllocator,
3287                             VkPipeline *pPipelines)
3288 {
3289    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3290    VkResult result = VK_SUCCESS;
3291 
3292    if (V3D_DBG(SHADERS))
3293       mtx_lock(&device->pdevice->mutex);
3294 
3295    uint32_t i = 0;
3296    for (; i < createInfoCount; i++) {
3297       VkResult local_result;
3298       local_result = compute_pipeline_create(_device,
3299                                               pipelineCache,
3300                                               &pCreateInfos[i],
3301                                               pAllocator,
3302                                               &pPipelines[i]);
3303 
3304       if (local_result != VK_SUCCESS) {
3305          result = local_result;
3306          pPipelines[i] = VK_NULL_HANDLE;
3307 
3308          if (pCreateInfos[i].flags &
3309              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3310             break;
3311       }
3312    }
3313 
3314    for (; i < createInfoCount; i++)
3315       pPipelines[i] = VK_NULL_HANDLE;
3316 
3317    if (V3D_DBG(SHADERS))
3318       mtx_unlock(&device->pdevice->mutex);
3319 
3320    return result;
3321 }
3322 
3323 static nir_shader *
3324 pipeline_get_nir(struct v3dv_pipeline *pipeline,
3325                  enum broadcom_shader_stage stage)
3326 {
3327    assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES);
3328    if (pipeline->stages[stage])
3329       return pipeline->stages[stage]->nir;
3330 
3331    return NULL;
3332 }
3333 
3334 static struct v3d_prog_data *
3335 pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3336                        enum broadcom_shader_stage stage)
3337 {
3338    if (pipeline->shared_data->variants[stage])
3339       return pipeline->shared_data->variants[stage]->prog_data.base;
3340    return NULL;
3341 }
3342 
3343 static uint64_t *
3344 pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3345                  enum broadcom_shader_stage stage,
3346                  uint32_t *qpu_size)
3347 {
3348    struct v3dv_shader_variant *variant =
3349       pipeline->shared_data->variants[stage];
3350    if (!variant) {
3351       *qpu_size = 0;
3352       return NULL;
3353    }
3354 
3355    *qpu_size = variant->qpu_insts_size;
3356    return variant->qpu_insts;
3357 }
3358 
3359 /* FIXME: we use the same macro in various drivers, maybe move it to
3360  * the common vk_util.h?
3361  */
3362 #define WRITE_STR(field, ...) ({                                \
3363    memset(field, 0, sizeof(field));                             \
3364    UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3365    assert(_i > 0 && _i < sizeof(field));                        \
3366 })
3367 
3368 static bool
3369 write_ir_text(VkPipelineExecutableInternalRepresentationKHR *ir,
3370               const char *data)
3371 {
3372    ir->isText = VK_TRUE;
3373 
3374    size_t data_len = strlen(data) + 1;
3375 
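   /* Standard Vulkan two-call idiom: if pData is NULL we only report the
    * required size; otherwise we copy as much as fits and signal truncation
    * by returning false.
    */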
3376    if (ir->pData == NULL) {
3377       ir->dataSize = data_len;
3378       return true;
3379    }
3380 
3381    strncpy(ir->pData, data, ir->dataSize);
3382    if (ir->dataSize < data_len)
3383       return false;
3384 
3385    ir->dataSize = data_len;
3386    return true;
3387 }
3388 
3389 static void
3390 append(char **str, size_t *offset, const char *fmt, ...)
3391 {
3392    va_list args;
3393    va_start(args, fmt);
3394    ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3395    va_end(args);
3396 }
3397 
3398 static void
3399 pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3400 {
3401    if (pipeline->executables.mem_ctx)
3402       return;
3403 
3404    pipeline->executables.mem_ctx = ralloc_context(NULL);
3405    util_dynarray_init(&pipeline->executables.data,
3406                       pipeline->executables.mem_ctx);
3407 
3408    /* Don't crash for failed/bogus pipelines */
3409    if (!pipeline->shared_data)
3410       return;
3411 
3412    for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3413       VkShaderStageFlags vk_stage =
3414          mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3415       if (!(vk_stage & pipeline->active_stages))
3416          continue;
3417 
3418       char *nir_str = NULL;
3419       char *qpu_str = NULL;
3420 
3421       if (pipeline_keep_qpu(pipeline)) {
3422          nir_shader *nir = pipeline_get_nir(pipeline, s);
3423          nir_str = nir ?
3424             nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3425 
3426          uint32_t qpu_size;
3427          uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3428          if (qpu) {
3429             uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
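            /* Reserve 96 bytes of text per instruction, assumed to be enough
             * for any disassembled QPU instruction plus a newline.
             */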
3430             qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3431                                    qpu_inst_count * 96);
3432             size_t offset = 0;
3433             for (int i = 0; i < qpu_inst_count; i++) {
3434                const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3435                append(&qpu_str, &offset, "%s\n", str);
3436                ralloc_free((void *)str);
3437             }
3438          }
3439       }
3440 
3441       struct v3dv_pipeline_executable_data data = {
3442          .stage = s,
3443          .nir_str = nir_str,
3444          .qpu_str = qpu_str,
3445       };
3446       util_dynarray_append(&pipeline->executables.data,
3447                            struct v3dv_pipeline_executable_data, data);
3448    }
3449 }
3450 
3451 static const struct v3dv_pipeline_executable_data *
3452 pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3453 {
3454    assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3455                                              struct v3dv_pipeline_executable_data));
3456    return util_dynarray_element(&pipeline->executables.data,
3457                                 struct v3dv_pipeline_executable_data,
3458                                 index);
3459 }
3460 
3461 VKAPI_ATTR VkResult VKAPI_CALL
3462 v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3463    VkDevice device,
3464    const VkPipelineExecutableInfoKHR *pExecutableInfo,
3465    uint32_t *pInternalRepresentationCount,
3466    VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3467 {
3468    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3469 
3470    pipeline_collect_executable_data(pipeline);
3471 
3472    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3473                           pInternalRepresentations, pInternalRepresentationCount);
3474 
3475    bool incomplete = false;
3476    const struct v3dv_pipeline_executable_data *exe =
3477       pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3478 
3479    if (exe->nir_str) {
3480       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3481                                &out, ir) {
3482          WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3483          WRITE_STR(ir->description, "Final NIR form");
3484          if (!write_ir_text(ir, exe->nir_str))
3485             incomplete = true;
3486       }
3487    }
3488 
3489    if (exe->qpu_str) {
3490       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3491                                &out, ir) {
3492          WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3493          WRITE_STR(ir->description, "Final QPU assembly");
3494          if (!write_ir_text(ir, exe->qpu_str))
3495             incomplete = true;
3496       }
3497    }
3498 
3499    return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3500 }
3501 
3502 VKAPI_ATTR VkResult VKAPI_CALL
3503 v3dv_GetPipelineExecutablePropertiesKHR(
3504    VkDevice device,
3505    const VkPipelineInfoKHR *pPipelineInfo,
3506    uint32_t *pExecutableCount,
3507    VkPipelineExecutablePropertiesKHR *pProperties)
3508 {
3509    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3510 
3511    pipeline_collect_executable_data(pipeline);
3512 
3513    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3514                           pProperties, pExecutableCount);
3515 
3516    util_dynarray_foreach(&pipeline->executables.data,
3517                          struct v3dv_pipeline_executable_data, exe) {
3518       vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3519          gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3520          props->stages = mesa_to_vk_shader_stage(mesa_stage);
3521 
3522          WRITE_STR(props->name, "%s (%s)",
3523                    _mesa_shader_stage_to_abbrev(mesa_stage),
3524                    broadcom_shader_stage_is_binning(exe->stage) ?
3525                      "Binning" : "Render");
3526 
3527          WRITE_STR(props->description, "%s",
3528                    _mesa_shader_stage_to_string(mesa_stage));
3529 
3530          props->subgroupSize = V3D_CHANNELS;
3531       }
3532    }
3533 
3534    return vk_outarray_status(&out);
3535 }
3536 
3537 VKAPI_ATTR VkResult VKAPI_CALL
3538 v3dv_GetPipelineExecutableStatisticsKHR(
3539    VkDevice device,
3540    const VkPipelineExecutableInfoKHR *pExecutableInfo,
3541    uint32_t *pStatisticCount,
3542    VkPipelineExecutableStatisticKHR *pStatistics)
3543 {
3544    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3545 
3546    pipeline_collect_executable_data(pipeline);
3547 
3548    const struct v3dv_pipeline_executable_data *exe =
3549       pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3550 
3551    struct v3d_prog_data *prog_data =
3552       pipeline_get_prog_data(pipeline, exe->stage);
3553 
3554    struct v3dv_shader_variant *variant =
3555       pipeline->shared_data->variants[exe->stage];
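   /* Each QPU instruction is a 64-bit word. */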
3556    uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3557 
3558    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3559                           pStatistics, pStatisticCount);
3560 
3561    if (qpu_inst_count > 0) {
3562       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3563          WRITE_STR(stat->name, "Compile Strategy");
3564          WRITE_STR(stat->description, "Chosen compile strategy index");
3565          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3566          stat->value.u64 = prog_data->compile_strategy_idx;
3567       }
3568 
3569       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3570          WRITE_STR(stat->name, "Instruction Count");
3571          WRITE_STR(stat->description, "Number of QPU instructions");
3572          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3573          stat->value.u64 = qpu_inst_count;
3574       }
3575 
3576       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3577          WRITE_STR(stat->name, "Thread Count");
3578          WRITE_STR(stat->description, "Number of QPU threads dispatched");
3579          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3580          stat->value.u64 = prog_data->threads;
3581       }
3582 
3583       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3584          WRITE_STR(stat->name, "Spill Size");
3585          WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3586          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3587          stat->value.u64 = prog_data->spill_size;
3588       }
3589 
3590       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3591          WRITE_STR(stat->name, "TMU Spills");
3592          WRITE_STR(stat->description, "Number of times a register was spilled "
3593                                       "to memory");
3594          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3595          stat->value.u64 = prog_data->tmu_spills;
3596       }
3597 
3598       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3599          WRITE_STR(stat->name, "TMU Fills");
3600          WRITE_STR(stat->description, "Number of times a register was filled "
3601                                       "from memory");
3602          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3603          stat->value.u64 = prog_data->tmu_fills;
3604       }
3605 
3606       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3607          WRITE_STR(stat->name, "QPU Read Stalls");
3608          WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3609                                       "register read dependency");
3610          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3611          stat->value.u64 = prog_data->qpu_read_stalls;
3612       }
3613    }
3614 
3615    return vk_outarray_status(&out);
3616 }
3617