1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_util.h"
25 
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28 
29 #include "vk_format_info.h"
30 
31 #include "common/v3d_debug.h"
32 
33 #include "compiler/nir/nir_builder.h"
34 #include "nir/nir_serialize.h"
35 
36 #include "util/u_atomic.h"
37 
38 #include "vulkan/util/vk_format.h"
39 
40 #include "broadcom/cle/v3dx_pack.h"
41 
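/* Debug helper: prints a SHA1 hash of the given v3d_key contents so that
 * different compiler keys can easily be told apart in debug output.
 */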
42 void
43 v3dv_print_v3d_key(struct v3d_key *key,
44                    uint32_t v3d_key_size)
45 {
46    struct mesa_sha1 ctx;
47    unsigned char sha1[20];
48    char sha1buf[41];
49 
50    _mesa_sha1_init(&ctx);
51 
52    _mesa_sha1_update(&ctx, key, v3d_key_size);
53 
54    _mesa_sha1_final(&ctx, sha1);
55    _mesa_sha1_format(sha1buf, sha1);
56 
57    fprintf(stderr, "key %p: %s\n", key, sha1buf);
58 }
59 
60 VkResult
61 v3dv_CreateShaderModule(VkDevice _device,
62                         const VkShaderModuleCreateInfo *pCreateInfo,
63                         const VkAllocationCallbacks *pAllocator,
64                         VkShaderModule *pShaderModule)
65 {
66    V3DV_FROM_HANDLE(v3dv_device, device, _device);
67    struct v3dv_shader_module *module;
68 
69    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
70    assert(pCreateInfo->flags == 0);
71 
72    module = vk_alloc2(&device->alloc, pAllocator,
73                       sizeof(*module) + pCreateInfo->codeSize, 8,
74                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
75    if (module == NULL)
76       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
77 
78    module->nir = NULL;
79 
80    module->size = pCreateInfo->codeSize;
81    memcpy(module->data, pCreateInfo->pCode, module->size);
82 
83    _mesa_sha1_compute(module->data, module->size, module->sha1);
84 
85    *pShaderModule = v3dv_shader_module_to_handle(module);
86 
87    return VK_SUCCESS;
88 }
89 
90 void
91 v3dv_shader_module_internal_init(struct v3dv_shader_module *module,
92                                  nir_shader *nir)
93 {
94    module->nir = nir;
95    module->size = 0;
96 
97    if (nir != NULL) {
98       struct blob blob;
99       blob_init(&blob);
100 
101       nir_serialize(&blob, nir, false);
102       if (!blob.out_of_memory)
103          _mesa_sha1_compute(blob.data, blob.size, module->sha1);
104 
105       blob_finish(&blob);
106    }
107 }
108 
109 void
110 v3dv_DestroyShaderModule(VkDevice _device,
111                          VkShaderModule _module,
112                          const VkAllocationCallbacks *pAllocator)
113 {
114    V3DV_FROM_HANDLE(v3dv_device, device, _device);
115    V3DV_FROM_HANDLE(v3dv_shader_module, module, _module);
116 
117    if (!module)
118       return;
119 
120    /* NIR modules (which are only created internally by the driver) are not
121     * dynamically allocated so we should never call this for them.
122     * Instead the driver is responsible for freeing the NIR code when it is
123     * no longer needed.
124     */
125    assert(module->nir == NULL);
126 
127    vk_free2(&device->alloc, pAllocator, module);
128 }
129 
130 void
131 v3dv_shader_variant_destroy(struct v3dv_device *device,
132                             struct v3dv_shader_variant *variant)
133 {
134    if (variant->assembly_bo)
135       v3dv_bo_free(device, variant->assembly_bo);
136    ralloc_free(variant->prog_data.base);
137    vk_free(&device->alloc, variant);
138 }
139 
140 static void
141 destroy_pipeline_stage(struct v3dv_device *device,
142                        struct v3dv_pipeline_stage *p_stage,
143                        const VkAllocationCallbacks *pAllocator)
144 {
145    if (!p_stage)
146       return;
147 
148    ralloc_free(p_stage->nir);
149    if (p_stage->current_variant)
150       v3dv_shader_variant_unref(device, p_stage->current_variant);
151    vk_free2(&device->alloc, pAllocator, p_stage);
152 }
153 
154 static void
155 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
156                       struct v3dv_device *device,
157                       const VkAllocationCallbacks *pAllocator)
158 {
159    if (!pipeline)
160       return;
161 
162    /* FIXME: we can't just loop over the mesa stages due to the bin stage;
163     * it would be good to find an alternative.
164     */
165    destroy_pipeline_stage(device, pipeline->vs, pAllocator);
166    destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
167    destroy_pipeline_stage(device, pipeline->fs, pAllocator);
168    destroy_pipeline_stage(device, pipeline->cs, pAllocator);
169 
170    if (pipeline->spill.bo) {
171       assert(pipeline->spill.size_per_thread > 0);
172       v3dv_bo_free(device, pipeline->spill.bo);
173    }
174 
175    if (pipeline->default_attribute_values) {
176       v3dv_bo_free(device, pipeline->default_attribute_values);
177       pipeline->default_attribute_values = NULL;
178    }
179 
180    if (pipeline->combined_index_map)
181       _mesa_hash_table_destroy(pipeline->combined_index_map, NULL);
182 
186    vk_free2(&device->alloc, pAllocator, pipeline);
187 }
188 
189 void
190 v3dv_DestroyPipeline(VkDevice _device,
191                      VkPipeline _pipeline,
192                      const VkAllocationCallbacks *pAllocator)
193 {
194    V3DV_FROM_HANDLE(v3dv_device, device, _device);
195    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
196 
197    if (!pipeline)
198       return;
199 
200    v3dv_destroy_pipeline(pipeline, device, pAllocator);
201 }
202 
203 static const struct spirv_to_nir_options default_spirv_options =  {
204    .caps = { false },
205    .ubo_addr_format = nir_address_format_32bit_index_offset,
206    .ssbo_addr_format = nir_address_format_32bit_index_offset,
207    .phys_ssbo_addr_format = nir_address_format_64bit_global,
208    .push_const_addr_format = nir_address_format_logical,
209    .shared_addr_format = nir_address_format_32bit_offset,
210    .frag_coord_is_sysval = false,
211 };
212 
213 const nir_shader_compiler_options v3dv_nir_options = {
214    .lower_all_io_to_temps = true,
215    .lower_extract_byte = true,
216    .lower_extract_word = true,
217    .lower_bitfield_insert_to_shifts = true,
218    .lower_bitfield_extract_to_shifts = true,
219    .lower_bitfield_reverse = true,
220    .lower_bit_count = true,
221    .lower_cs_local_id_from_index = true,
222    .lower_ffract = true,
223    .lower_fmod = true,
224    .lower_pack_unorm_2x16 = true,
225    .lower_pack_snorm_2x16 = true,
226    .lower_unpack_unorm_2x16 = true,
227    .lower_unpack_snorm_2x16 = true,
228    .lower_pack_unorm_4x8 = true,
229    .lower_pack_snorm_4x8 = true,
230    .lower_unpack_unorm_4x8 = true,
231    .lower_unpack_snorm_4x8 = true,
232    .lower_pack_half_2x16 = true,
233    .lower_unpack_half_2x16 = true,
234    /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
235     * get the tests to pass since it might produce slightly better code.
236     */
237    .lower_uadd_carry = true,
238    .lower_usub_borrow = true,
239    /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
240     * without lowering.
241     */
242    .lower_mul_2x32_64 = true,
243    .lower_fdiv = true,
244    .lower_find_lsb = true,
245    .lower_ffma16 = true,
246    .lower_ffma32 = true,
247    .lower_ffma64 = true,
248    .lower_flrp32 = true,
249    .lower_fpow = true,
250    .lower_fsat = true,
251    .lower_fsqrt = true,
252    .lower_ifind_msb = true,
253    .lower_isign = true,
254    .lower_ldexp = true,
255    .lower_mul_high = true,
256    .lower_wpos_pntc = true,
257    .lower_rotate = true,
258    .lower_to_scalar = true,
259    .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
260                                    * needs to be supported */
261    .lower_interpolate_at = true,
262 };
263 
264 const nir_shader_compiler_options *
265 v3dv_pipeline_get_nir_options(void)
266 {
267    return &v3dv_nir_options;
268 }
269 
270 #define OPT(pass, ...) ({                                  \
271    bool this_progress = false;                             \
272    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
273    if (this_progress)                                      \
274       progress = true;                                     \
275    this_progress;                                          \
276 })
277 
278 static void
279 nir_optimize(nir_shader *nir,
280              struct v3dv_pipeline_stage *stage,
281              bool allow_copies)
282 {
283    bool progress;
284 
285    do {
286       progress = false;
287       OPT(nir_split_array_vars, nir_var_function_temp);
288       OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
289       OPT(nir_opt_deref);
290       OPT(nir_lower_vars_to_ssa);
291       if (allow_copies) {
292          /* Only run this pass in the first call to nir_optimize.  Later calls
293           * assume that we've lowered away any copy_deref instructions and we
294           * don't want to introduce any more.
295           */
296          OPT(nir_opt_find_array_copies);
297       }
298       OPT(nir_opt_copy_prop_vars);
299       OPT(nir_opt_dead_write_vars);
300       OPT(nir_opt_combine_stores, nir_var_all);
301 
302       OPT(nir_lower_alu_to_scalar, NULL, NULL);
303 
304       OPT(nir_copy_prop);
305       OPT(nir_lower_phis_to_scalar);
306 
307       OPT(nir_copy_prop);
308       OPT(nir_opt_dce);
309       OPT(nir_opt_cse);
310       OPT(nir_opt_combine_stores, nir_var_all);
311 
312       /* Passing 0 to the peephole select pass causes it to convert
313        * if-statements that contain only move instructions in the branches
314        * regardless of the count.
315        *
316        * Passing 1 to the peephole select pass causes it to convert
317        * if-statements that contain at most a single ALU instruction (total)
318        * in both branches.
319        */
320       OPT(nir_opt_peephole_select, 0, false, false);
321       OPT(nir_opt_peephole_select, 8, false, true);
322 
323       OPT(nir_opt_intrinsics);
324       OPT(nir_opt_idiv_const, 32);
325       OPT(nir_opt_algebraic);
326       OPT(nir_opt_constant_folding);
327 
328       OPT(nir_opt_dead_cf);
329 
330       OPT(nir_opt_if, false);
331       OPT(nir_opt_conditional_discard);
332 
333       OPT(nir_opt_remove_phis);
334       OPT(nir_opt_undef);
335       OPT(nir_lower_pack);
336    } while (progress);
337 
338    OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
339 }
340 
341 static void
342 preprocess_nir(nir_shader *nir,
343                struct v3dv_pipeline_stage *stage)
344 {
345    /* Make sure we lower variable initializers on output variables so that
346     * nir_remove_dead_variables below sees the corresponding stores
347     */
348    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
349 
350    /* Now that we've deleted all but the main function, we can go ahead and
351     * lower the rest of the variable initializers.
352     */
353    NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
354 
355    /* Split member structs.  We do this before lower_io_to_temporaries so that
356     * it doesn't lower system values to temporaries by accident.
357     */
358    NIR_PASS_V(nir, nir_split_var_copies);
359    NIR_PASS_V(nir, nir_split_per_member_structs);
360 
361    if (nir->info.stage == MESA_SHADER_FRAGMENT)
362       NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
363    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
364       NIR_PASS_V(nir, nir_lower_input_attachments,
365                  &(nir_input_attachment_options) {
366                     .use_fragcoord_sysval = false,
367                        });
368    }
369 
370    NIR_PASS_V(nir, nir_lower_explicit_io,
371               nir_var_mem_push_const,
372               nir_address_format_32bit_offset);
373 
374    NIR_PASS_V(nir, nir_lower_explicit_io,
375               nir_var_mem_ubo | nir_var_mem_ssbo,
376               nir_address_format_32bit_index_offset);
377 
378    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
379               nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
380               NULL);
381 
382    NIR_PASS_V(nir, nir_propagate_invariant);
383    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
384               nir_shader_get_entrypoint(nir), true, false);
385 
386    NIR_PASS_V(nir, nir_lower_system_values);
387    NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
388 
389    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
390 
391    NIR_PASS_V(nir, nir_normalize_cubemap_coords);
392 
393    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
394 
395    NIR_PASS_V(nir, nir_split_var_copies);
396    NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
397 
398    nir_optimize(nir, stage, true);
399 
400    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
401 
402    /* Lower a bunch of stuff */
403    NIR_PASS_V(nir, nir_lower_var_copies);
404 
405    NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in |
406               nir_var_shader_out |
407               nir_var_function_temp, UINT32_MAX);
408 
409    NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
410               nir_var_mem_ubo | nir_var_mem_ssbo,
411               nir_lower_direct_array_deref_of_vec_load);
412 
413    NIR_PASS_V(nir, nir_lower_frexp);
414 
415    /* Get rid of split copies */
416    nir_optimize(nir, stage, false);
417 }
418 
419 /* FIXME: This is basically the same code as in anv, tu and radv. Move to a
420  * common place?
421  */
422 static struct nir_spirv_specialization*
423 vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
424                           uint32_t *out_num_spec_entries)
425 {
426    if (spec_info == NULL || spec_info->mapEntryCount == 0)
427       return NULL;
428 
429    uint32_t num_spec_entries = spec_info->mapEntryCount;
430    struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
431 
432    for (uint32_t i = 0; i < num_spec_entries; i++) {
433       VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
434       const void *data = spec_info->pData + entry.offset;
435       assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
436 
437       spec_entries[i].id = spec_info->pMapEntries[i].constantID;
438       switch (entry.size) {
439       case 8:
440          spec_entries[i].value.u64 = *(const uint64_t *)data;
441          break;
442       case 4:
443          spec_entries[i].value.u32 = *(const uint32_t *)data;
444          break;
445       case 2:
446          spec_entries[i].value.u16 = *(const uint16_t *)data;
447          break;
448       case 1:
449          spec_entries[i].value.u8 = *(const uint8_t *)data;
450          break;
451       default:
452          assert(!"Invalid spec constant size");
453          break;
454       }
455    }
456 
457    *out_num_spec_entries = num_spec_entries;
458    return spec_entries;
459 }
460 
461 static nir_shader *
462 shader_module_compile_to_nir(struct v3dv_device *device,
463                              struct v3dv_pipeline_stage *stage)
464 {
465    nir_shader *nir;
466    const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
467 
468    if (!stage->module->nir) {
469       uint32_t *spirv = (uint32_t *) stage->module->data;
470       assert(stage->module->size % 4 == 0);
471 
472       if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
473          v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
474 
475       uint32_t num_spec_entries = 0;
476       struct nir_spirv_specialization *spec_entries =
477          vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
478       const struct spirv_to_nir_options spirv_options = default_spirv_options;
479       nir = spirv_to_nir(spirv, stage->module->size / 4,
480                          spec_entries, num_spec_entries,
481                          stage->stage, stage->entrypoint,
482                          &spirv_options, nir_options);
483       nir_validate_shader(nir, "after spirv_to_nir");
484       free(spec_entries);
485    } else {
486       /* For NIR modules created by the driver we can't consume the NIR
487        * directly, we need to clone it first, since ownership of the NIR code
488        * (as with SPIR-V code for SPIR-V shaders) belongs to the creator
489        * of the module, and modules can be destroyed immediately after being
490        * used to create pipelines.
491        */
492       nir = nir_shader_clone(NULL, stage->module->nir);
493       nir_validate_shader(nir, "nir module");
494    }
495    assert(nir->info.stage == stage->stage);
496 
497    if (V3D_DEBUG & (V3D_DEBUG_NIR |
498                     v3d_debug_flag_for_shader_stage(stage->stage))) {
499       fprintf(stderr, "Initial form: %s prog %d NIR:\n",
500               gl_shader_stage_name(stage->stage),
501               stage->program_id);
502       nir_print_shader(nir, stderr);
503       fprintf(stderr, "\n");
504    }
505 
506    /* We have to lower away local variable initializers right before we
507     * inline functions.  That way they get properly initialized at the top
508     * of the function and not at the top of its caller.
509     */
510    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
511    NIR_PASS_V(nir, nir_lower_returns);
512    NIR_PASS_V(nir, nir_inline_functions);
513    NIR_PASS_V(nir, nir_opt_deref);
514 
515    /* Pick off the single entrypoint that we want */
516    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
517       if (func->is_entrypoint)
518          func->name = ralloc_strdup(func, "main");
519       else
520          exec_node_remove(&func->node);
521    }
522    assert(exec_list_length(&nir->functions) == 1);
523 
524    /* Vulkan uses the separate-shader linking model */
525    nir->info.separate_shader = true;
526 
527    preprocess_nir(nir, stage);
528 
529    return nir;
530 }
531 
532 static int
533 type_size_vec4(const struct glsl_type *type, bool bindless)
534 {
535    return glsl_count_attribute_slots(type, false);
536 }
537 
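/* Returns the index in the descriptor map for the given (set, binding,
 * array_index) combination, adding a new entry at the end of the map if it
 * was not registered yet.
 */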
538 static unsigned
539 descriptor_map_add(struct v3dv_descriptor_map *map,
540                    int set,
541                    int binding,
542                    int array_index,
543                    int array_size,
544                    bool is_shadow)
545 {
546    assert(array_index < array_size);
547 
548    unsigned index = 0;
549    for (unsigned i = 0; i < map->num_desc; i++) {
550       if (set == map->set[i] &&
551           binding == map->binding[i] &&
552           array_index == map->array_index[i]) {
553          assert(array_size == map->array_size[i]);
554          return index;
555       }
556       index++;
557    }
558 
559    assert(index == map->num_desc);
560 
561    map->set[map->num_desc] = set;
562    map->binding[map->num_desc] = binding;
563    map->array_index[map->num_desc] = array_index;
564    map->array_size[map->num_desc] = array_size;
565    map->is_shadow[map->num_desc] = is_shadow;
566    map->num_desc++;
567 
568    return index;
569 }
570 
571 
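/* Push constants are lowered to regular uniform loads; the driver reserves
 * UBO index 0 for the push constant data (see lower_vulkan_resource_index
 * below).
 */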
572 static void
573 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
574                          struct v3dv_pipeline *pipeline)
575 {
576    assert(instr->intrinsic == nir_intrinsic_load_push_constant);
577    instr->intrinsic = nir_intrinsic_load_uniform;
578 }
579 
580 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
581  * can be consumed by the v3d compiler */
582 static void
583 lower_vulkan_resource_index(nir_builder *b,
584                             nir_intrinsic_instr *instr,
585                             struct v3dv_pipeline *pipeline,
586                             const struct v3dv_pipeline_layout *layout)
587 {
588    assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
589 
590    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
591 
592    unsigned set = nir_intrinsic_desc_set(instr);
593    unsigned binding = nir_intrinsic_binding(instr);
594    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
595    struct v3dv_descriptor_set_binding_layout *binding_layout =
596       &set_layout->binding[binding];
597    unsigned index = 0;
598 
599    switch (nir_intrinsic_desc_type(instr)) {
600    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
601    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
602       struct v3dv_descriptor_map *descriptor_map =
603          nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ?
604          &pipeline->ubo_map : &pipeline->ssbo_map;
605 
606       if (!const_val)
607          unreachable("non-constant vulkan_resource_index array index");
608 
609       index = descriptor_map_add(descriptor_map, set, binding,
610                                  const_val->u32,
611                                  binding_layout->array_size,
612                                  false /* is_shadow: Doesn't really matter in this case */);
613 
614       if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
615          /* skip index 0 which is used for push constants */
616          index++;
617       }
618       break;
619    }
620 
621    default:
622       unreachable("unsupported desc_type for vulkan_resource_index");
623       break;
624    }
625 
626    /* Since we use the deref pass, both vulkan_resource_index and
627     * vulkan_load_descriptor return a vec2. But for the index the backend
628     * expects just one scalar (like with get_ssbo_size), so we return only
629     * that here. Then on load_descriptor we recreate the vec2, keeping the
630     * second component (unused right now) as zero.
631     */
632    nir_ssa_def_rewrite_uses(&instr->dest.ssa,
633                             nir_src_for_ssa(nir_imm_int(b, index)));
634    nir_instr_remove(&instr->instr);
635 }
636 
637 static struct hash_table *
638 pipeline_ensure_combined_index_map(struct v3dv_pipeline *pipeline)
639 {
640    if (pipeline->combined_index_map == NULL) {
641       pipeline->combined_index_map =
642          _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);
643       pipeline->next_combined_index = 0;
644    }
645 
646    assert(pipeline->combined_index_map);
647 
648    return pipeline->combined_index_map;
649 }
650 
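/* Returns a single combined texture/sampler index for the given pair of
 * texture and sampler indices, allocating a new one and recording the
 * (texture, sampler) -> combined index mapping the first time it is seen.
 */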
651 static uint32_t
652 get_combined_index(struct v3dv_pipeline *pipeline,
653                    uint32_t texture_index,
654                    uint32_t sampler_index)
655 {
656    struct hash_table *ht = pipeline_ensure_combined_index_map(pipeline);
657    uint32_t key = v3dv_pipeline_combined_index_key_create(texture_index, sampler_index);
658    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
659 
660    if (entry)
661       return (uint32_t)(uintptr_t) (entry->data);
662 
663    uint32_t new_index = pipeline->next_combined_index;
664    pipeline->next_combined_index++;
665 
666    pipeline->combined_index_to_key_map[new_index] = key;
667    _mesa_hash_table_insert(ht, &pipeline->combined_index_to_key_map[new_index],
668                            (void *)(uintptr_t) (new_index));
669 
670    return new_index;
671 }
672 
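/* Turns a texture/sampler deref source into a texture/sampler offset source,
 * registering the descriptor in the corresponding pipeline map and storing
 * the resulting index in the tex instruction.
 */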
673 static void
674 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
675                         struct v3dv_pipeline *pipeline,
676                         const struct v3dv_pipeline_layout *layout)
677 {
678    nir_ssa_def *index = NULL;
679    unsigned base_index = 0;
680    unsigned array_elements = 1;
681    nir_tex_src *src = &instr->src[src_idx];
682    bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
683 
684    /* We compute first the offsets */
685    nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
686    while (deref->deref_type != nir_deref_type_var) {
687       assert(deref->parent.is_ssa);
688       nir_deref_instr *parent =
689          nir_instr_as_deref(deref->parent.ssa->parent_instr);
690 
691       assert(deref->deref_type == nir_deref_type_array);
692 
693       if (nir_src_is_const(deref->arr.index) && index == NULL) {
694          /* We're still building a direct index */
695          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
696       } else {
697          if (index == NULL) {
698             /* We used to be direct but not anymore */
699             index = nir_imm_int(b, base_index);
700             base_index = 0;
701          }
702 
703          index = nir_iadd(b, index,
704                           nir_imul(b, nir_imm_int(b, array_elements),
705                                    nir_ssa_for_src(b, deref->arr.index, 1)));
706       }
707 
708       array_elements *= glsl_get_length(parent->type);
709 
710       deref = parent;
711    }
712 
713    if (index)
714       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
715 
716    /* We have the offsets, we apply them, rewriting the source or removing
717     * instr if needed
718     */
719    if (index) {
720       nir_instr_rewrite_src(&instr->instr, &src->src,
721                             nir_src_for_ssa(index));
722 
723       src->src_type = is_sampler ?
724          nir_tex_src_sampler_offset :
725          nir_tex_src_texture_offset;
726    } else {
727       nir_tex_instr_remove_src(instr, src_idx);
728    }
729 
730    uint32_t set = deref->var->data.descriptor_set;
731    uint32_t binding = deref->var->data.binding;
732    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
733    struct v3dv_descriptor_set_binding_layout *binding_layout =
734       &set_layout->binding[binding];
735 
736    /* For input attachments, the shader includes the attachment_idx. As we are
737     * treating them as textures, we only want the base_index.
738     */
739    uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
740       deref->var->data.index + base_index :
741       base_index;
742 
743    int desc_index =
744       descriptor_map_add(is_sampler ?
745                          &pipeline->sampler_map : &pipeline->texture_map,
746                          deref->var->data.descriptor_set,
747                          deref->var->data.binding,
748                          array_index,
749                          binding_layout->array_size,
750                          instr->is_shadow);
751 
752    if (is_sampler)
753       instr->sampler_index = desc_index;
754    else
755       instr->texture_index = desc_index;
756 }
757 
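/* Lowers the texture and sampler deref sources of a tex instruction and
 * assigns the combined texture/sampler index used by the backend. Returns
 * true if the instruction was modified.
 */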
758 static bool
759 lower_sampler(nir_builder *b, nir_tex_instr *instr,
760               struct v3dv_pipeline *pipeline,
761               const struct v3dv_pipeline_layout *layout)
762 {
763    int texture_idx =
764       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
765 
766    if (texture_idx >= 0)
767       lower_tex_src_to_offset(b, instr, texture_idx, pipeline, layout);
768 
769    int sampler_idx =
770       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
771 
772    if (sampler_idx >= 0)
773       lower_tex_src_to_offset(b, instr, sampler_idx, pipeline, layout);
774 
775    if (texture_idx < 0 && sampler_idx < 0)
776       return false;
777 
778    int combined_index =
779       get_combined_index(pipeline,
780                          instr->texture_index,
781                          sampler_idx < 0 ? V3DV_NO_SAMPLER_IDX : instr->sampler_index);
782 
783    instr->texture_index = combined_index;
784    instr->sampler_index = combined_index;
785 
786    return true;
787 }
788 
789 /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
790 static void
791 lower_image_deref(nir_builder *b,
792                   nir_intrinsic_instr *instr,
793                   struct v3dv_pipeline *pipeline,
794                   const struct v3dv_pipeline_layout *layout)
795 {
796    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
797    nir_ssa_def *index = NULL;
798    unsigned array_elements = 1;
799    unsigned base_index = 0;
800 
801    while (deref->deref_type != nir_deref_type_var) {
802       assert(deref->parent.is_ssa);
803       nir_deref_instr *parent =
804          nir_instr_as_deref(deref->parent.ssa->parent_instr);
805 
806       assert(deref->deref_type == nir_deref_type_array);
807 
808       if (nir_src_is_const(deref->arr.index) && index == NULL) {
809          /* We're still building a direct index */
810          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
811       } else {
812          if (index == NULL) {
813             /* We used to be direct but not anymore */
814             index = nir_imm_int(b, base_index);
815             base_index = 0;
816          }
817 
818          index = nir_iadd(b, index,
819                           nir_imul(b, nir_imm_int(b, array_elements),
820                                    nir_ssa_for_src(b, deref->arr.index, 1)));
821       }
822 
823       array_elements *= glsl_get_length(parent->type);
824 
825       deref = parent;
826    }
827 
828    if (index)
829       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
830 
831    uint32_t set = deref->var->data.descriptor_set;
832    uint32_t binding = deref->var->data.binding;
833    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
834    struct v3dv_descriptor_set_binding_layout *binding_layout =
835       &set_layout->binding[binding];
836 
837    uint32_t array_index = deref->var->data.index + base_index;
838 
839    assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
840           binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
841 
842    int desc_index =
843       descriptor_map_add(&pipeline->texture_map,
844                          deref->var->data.descriptor_set,
845                          deref->var->data.binding,
846                          array_index,
847                          binding_layout->array_size,
848                          false /* is_shadow: Doesn't really matter in this case */);
849 
850    /* We still need to get a combined_index, as we are integrating images with
851     * the rest of the texture/sampler support
852     */
853    int combined_index =
854       get_combined_index(pipeline, desc_index, V3DV_NO_SAMPLER_IDX);
855 
856    index = nir_imm_int(b, combined_index);
857 
858    nir_rewrite_image_intrinsic(instr, index, false);
859 }
860 
861 static bool
862 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
863                 struct v3dv_pipeline *pipeline,
864                 const struct v3dv_pipeline_layout *layout)
865 {
866    switch (instr->intrinsic) {
867    case nir_intrinsic_load_layer_id:
868       /* FIXME: if layered rendering gets supported, this would need a real
869        * lowering
870        */
871       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
872                                nir_src_for_ssa(nir_imm_int(b, 0)));
873       nir_instr_remove(&instr->instr);
874       return true;
875 
876    case nir_intrinsic_load_push_constant:
877       lower_load_push_constant(b, instr, pipeline);
878       pipeline->use_push_constants = true;
879       return true;
880 
881    case nir_intrinsic_vulkan_resource_index:
882       lower_vulkan_resource_index(b, instr, pipeline, layout);
883       return true;
884 
885    case nir_intrinsic_load_vulkan_descriptor: {
886       /* We are not using it, as loading the descriptor happens as part of the
887        * load/store instruction, so the simplest thing is to do a no-op. We just
888        * lower the desc back to a vec2, which is what load_ssbo/ubo expect.
889        */
890       nir_ssa_def *desc = nir_vec2(b, instr->src[0].ssa, nir_imm_int(b, 0));
891       nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(desc));
892       nir_instr_remove(&instr->instr);
893       return true;
894    }
895 
896    case nir_intrinsic_image_deref_load:
897    case nir_intrinsic_image_deref_store:
898    case nir_intrinsic_image_deref_atomic_add:
899    case nir_intrinsic_image_deref_atomic_imin:
900    case nir_intrinsic_image_deref_atomic_umin:
901    case nir_intrinsic_image_deref_atomic_imax:
902    case nir_intrinsic_image_deref_atomic_umax:
903    case nir_intrinsic_image_deref_atomic_and:
904    case nir_intrinsic_image_deref_atomic_or:
905    case nir_intrinsic_image_deref_atomic_xor:
906    case nir_intrinsic_image_deref_atomic_exchange:
907    case nir_intrinsic_image_deref_atomic_comp_swap:
908    case nir_intrinsic_image_deref_size:
909    case nir_intrinsic_image_deref_samples:
910       lower_image_deref(b, instr, pipeline, layout);
911       return true;
912 
913    default:
914       return false;
915    }
916 }
917 
918 static bool
919 lower_impl(nir_function_impl *impl,
920            struct v3dv_pipeline *pipeline,
921            const struct v3dv_pipeline_layout *layout)
922 {
923    nir_builder b;
924    nir_builder_init(&b, impl);
925    bool progress = false;
926 
927    nir_foreach_block(block, impl) {
928       nir_foreach_instr_safe(instr, block) {
929          b.cursor = nir_before_instr(instr);
930          switch (instr->type) {
931          case nir_instr_type_tex:
932             progress |=
933                lower_sampler(&b, nir_instr_as_tex(instr), pipeline, layout);
934             break;
935          case nir_instr_type_intrinsic:
936             progress |=
937                lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout);
938             break;
939          default:
940             break;
941          }
942       }
943    }
944 
945    return progress;
946 }
947 
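/* Lowers all descriptor-set based texture/image derefs and Vulkan-specific
 * intrinsics in the shader into the flat indices and intrinsics expected by
 * the v3d compiler, using the pipeline layout to resolve set/binding pairs.
 */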
948 static bool
949 lower_pipeline_layout_info(nir_shader *shader,
950                            struct v3dv_pipeline *pipeline,
951                            const struct v3dv_pipeline_layout *layout)
952 {
953    bool progress = false;
954 
955    nir_foreach_function(function, shader) {
956       if (function->impl)
957          progress |= lower_impl(function->impl, pipeline, layout);
958    }
959 
960    return progress;
961 }
962 
963 
964 static void
965 lower_fs_io(nir_shader *nir)
966 {
967    /* Our backend doesn't handle array fragment shader outputs */
968    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
969    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
970 
971    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
972                                MESA_SHADER_FRAGMENT);
973 
974    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
975                                MESA_SHADER_FRAGMENT);
976 
977    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
978               type_size_vec4, 0);
979 }
980 
981 static void
982 lower_vs_io(struct nir_shader *nir)
983 {
984    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
985 
986    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
987                                MESA_SHADER_VERTEX);
988 
989    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
990                                MESA_SHADER_VERTEX);
991 
992    /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
993     * overlaps with v3d_nir_lower_io. Need further research though.
994     */
995 }
996 
997 static void
998 shader_debug_output(const char *message, void *data)
999 {
1000    /* FIXME: We probably don't want to debug anything extra here, and in fact
1001     * the compiler doesn't use this callback much, only as an alternative
1002     * way to dump the shaderdb stats, which you can already get using
1003     * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1004     * compiler to remove that callback.
1005     */
1006 }
1007 
1008 static void
1009 pipeline_populate_v3d_key(struct v3d_key *key,
1010                           const struct v3dv_pipeline_stage *p_stage,
1011                           uint32_t ucp_enables,
1012                           bool robust_buffer_access)
1013 {
1014    /* The following are default values used at pipeline creation time. We use
1015     * a 16-bit return size as the default.
1016     */
1017 
1018    /* We don't use the nir shader info.num_textures because that doesn't take
1019     * into account input attachments, even after calling
1020     * nir_lower_input_attachments. As a general rule that makes sense, but in
1021     * our case we are handling them mostly as textures. We iterate through the
1022     * combined_index_map that was filled with the textures used in the shader.
1023     */
1024    uint32_t tex_idx = 0;
1025    if (p_stage->pipeline->combined_index_map) {
1026       hash_table_foreach(p_stage->pipeline->combined_index_map, entry) {
1027          key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1028          key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1029          key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1030          key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1031 
1032          key->tex[tex_idx].return_size = 16;
1033          key->tex[tex_idx].return_channels = 2;
1034 
1035          tex_idx++;
1036       }
1037    }
1038    key->num_tex_used = tex_idx;
1039    assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1040 
1041    /* Default value. It will be overridden in the vs/gs populate methods once
1042     * GS gets supported.
1043     */
1044    key->is_last_geometry_stage = true;
1045 
1046    /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1047     * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1048     * takes care of adding a single compact array variable at
1049     * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1050     *
1051     * The only lowering we are interested is specific to the fragment shader,
1052     * where we want to emit discards to honor writes to gl_ClipDistance[] in
1053     * previous stages. This is done via nir_lower_clip_fs() so we only set up
1054     * the ucp enable mask for that stage.
1055     */
1056    key->ucp_enables = ucp_enables;
1057 
1058    key->robust_buffer_access = robust_buffer_access;
1059 
1060    key->environment = V3D_ENVIRONMENT_VULKAN;
1061 }
1062 
1063 /* FIXME: anv maps to the hw primitive type. Perhaps eventually we would do
1064  * the same. For now we use the pipe prim type, which is what v3d already uses.
1065  */
1066 static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1067    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1068    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1069    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1070    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1071    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1072    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1073    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1074    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1075    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1076    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1077 };
1078 
1079 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1080    [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1081    [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1082    [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1083    [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1084    [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1085    [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1086    [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1087    [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1088    [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1089    [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1090    [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1091    [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1092    [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1093    [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1094    [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1095    [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1096 };
1097 
1098 static void
1099 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1100                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1101                              const struct v3dv_pipeline_stage *p_stage,
1102                              uint32_t ucp_enables)
1103 {
1104    memset(key, 0, sizeof(*key));
1105 
1106    const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1107    pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1108 
1109    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1110       pCreateInfo->pInputAssemblyState;
1111    uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1112 
1113    key->is_points = (topology == PIPE_PRIM_POINTS);
1114    key->is_lines = (topology >= PIPE_PRIM_LINES &&
1115                     topology <= PIPE_PRIM_LINE_STRIP);
1116 
1117    /* Vulkan doesn't appear to specify (anv does the same) */
1118    key->clamp_color = false;
1119 
1120    const VkPipelineColorBlendStateCreateInfo *cb_info =
1121       pCreateInfo->pColorBlendState;
1122 
1123    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1124                        vk_to_pipe_logicop[cb_info->logicOp] :
1125                        PIPE_LOGICOP_COPY;
1126 
1127    const bool raster_enabled =
1128       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1129 
1130    /* Multisample rasterization state must be ignored if rasterization
1131     * is disabled.
1132     */
1133    const VkPipelineMultisampleStateCreateInfo *ms_info =
1134       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1135    if (ms_info) {
1136       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1137              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1138       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1139 
1140       if (key->msaa) {
1141          key->sample_coverage =
1142             p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1143          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1144          key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1145       }
1146    }
1147 
1148    /* Vulkan doesn't support alpha test */
1149    key->alpha_test = false;
1150    key->alpha_test_func = COMPARE_FUNC_NEVER;
1151 
1152    /* This is intended for V3D versions before 4.1, otherwise we just use the
1153     * tile buffer load/store swap R/B bit.
1154     */
1155    key->swap_color_rb = 0;
1156 
1157    const struct v3dv_render_pass *pass =
1158       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1159    const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1160    for (uint32_t i = 0; i < subpass->color_count; i++) {
1161       const uint32_t att_idx = subpass->color_attachments[i].attachment;
1162       if (att_idx == VK_ATTACHMENT_UNUSED)
1163          continue;
1164 
1165       key->cbufs |= 1 << i;
1166 
1167       VkFormat fb_format = pass->attachments[att_idx].desc.format;
1168       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1169 
1170       /* If logic operations are enabled then we might emit color reads and we
1171        * need to know the color buffer format and swizzle for that
1172        */
1173       if (key->logicop_func != PIPE_LOGICOP_COPY) {
1174          key->color_fmt[i].format = fb_pipe_format;
1175          key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format);
1176       }
1177 
1178       const struct util_format_description *desc =
1179          vk_format_description(fb_format);
1180 
1181       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1182           desc->channel[0].size == 32) {
1183          key->f32_color_rb |= 1 << i;
1184       }
1185 
1186       if (p_stage->nir->info.fs.untyped_color_outputs) {
1187          if (util_format_is_pure_uint(fb_pipe_format))
1188             key->uint_color_rb |= 1 << i;
1189          else if (util_format_is_pure_sint(fb_pipe_format))
1190             key->int_color_rb |= 1 << i;
1191       }
1192 
1193       if (key->is_points) {
1194          /* FIXME: The mask would need to be computed based on the shader
1195           * inputs. On gallium it is done at st_atom_rasterizer
1196           * (sprite_coord_enable). anv seems (need to confirm) to do that on
1197           * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
1198           * better to have tests to guide filling the mask.
1199           */
1200          key->point_sprite_mask = 0;
1201 
1202          /* Vulkan mandates upper left. */
1203          key->point_coord_upper_left = true;
1204       }
1205    }
1206 
1207    /* FIXME: we understand that this is used on GL to configure fixed-function
1208     * two-sided lighting support, and doesn't make sense for Vulkan. Need to
1209     * confirm though.
1210     */
1211    key->light_twoside = false;
1212 
1213    /* FIXME: ditto, although for flat lighting. Again, need to confirm. */
1214    key->shade_model_flat = false;
1215 }
1216 
1217 static void
1218 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1219                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1220                              const struct v3dv_pipeline_stage *p_stage)
1221 {
1222    memset(key, 0, sizeof(*key));
1223 
1224    const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1225    pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1226 
1227    /* Vulkan doesn't appear to specify (anv does the same) */
1228    key->clamp_color = false;
1229 
1230    /* Vulkan specifies a point size per vertex, so this is true if the
1231     * primitives are points, like on ES2.
1232     */
1233    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1234       pCreateInfo->pInputAssemblyState;
1235    uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1236 
1237    /* FIXME: not enough to just be PIPE_PRIM_POINTS; on gallium the full check
1238     * is PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1239    key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1240 
1241    key->is_coord = p_stage->is_coord;
1242    if (p_stage->is_coord) {
1243       /* The only output varying on coord shaders are for transform
1244        * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1245        */
1246       key->num_used_outputs = 0;
1247    } else {
1248       struct v3dv_pipeline *pipeline = p_stage->pipeline;
1249       struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant;
1250 
1251       key->num_used_outputs = fs_variant->prog_data.fs->num_inputs;
1252 
1253       STATIC_ASSERT(sizeof(key->used_outputs) ==
1254                     sizeof(fs_variant->prog_data.fs->input_slots));
1255       memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots,
1256              sizeof(key->used_outputs));
1257    }
1258 
1259    const VkPipelineVertexInputStateCreateInfo *vi_info =
1260       pCreateInfo->pVertexInputState;
1261    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1262       const VkVertexInputAttributeDescription *desc =
1263          &vi_info->pVertexAttributeDescriptions[i];
1264       assert(desc->location < MAX_VERTEX_ATTRIBS);
1265       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1266          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1267    }
1268 }
1269 
1270 /*
1271  * Creates the pipeline_stage for the coordinate shader. Initially a clone of
1272  * the vs pipeline_stage, with is_coord set to true.
1273  *
1274  * Returns NULL if it was not able to allocate the object, which should be
1275  * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1276  */
1277 static struct v3dv_pipeline_stage*
1278 pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
1279                              const VkAllocationCallbacks *pAllocator)
1280 {
1281    struct v3dv_device *device = src->pipeline->device;
1282 
1283    struct v3dv_pipeline_stage *p_stage =
1284       vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
1285                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1286 
1287    if (p_stage == NULL)
1288       return NULL;
1289 
1290    p_stage->pipeline = src->pipeline;
1291    assert(src->stage == MESA_SHADER_VERTEX);
1292    p_stage->stage = src->stage;
1293    p_stage->entrypoint = src->entrypoint;
1294    p_stage->module = src->module;
1295    p_stage->nir = nir_shader_clone(NULL, src->nir);
1296    p_stage->spec_info = src->spec_info;
1297    memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1298 
1299    p_stage->is_coord = true;
1300 
1301    return p_stage;
1302 }
1303 
1304 /* FIXME: right now this just asks for a bo of the exact size of the qpu
1305  * assembly. It would be good to be able to re-use bos to avoid bo
1306  * fragmentation. This could be tricky though, as right now we are uploading
1307  * the assembly from two paths, when compiling a shader, or when deserializing
1308  * from the pipeline cache. This also means that the same variant can be
1309  * shared by different objects. So with the current approach it is clear who
1310  * owns the assembly bo, but if shared, who owns the shared bo?
1311  *
1312  * For now one-bo per-assembly would work.
1313  *
1314  * Returns false if it was not able to allocate or map the assembly bo memory.
1315  */
1316 static bool
1317 upload_assembly(struct v3dv_device *device,
1318                 struct v3dv_shader_variant *variant,
1319                 gl_shader_stage stage,
1320                 bool is_coord,
1321                 const void *data,
1322                 uint32_t size)
1323 {
1324    const char *name = NULL;
1325    /* We are uploading the assembly just once, so at this point we shouldn't
1326     * have any bo
1327     */
1328    assert(variant->assembly_bo == NULL);
1329 
1330    switch (stage) {
1331    case MESA_SHADER_VERTEX:
1332       name = (is_coord == true) ? "coord_shader_assembly" :
1333          "vertex_shader_assembly";
1334       break;
1335    case MESA_SHADER_FRAGMENT:
1336       name = "fragment_shader_assembly";
1337       break;
1338    case MESA_SHADER_COMPUTE:
1339       name = "compute_shader_assembly";
1340       break;
1341    default:
1342       unreachable("Stage not supported\n");
1343       break;
1344    };
1345 
1346    struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true);
1347    if (!bo) {
1348       fprintf(stderr, "failed to allocate memory for shader\n");
1349       return false;
1350    }
1351 
1352    bool ok = v3dv_bo_map(device, bo, size);
1353    if (!ok) {
1354       fprintf(stderr, "failed to map source shader buffer\n");
1355       return false;
1356    }
1357 
1358    memcpy(bo->map, data, size);
1359 
1360    /* We don't unmap the assembly bo, as we will use it to gather the assembly
1361     * when serializing the variant.
1362     */
1363    variant->assembly_bo = bo;
1364 
1365    return true;
1366 }
1367 
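/* Computes the sha1 that identifies a shader variant: the sha1 of the
 * shader(s) involved plus the compiler key used for this variant.
 */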
1368 static void
1369 pipeline_hash_variant(const struct v3dv_pipeline_stage *p_stage,
1370                       struct v3d_key *key,
1371                       size_t key_size,
1372                       unsigned char *sha1_out)
1373 {
1374    struct mesa_sha1 ctx;
1375    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1376    _mesa_sha1_init(&ctx);
1377 
1378    if (p_stage->stage == MESA_SHADER_COMPUTE) {
1379       _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1380    } else {
1381       /* We need to include both in the sha1 key, as one could affect the other
1382        * during linking (e.g. if the vertex outputs are constants, then the
1383        * fragment shader would use load_const instead of load_input). An
1384        * alternative would be to use the serialized nir, but that seems like
1385        * overkill.
1386        */
1387       _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1388                         sizeof(pipeline->vs->shader_sha1));
1389       _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1390                         sizeof(pipeline->fs->shader_sha1));
1391    }
1392    _mesa_sha1_update(&ctx, key, key_size);
1393 
1394    _mesa_sha1_final(&ctx, sha1_out);
1395 }
1396 
1397 /* Checks that the pipeline has enough spill size to use a specific variant */
1398 static void
1399 pipeline_check_spill_size(struct v3dv_pipeline *pipeline,
1400                           struct v3dv_shader_variant *variant)
1401 {
1402    if (variant->prog_data.base->spill_size > pipeline->spill.size_per_thread) {
1403       struct v3dv_device *device = pipeline->device;
1404 
1405       /* The TIDX register we use for choosing the area to access
1406        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1407        * Even at minimum threadcount in a particular shader, that
1408        * means we still multiply the number of QPUs by 4.
1409        */
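      /* Illustrative example (assumed figures): with 8 QPUs and a per-thread
       * spill size of 512 bytes, total_spill_size below would be
       * 4 * 8 * 512 = 16384 bytes.
       */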
1410       const uint32_t total_spill_size =
1411          4 * device->devinfo.qpu_count * variant->prog_data.base->spill_size;
1412       if (pipeline->spill.bo) {
1413          assert(pipeline->spill.size_per_thread > 0);
1414          v3dv_bo_free(device, pipeline->spill.bo);
1415       }
1416       pipeline->spill.bo =
1417          v3dv_bo_alloc(device, total_spill_size, "spill", true);
1418       pipeline->spill.size_per_thread = variant->prog_data.base->spill_size;
1419    }
1420 }
1421 
1422 /*
1423  * Creates a new shader variant. Note that prog_data is not copied: the
1424  * variant takes ownership of the pointer and is responsible for freeing it.
1425  *
1426  * Creation includes allocating a shader assembly bo, and filling it up.
1427  */
1428 struct v3dv_shader_variant *
1429 v3dv_shader_variant_create(struct v3dv_device *device,
1430                            gl_shader_stage stage,
1431                            bool is_coord,
1432                            const unsigned char *variant_sha1,
1433                            const struct v3d_key *key,
1434                            uint32_t key_size,
1435                            struct v3d_prog_data *prog_data,
1436                            uint32_t prog_data_size,
1437                            const uint64_t *qpu_insts,
1438                            uint32_t qpu_insts_size,
1439                            VkResult *out_vk_result)
1440 {
1441    struct v3dv_shader_variant *variant =
1442       vk_zalloc(&device->alloc, sizeof(*variant), 8,
1443                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1444 
1445    if (variant == NULL) {
1446       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1447       return NULL;
1448    }
1449 
1450    variant->ref_cnt = 1;
1451    variant->stage = stage;
1452    variant->is_coord = is_coord;
1453    memcpy(&variant->key, key, key_size);
1454    variant->v3d_key_size = key_size;
1455    memcpy(variant->variant_sha1, variant_sha1, sizeof(variant->variant_sha1));
1456    variant->prog_data_size = prog_data_size;
1457    variant->prog_data.base = prog_data;
1458 
1459    if (qpu_insts) {
1460       if (!upload_assembly(device, variant, stage, is_coord,
1461                            qpu_insts, qpu_insts_size)) {
1462          ralloc_free(variant->prog_data.base);
1463          vk_free(&device->alloc, variant);
1464 
1465          *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1466          return NULL;
1467       }
1468       variant->qpu_insts_size = qpu_insts_size;
1469    }
1470 
1471    *out_vk_result = VK_SUCCESS;
1472 
1473    return variant;
1474 }
1475 
1476 /* For a given key, returns the compiled version of the shader. If it was
1477  * already compiled, it is returned from the cache; if not, it is compiled
1478  * through the v3d compiler.
1479  *
1480  * If the method returns NULL it means that it was not able to allocate the
1481  * resources for the variant. out_vk_result will indicate which OOM error applies.
1482  *
1483  * Returns a new reference of the shader_variant to the caller.
1484  */
1485 struct v3dv_shader_variant*
1486 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
1487                         struct v3dv_pipeline_cache *cache,
1488                         struct v3d_key *key,
1489                         size_t key_size,
1490                         const VkAllocationCallbacks *pAllocator,
1491                         VkResult *out_vk_result)
1492 {
1493    /* First we check whether the current pipeline variant is already the one
1494     * we need. For this we can just compare the v3d_key.
1495     */
1496 
1497    if (p_stage->current_variant &&
1498        memcmp(key, &p_stage->current_variant->key, key_size) == 0) {
1499       *out_vk_result = VK_SUCCESS;
1500       return p_stage->current_variant;
1501    }
1502 
1503    /* We search in the pipeline cache provided by the user, or in the default
1504     * one otherwise.
1505     */
1506    unsigned char variant_sha1[20];
1507    pipeline_hash_variant(p_stage, key, key_size, variant_sha1);
1508 
1509    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1510    struct v3dv_device *device = pipeline->device;
1511    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
1512        cache = &device->default_pipeline_cache;
1513 
1514    struct v3dv_shader_variant *variant =
1515       v3dv_pipeline_cache_search_for_variant(pipeline,
1516                                              cache,
1517                                              variant_sha1);
1518 
1519    if (variant) {
1520       pipeline_check_spill_size(pipeline, variant);
1521       *out_vk_result = VK_SUCCESS;
1522       return variant;
1523    }
1524    /* If we don't find the variant in any cache, we compile one and add the
1525     * variant to the cache
1526     */
1527    struct v3dv_physical_device *physical_device =
1528       &pipeline->device->instance->physicalDevice;
1529    const struct v3d_compiler *compiler = physical_device->compiler;
1530 
1531    uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count);
1532 
1533    if (V3D_DEBUG & (V3D_DEBUG_NIR |
1534                     v3d_debug_flag_for_shader_stage(p_stage->stage))) {
1535       fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n",
1536               gl_shader_stage_name(p_stage->stage),
1537               p_stage->program_id,
1538               variant_id);
1539       nir_print_shader(p_stage->nir, stderr);
1540       fprintf(stderr, "\n");
1541    }
1542 
1543    uint64_t *qpu_insts;
1544    uint32_t qpu_insts_size;
1545    struct v3d_prog_data *prog_data;
1546 
1547    qpu_insts = v3d_compile(compiler,
1548                            key, &prog_data,
1549                            p_stage->nir,
1550                            shader_debug_output, NULL,
1551                            p_stage->program_id,
1552                            variant_id,
1553                            &qpu_insts_size);
1554 
1555    if (!qpu_insts) {
1556       fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1557               gl_shader_stage_name(p_stage->stage),
1558               p_stage->program_id);
1559    }
1560 
1561    variant = v3dv_shader_variant_create(device, p_stage->stage, p_stage->is_coord,
1562                                         variant_sha1,
1563                                         key, key_size,
1564                                         prog_data, v3d_prog_data_size(p_stage->stage),
1565                                         qpu_insts, qpu_insts_size,
1566                                         out_vk_result);
1567    if (qpu_insts)
1568       free(qpu_insts);
1569 
1570    if (variant)
1571       pipeline_check_spill_size(pipeline, variant);
1572 
1573    if (*out_vk_result == VK_SUCCESS) {
1574       struct v3dv_pipeline_cache *default_cache =
1575          &pipeline->device->default_pipeline_cache;
1576 
1577       v3dv_pipeline_cache_upload_variant(pipeline, cache, variant);
1578 
1579       /* Ensure that the variant is also in the default cache, as cmd_buffer
1580        * could need to change the current variant.
1581        */
1582       if (default_cache != cache) {
1583          v3dv_pipeline_cache_upload_variant(pipeline, default_cache, variant);
1584       }
1585    }
1586 
1587    return variant;
1588 }
1589 
1590 /* This method updates the return size for a given key. It assumes that the
1591  * key was already properly populated, so for example key->num_tex_used
1592  * should be correct at this point.
1593  *
1594  * Note that even if the @return_size to set is 32 bit, it may be overridden
1595  * to 16 bit, as for shadow textures, which we know in advance are always
1596  * 16 bit.
1597  */
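/* Illustrative example (values assumed): with return_size == 32, a shadow
 * texture at index 0 and a regular texture at index 1 end up with:
 *   tex[0].return_size = 16, tex[0].return_channels = 2
 *   tex[1].return_size = 32, tex[1].return_channels = 4
 */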
1598 void
1599 v3d_key_update_return_size(struct v3dv_pipeline *pipeline,
1600                            struct v3d_key *key,
1601                            uint32_t return_size)
1602 {
1603    assert(return_size == 32 || return_size == 16);
1604    struct v3dv_descriptor_map *texture_map = &pipeline->texture_map;
1605 
1606    for (uint32_t tex_idx = 0; tex_idx < key->num_tex_used; tex_idx++) {
1607       key->tex[tex_idx].return_size =
1608          texture_map->is_shadow[tex_idx] ? 16 : return_size;
1609 
1610       key->tex[tex_idx].return_channels =
1611          key->tex[tex_idx].return_size == 16 ? 2 : 4;
1612    }
1613 }
1614 
1615 /*
1616  * To avoid needing too many shader re-compilations after pipeline creation
1617  * time, we pre-generate several variants, so they are available in the
1618  * default cache. The prime example here is the return size for texture
1619  * accesses, as the real value needed depends on the texture format used.
1620  */
1621 static struct v3dv_shader_variant*
1622 pregenerate_shader_variants(struct v3dv_pipeline_stage *p_stage,
1623                             struct v3dv_pipeline_cache *cache,
1624                             struct v3d_key *key,
1625                             size_t key_size,
1626                             const VkAllocationCallbacks *pAllocator,
1627                             VkResult *out_vk_result)
1628 {
1629    /* We assume that we receive the default 16-bit return size */
1630    struct v3dv_shader_variant *variant_16 =
1631       v3dv_get_shader_variant(p_stage, cache, key, key_size,
1632                               pAllocator, out_vk_result);
1633 
1634    if (*out_vk_result != VK_SUCCESS)
1635       return variant_16;
1636 
1637    if (!p_stage->pipeline->device->instance->default_pipeline_cache_enabled) {
1638       /* If the pipeline cache is disabled it doesn't make sense to pre-generate,
1639        * as we rely on the default pipeline cache to store the different
1640        * pre-compiled variants.
1641        */
1642       return variant_16;
1643    }
1644 
1645    v3d_key_update_return_size(p_stage->pipeline, key, 32);
1646 
1647    struct v3dv_shader_variant *variant_32 =
1648       v3dv_get_shader_variant(p_stage, cache, key, key_size,
1649                               pAllocator, out_vk_result);
1650 
1651    /* v3dv_get_shader_variant returns a new ref; since we are going to use
1652     * variant_16, we need to unref variant_32 here.
1653     */
1654    v3dv_shader_variant_unref(p_stage->pipeline->device, variant_32);
1655 
1656    return variant_16;
1657 }
1658 
1659 /* FIXME: C&P from st, common place? */
1660 static void
1661 st_nir_opts(nir_shader *nir)
1662 {
1663    bool progress;
1664 
1665    do {
1666       progress = false;
1667 
1668       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1669 
1670       /* Linking deals with unused inputs/outputs, but here we can remove
1671        * things local to the shader in the hopes that we can cleanup other
1672        * things. This pass will also remove variables with only stores, so we
1673        * might be able to make progress after it.
1674        */
1675       NIR_PASS(progress, nir, nir_remove_dead_variables,
1676                (nir_variable_mode)(nir_var_function_temp |
1677                                    nir_var_shader_temp |
1678                                    nir_var_mem_shared),
1679                NULL);
1680 
1681       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1682       NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1683 
1684       if (nir->options->lower_to_scalar) {
1685          NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1686          NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1687       }
1688 
1689       NIR_PASS_V(nir, nir_lower_alu);
1690       NIR_PASS_V(nir, nir_lower_pack);
1691       NIR_PASS(progress, nir, nir_copy_prop);
1692       NIR_PASS(progress, nir, nir_opt_remove_phis);
1693       NIR_PASS(progress, nir, nir_opt_dce);
1694       if (nir_opt_trivial_continues(nir)) {
1695          progress = true;
1696          NIR_PASS(progress, nir, nir_copy_prop);
1697          NIR_PASS(progress, nir, nir_opt_dce);
1698       }
1699       NIR_PASS(progress, nir, nir_opt_if, false);
1700       NIR_PASS(progress, nir, nir_opt_dead_cf);
1701       NIR_PASS(progress, nir, nir_opt_cse);
1702       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1703 
1704       NIR_PASS(progress, nir, nir_opt_algebraic);
1705       NIR_PASS(progress, nir, nir_opt_constant_folding);
1706 
1707       NIR_PASS(progress, nir, nir_opt_undef);
1708       NIR_PASS(progress, nir, nir_opt_conditional_discard);
1709    } while (progress);
1710 }
1711 
1712 static void
1713 link_shaders(nir_shader *producer, nir_shader *consumer)
1714 {
1715    assert(producer);
1716    assert(consumer);
1717 
1718    if (producer->options->lower_to_scalar) {
1719       NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1720       NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1721    }
1722 
1723    nir_lower_io_arrays_to_elements(producer, consumer);
1724 
1725    st_nir_opts(producer);
1726    st_nir_opts(consumer);
1727 
1728    if (nir_link_opt_varyings(producer, consumer))
1729       st_nir_opts(consumer);
1730 
1731    NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1732    NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1733 
1734    if (nir_remove_unused_varyings(producer, consumer)) {
1735       NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1736       NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1737 
1738       st_nir_opts(producer);
1739       st_nir_opts(consumer);
1740 
1741       /* Optimizations can cause varyings to become unused.
1742        * nir_compact_varyings() depends on all dead varyings being removed so
1743        * we need to call nir_remove_dead_variables() again here.
1744        */
1745       NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1746       NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1747    }
1748 }
1749 
1750 static void
1751 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1752                    struct v3dv_pipeline_stage *p_stage,
1753                    struct v3dv_pipeline_layout *layout)
1754 {
1755    nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1756 
1757    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1758    NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1759 }
1760 
1761 /**
1762  * The SPIR-V compiler will insert a sized compact array for
1763  * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1764  * where the size of the array determines the number of active clip planes.
1765  */
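/* Illustrative example: a vertex shader declaring gl_ClipDistance[2] produces
 * a compact array of length 2, so the mask returned below is
 * (1 << 2) - 1 = 0x3 (clip planes 0 and 1 enabled).
 */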
1766 static uint32_t
1767 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1768 {
1769    assert(p_stage->stage == MESA_SHADER_VERTEX);
1770    const nir_shader *shader = p_stage->nir;
1771    assert(shader);
1772 
1773    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1774       if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1775          assert(var->data.compact);
1776          return (1 << glsl_get_length(var->type)) - 1;
1777       }
1778    }
1779    return 0;
1780 }
1781 
1782 static nir_shader*
1783 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1784                        struct v3dv_pipeline *pipeline,
1785                        struct v3dv_pipeline_cache *cache)
1786 {
1787    nir_shader *nir = NULL;
1788 
1789    nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1790                                             &v3dv_nir_options,
1791                                             p_stage->shader_sha1);
1792 
1793    if (nir) {
1794       assert(nir->info.stage == p_stage->stage);
1795       return nir;
1796    }
1797 
1798    nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1799 
1800    if (nir) {
1801       struct v3dv_pipeline_cache *default_cache =
1802          &pipeline->device->default_pipeline_cache;
1803 
1804       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1805                                      p_stage->shader_sha1);
1806 
1807       /* Ensure that the NIR shader is also in the default cache, as cmd_buffer
1808        * could need it to change the current variant.
1809        */
1810       if (default_cache != cache) {
1811          v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1812                                         p_stage->shader_sha1);
1813       }
1814       return nir;
1815    }
1816 
1817    /* FIXME: this shouldn't happen, raise error? */
1818    return NULL;
1819 }
1820 
1821 static void
1822 pipeline_hash_shader(const struct v3dv_shader_module *module,
1823                      const char *entrypoint,
1824                      gl_shader_stage stage,
1825                      const VkSpecializationInfo *spec_info,
1826                      unsigned char *sha1_out)
1827 {
1828    struct mesa_sha1 ctx;
1829    _mesa_sha1_init(&ctx);
1830 
1831    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1832    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1833    _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1834    if (spec_info) {
1835       _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1836                         spec_info->mapEntryCount *
1837                         sizeof(*spec_info->pMapEntries));
1838       _mesa_sha1_update(&ctx, spec_info->pData,
1839                         spec_info->dataSize);
1840    }
1841 
1842    _mesa_sha1_final(&ctx, sha1_out);
1843 }
1844 
1845 
1846 static VkResult
1847 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1848                                struct v3dv_pipeline_cache *cache,
1849                                const VkGraphicsPipelineCreateInfo *pCreateInfo,
1850                                const VkAllocationCallbacks *pAllocator)
1851 {
1852    struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1853 
1854    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
1855    /* Right now we only support pipelines with both vertex and fragment
1856     * shaders.
1857     */
1858    assert(pipeline->fs);
1859 
1860    /* Make sure we do all our common lowering *before* we create the vs
1861     * and vs_bin pipeline stages, since from that point forward we need to
1862     * run lowerings for both of them separately, as each stage will
1863     * own its NIR code.
1864     */
1865    lower_vs_io(p_stage->nir);
1866 
1867    pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, pAllocator);
1868    if (pipeline->vs_bin == NULL)
1869       return VK_ERROR_OUT_OF_HOST_MEMORY;
1870 
1871    /* FIXME: this is likely to be moved to a gather-info method filling a full
1872     * struct inside pipeline_stage.
1873     */
1874    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1875       pCreateInfo->pInputAssemblyState;
1876    pipeline->vs->topology = vk_to_pipe_prim_type[ia_info->topology];
1877 
1878    struct v3d_vs_key *key = &pipeline->vs->key.vs;
1879    pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs);
1880    VkResult vk_result;
1881    pipeline->vs->current_variant =
1882       pregenerate_shader_variants(pipeline->vs, cache, &key->base, sizeof(*key),
1883                                   pAllocator, &vk_result);
1884    if (vk_result != VK_SUCCESS)
1885       return vk_result;
1886 
1887    key = &pipeline->vs_bin->key.vs;
1888    pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin);
1889    pipeline->vs_bin->current_variant =
1890       pregenerate_shader_variants(pipeline->vs_bin, cache, &key->base, sizeof(*key),
1891                                   pAllocator, &vk_result);
1892 
1893    return vk_result;
1894 }
1895 
1896 static VkResult
1897 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1898                                  struct v3dv_pipeline_cache *cache,
1899                                  const VkGraphicsPipelineCreateInfo *pCreateInfo,
1900                                  const VkAllocationCallbacks *pAllocator)
1901 {
1902    struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1903 
1905    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
1906 
1907    struct v3d_fs_key *key = &p_stage->key.fs;
1908 
1909    pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage,
1910                                 get_ucp_enable_mask(pipeline->vs));
1911 
1912    lower_fs_io(p_stage->nir);
1913 
1914    VkResult vk_result;
1915    p_stage->current_variant =
1916       pregenerate_shader_variants(p_stage, cache, &key->base, sizeof(*key),
1917                                   pAllocator, &vk_result);
1918 
1919    return vk_result;
1920 }
1921 
1922 /*
1923  * Compiles a pipeline. Note that it also allocates internal objects; if some
1924  * allocations succeed but others fail, the method does not free the
1925  * successful ones.
1926  *
1927  * This is done to simplify the code: in that case we just call the pipeline
1928  * destroy method, which handles freeing the internal objects that were
1929  * allocated. We just need to be careful to set to NULL the objects that were
1930  * not allocated.
1931  */
1932 static VkResult
1933 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
1934                           struct v3dv_pipeline_cache *cache,
1935                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
1936                           const VkAllocationCallbacks *pAllocator)
1937 {
1938    struct v3dv_device *device = pipeline->device;
1939    struct v3dv_physical_device *physical_device =
1940       &device->instance->physicalDevice;
1941 
1942    /* First pass to get the common info from the shader modules and build the
1943     * nir shaders. We don't care about the coord shader for now.
1944     */
1945    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
1946       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
1947       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1948 
1949       struct v3dv_pipeline_stage *p_stage =
1950          vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
1951                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1952 
1953       if (p_stage == NULL)
1954          return VK_ERROR_OUT_OF_HOST_MEMORY;
1955 
1956       /* Note that we are assigning program_id slightly differently than
1957        * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
1958        * would have a different program_id, while v3d would have the same for
1959        * both. For the case of v3dv, it is more natural to have an id this way,
1960        * as right now we are using it for debugging, not for shader-db.
1961        */
1962       p_stage->program_id =
1963          p_atomic_inc_return(&physical_device->next_program_id);
1964       p_stage->compiled_variant_count = 0;
1965 
1966       p_stage->pipeline = pipeline;
1967       p_stage->stage = stage;
1968       if (stage == MESA_SHADER_VERTEX)
1969          p_stage->is_coord = false;
1970       p_stage->entrypoint = sinfo->pName;
1971       p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
1972       p_stage->spec_info = sinfo->pSpecializationInfo;
1973 
1974       pipeline_hash_shader(p_stage->module,
1975                            p_stage->entrypoint,
1976                            stage,
1977                            p_stage->spec_info,
1978                            p_stage->shader_sha1);
1979 
1980       pipeline->active_stages |= sinfo->stage;
1981 
1982       p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
1983 
1984       switch(stage) {
1985       case MESA_SHADER_VERTEX:
1986          pipeline->vs = p_stage;
1987          break;
1988       case MESA_SHADER_FRAGMENT:
1989          pipeline->fs = p_stage;
1990          break;
1991       default:
1992          unreachable("not supported shader stage");
1993       }
1994    }
1995 
1996    /* Add a no-op fragment shader if needed */
1997    if (!pipeline->fs) {
1998       nir_builder b;
1999       nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT,
2000                                      &v3dv_nir_options);
2001       b.shader->info.name = ralloc_strdup(b.shader, "noop_fs");
2002 
2003       struct v3dv_pipeline_stage *p_stage =
2004          vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
2005                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2006 
2007       if (p_stage == NULL)
2008          return VK_ERROR_OUT_OF_HOST_MEMORY;
2009 
2010       p_stage->pipeline = pipeline;
2011       p_stage->stage = MESA_SHADER_FRAGMENT;
2012       p_stage->entrypoint = "main";
2013       p_stage->module = 0;
2014       p_stage->nir = b.shader;
2015       /* The no-op shader is always the same, so we can just create the sha1
2016        * using the name
2017        */
2018       _mesa_sha1_compute(b.shader->info.name, strlen(b.shader->info.name),
2019                          p_stage->shader_sha1);
2020 
2021       p_stage->program_id =
2022          p_atomic_inc_return(&physical_device->next_program_id);
2023       p_stage->compiled_variant_count = 0;
2024 
2025       pipeline->fs = p_stage;
2026       pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2027    }
2028 
2029    /* Linking */
2030    link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2031 
2032    /* Compiling to vir (or getting it from the cache).
2033     */
2034    VkResult vk_result;
2035    vk_result = pipeline_compile_fragment_shader(pipeline, cache,
2036                                                 pCreateInfo, pAllocator);
2037    if (vk_result != VK_SUCCESS)
2038       return vk_result;
2039 
2040    vk_result = pipeline_compile_vertex_shader(pipeline, cache,
2041                                               pCreateInfo, pAllocator);
2042    if (vk_result != VK_SUCCESS)
2043       return vk_result;
2044 
2045    /* FIXME: the values below are the defaults when no GS is available. We
2046     * would need to provide real values if GS support is added.
2047     */
2048    pipeline->vpm_cfg_bin.As = 1;
2049    pipeline->vpm_cfg_bin.Ve = 0;
2050    pipeline->vpm_cfg_bin.Vc =
2051       pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size;
2052 
2053    pipeline->vpm_cfg.As = 1;
2054    pipeline->vpm_cfg.Ve = 0;
2055    pipeline->vpm_cfg.Vc =
2056       pipeline->vs->current_variant->prog_data.vs->vcm_cache_size;
2057 
2058    return VK_SUCCESS;
2059 }
2060 
2061 static unsigned
2062 v3dv_dynamic_state_mask(VkDynamicState state)
2063 {
2064    switch(state) {
2065    case VK_DYNAMIC_STATE_VIEWPORT:
2066       return V3DV_DYNAMIC_VIEWPORT;
2067    case VK_DYNAMIC_STATE_SCISSOR:
2068       return V3DV_DYNAMIC_SCISSOR;
2069    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2070       return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2071    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2072       return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2073    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2074       return V3DV_DYNAMIC_STENCIL_REFERENCE;
2075    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2076       return V3DV_DYNAMIC_BLEND_CONSTANTS;
2077    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2078       return V3DV_DYNAMIC_DEPTH_BIAS;
2079    case VK_DYNAMIC_STATE_LINE_WIDTH:
2080       return V3DV_DYNAMIC_LINE_WIDTH;
2081 
2082    /* Depth bounds testing is not available in V3D 4.2 so here we are just
2083     * ignoring this dynamic state. We are already asserting at pipeline creation
2084     * time that depth bounds testing is not enabled.
2085     */
2086    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2087       return 0;
2088 
2089    default:
2090       unreachable("Unhandled dynamic state");
2091    }
2092 }
2093 
2094 static void
2095 pipeline_init_dynamic_state(
2096    struct v3dv_pipeline *pipeline,
2097    const VkPipelineDynamicStateCreateInfo *pDynamicState,
2098    const VkPipelineViewportStateCreateInfo *pViewportState,
2099    const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2100    const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2101    const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
2102 {
2103    pipeline->dynamic_state = default_dynamic_state;
2104    struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2105 
2106    /* Create a mask of enabled dynamic states */
2107    uint32_t dynamic_states = 0;
2108    if (pDynamicState) {
2109       uint32_t count = pDynamicState->dynamicStateCount;
2110       for (uint32_t s = 0; s < count; s++) {
2111          dynamic_states |=
2112             v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2113       }
2114    }
2115 
2116    /* For any pipeline states that are not dynamic, set the dynamic state
2117     * from the static pipeline state.
2118     */
2119    if (pViewportState) {
2120       if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2121          dynamic->viewport.count = pViewportState->viewportCount;
2122          typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2123                       pViewportState->viewportCount);
2124 
2125          for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2126             v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2127                                         dynamic->viewport.scale[i],
2128                                         dynamic->viewport.translate[i]);
2129          }
2130       }
2131 
2132       if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2133          dynamic->scissor.count = pViewportState->scissorCount;
2134          typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2135                       pViewportState->scissorCount);
2136       }
2137    }
2138 
2139    if (pDepthStencilState) {
2140       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2141          dynamic->stencil_compare_mask.front =
2142             pDepthStencilState->front.compareMask;
2143          dynamic->stencil_compare_mask.back =
2144             pDepthStencilState->back.compareMask;
2145       }
2146 
2147       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2148          dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2149          dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2150       }
2151 
2152       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2153          dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2154          dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2155       }
2156    }
2157 
2158    if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2159       memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2160              sizeof(dynamic->blend_constants));
2161    }
2162 
2163    if (pRasterizationState) {
2164       if (pRasterizationState->depthBiasEnable &&
2165           !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2166          dynamic->depth_bias.constant_factor =
2167             pRasterizationState->depthBiasConstantFactor;
2168          dynamic->depth_bias.slope_factor =
2169             pRasterizationState->depthBiasSlopeFactor;
2170       }
2171       if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2172          dynamic->line_width = pRasterizationState->lineWidth;
2173    }
2174 
2175    pipeline->dynamic_state.mask = dynamic_states;
2176 }
2177 
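/* Translates a VkBlendFactor to the hardware blend factor. When the color
 * attachment has no alpha channel (dst_alpha_one), DST_ALPHA-based factors
 * collapse to constants, e.g. VK_BLEND_FACTOR_DST_ALPHA becomes
 * V3D_BLEND_FACTOR_ONE and VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA becomes
 * V3D_BLEND_FACTOR_ZERO, as the implicit destination alpha is always 1.0.
 */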
2178 static uint8_t
2179 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
2180 {
2181    switch (factor) {
2182    case VK_BLEND_FACTOR_ZERO:
2183    case VK_BLEND_FACTOR_ONE:
2184    case VK_BLEND_FACTOR_SRC_COLOR:
2185    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
2186    case VK_BLEND_FACTOR_DST_COLOR:
2187    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
2188    case VK_BLEND_FACTOR_SRC_ALPHA:
2189    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
2190    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
2191       return factor;
2192    case VK_BLEND_FACTOR_CONSTANT_COLOR:
2193    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
2194    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
2195    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
2196       *needs_constants = true;
2197       return factor;
2198    case VK_BLEND_FACTOR_DST_ALPHA:
2199       return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
2200                              V3D_BLEND_FACTOR_DST_ALPHA;
2201    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
2202       return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
2203                              V3D_BLEND_FACTOR_INV_DST_ALPHA;
2204    case VK_BLEND_FACTOR_SRC1_COLOR:
2205    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
2206    case VK_BLEND_FACTOR_SRC1_ALPHA:
2207    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
2208       assert(!"Invalid blend factor: dual source blending not supported.");
2209    default:
2210       assert(!"Unknown blend factor.");
2211    }
2212 
2213    /* Should be handled by the switch; added to avoid an "end of non-void
2214     * function" error.
2215     */
2216    unreachable("Unknown blend factor.");
2217 }
2218 
2219 static void
2220 pack_blend(struct v3dv_pipeline *pipeline,
2221            const VkPipelineColorBlendStateCreateInfo *cb_info)
2222 {
2223    /* By default, we are not enabling blending and all color channel writes are
2224     * enabled. Color write enables are independent of whether blending is
2225     * enabled or not.
2226     *
2227     * Vulkan specifies color write masks so that bits set correspond to
2228     * enabled channels. Our hardware does it the other way around.
2229     */
2230    pipeline->blend.enables = 0;
2231    pipeline->blend.color_write_masks = 0; /* All channels enabled */
2232 
2233    if (!cb_info)
2234       return;
2235 
2236    assert(pipeline->subpass);
2237    if (pipeline->subpass->color_count == 0)
2238       return;
2239 
2240    assert(pipeline->subpass->color_count == cb_info->attachmentCount);
2241 
2242    pipeline->blend.needs_color_constants = false;
2243    uint32_t color_write_masks = 0;
2244    for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
2245       const VkPipelineColorBlendAttachmentState *b_state =
2246          &cb_info->pAttachments[i];
2247 
2248       uint32_t attachment_idx =
2249          pipeline->subpass->color_attachments[i].attachment;
2250       if (attachment_idx == VK_ATTACHMENT_UNUSED)
2251          continue;
2252 
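      /* Illustrative example (assumed mask): a colorWriteMask of 0x7 (RGB
       * enabled, A disabled) on color attachment 1 contributes
       * (~0x7 & 0xf) << 4 = 0x80, i.e. only the "disable A of RT1" bit set.
       */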
2253       color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
2254 
2255       if (!b_state->blendEnable)
2256          continue;
2257 
2258       VkAttachmentDescription *desc =
2259          &pipeline->pass->attachments[attachment_idx].desc;
2260       const struct v3dv_format *format = v3dv_get_format(desc->format);
2261       bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
2262 
2263       uint8_t rt_mask = 1 << i;
2264       pipeline->blend.enables |= rt_mask;
2265 
2266       v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
2267          config.render_target_mask = rt_mask;
2268 
2269          config.color_blend_mode = b_state->colorBlendOp;
2270          config.color_blend_dst_factor =
2271             blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
2272                          &pipeline->blend.needs_color_constants);
2273          config.color_blend_src_factor =
2274             blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
2275                          &pipeline->blend.needs_color_constants);
2276 
2277          config.alpha_blend_mode = b_state->alphaBlendOp;
2278          config.alpha_blend_dst_factor =
2279             blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
2280                          &pipeline->blend.needs_color_constants);
2281          config.alpha_blend_src_factor =
2282             blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
2283                          &pipeline->blend.needs_color_constants);
2284       }
2285    }
2286 
2287    pipeline->blend.color_write_masks = color_write_masks;
2288 }
2289 
2290 /* This requires that pack_blend() had been called before so we can set
2291  * the overall blend enable bit in the CFG_BITS packet.
2292  */
2293 static void
2294 pack_cfg_bits(struct v3dv_pipeline *pipeline,
2295               const VkPipelineDepthStencilStateCreateInfo *ds_info,
2296               const VkPipelineRasterizationStateCreateInfo *rs_info,
2297               const VkPipelineMultisampleStateCreateInfo *ms_info)
2298 {
2299    assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
2300 
2301    pipeline->msaa =
2302       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2303 
2304    v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) {
2305       config.enable_forward_facing_primitive =
2306          rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
2307 
2308       config.enable_reverse_facing_primitive =
2309          rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
2310 
2311       /* Seems like the hardware is backwards regarding this setting... */
2312       config.clockwise_primitives =
2313          rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
2314 
2315       config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
2316 
2317       /* This is required to pass line rasterization tests in CTS while
2318        * exposing, at least, a minimum of 4-bits of subpixel precision
2319        * (the minimum requirement).
2320        */
2321       config.line_rasterization = 1; /* perp end caps */
2322 
2323       if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
2324          config.direct3d_wireframe_triangles_mode = true;
2325          config.direct3d_point_fill_mode =
2326             rs_info->polygonMode == VK_POLYGON_MODE_POINT;
2327       }
2328 
2329       config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
2330 
2331       /* From the Vulkan spec:
2332        *
2333        *   "Provoking Vertex:
2334        *
2335        *       The vertex in a primitive from which flat shaded attribute
2336        *       values are taken. This is generally the “first” vertex in the
2337        *       primitive, and depends on the primitive topology."
2338        *
2339        * First vertex is the Direct3D style for provoking vertex. OpenGL uses
2340        * the last vertex by default.
2341        */
2342       config.direct3d_provoking_vertex = true;
2343 
2344       config.blend_enable = pipeline->blend.enables != 0;
2345 
2346       /* Disable depth/stencil if we don't have a D/S attachment */
2347       bool has_ds_attachment =
2348          pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
2349 
2350       if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
2351          config.z_updates_enable = ds_info->depthWriteEnable;
2352          config.depth_test_function = ds_info->depthCompareOp;
2353       } else {
2354          config.depth_test_function = VK_COMPARE_OP_ALWAYS;
2355       }
2356 
2357       /* EZ state will be updated at draw time based on bound pipeline state */
2358       config.early_z_updates_enable = false;
2359       config.early_z_enable = false;
2360 
2361       config.stencil_enable =
2362          ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;
2363    };
2364 }
2365 
2366 static uint32_t
2367 translate_stencil_op(enum pipe_stencil_op op)
2368 {
2369    switch (op) {
2370    case VK_STENCIL_OP_KEEP:
2371       return V3D_STENCIL_OP_KEEP;
2372    case VK_STENCIL_OP_ZERO:
2373       return V3D_STENCIL_OP_ZERO;
2374    case VK_STENCIL_OP_REPLACE:
2375       return V3D_STENCIL_OP_REPLACE;
2376    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
2377       return V3D_STENCIL_OP_INCR;
2378    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
2379       return V3D_STENCIL_OP_DECR;
2380    case VK_STENCIL_OP_INVERT:
2381       return V3D_STENCIL_OP_INVERT;
2382    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
2383       return V3D_STENCIL_OP_INCWRAP;
2384    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
2385       return V3D_STENCIL_OP_DECWRAP;
2386    default:
2387       unreachable("bad stencil op");
2388    }
2389 }
2390 
2391 static void
2392 pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
2393                         uint8_t *stencil_cfg,
2394                         bool is_front,
2395                         bool is_back,
2396                         const VkStencilOpState *stencil_state)
2397 {
2398    /* From the Vulkan spec:
2399     *
2400     *   "Reference is an integer reference value that is used in the unsigned
2401     *    stencil comparison. The reference value used by stencil comparison
2402     *    must be within the range [0,2^s-1] , where s is the number of bits in
2403     *    the stencil framebuffer attachment, otherwise the reference value is
2404     *    considered undefined."
2405     *
2406     * In our case, 's' is always 8, so we clamp to that to prevent our packing
2407     * functions from asserting in debug mode if they see larger values.
2408     *
2409     * If we have dynamic state we need to make sure we set the corresponding
2410     * state bits to 0, since cl_emit_with_prepacked ORs the new value with
2411     * the old.
2412     */
2413    const uint8_t write_mask =
2414       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
2415          0 : stencil_state->writeMask & 0xff;
2416 
2417    const uint8_t compare_mask =
2418       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
2419          0 : stencil_state->compareMask & 0xff;
2420 
2421    const uint8_t reference =
2422       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
2423          0 : stencil_state->reference & 0xff;
2424 
2425    v3dv_pack(stencil_cfg, STENCIL_CFG, config) {
2426       config.front_config = is_front;
2427       config.back_config = is_back;
2428       config.stencil_write_mask = write_mask;
2429       config.stencil_test_mask = compare_mask;
2430       config.stencil_test_function = stencil_state->compareOp;
2431       config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
2432       config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
2433       config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
2434       config.stencil_ref_value = reference;
2435    }
2436 }
2437 
2438 static void
2439 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
2440                  const VkPipelineDepthStencilStateCreateInfo *ds_info)
2441 {
2442    assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
2443 
2444    if (!ds_info || !ds_info->stencilTestEnable)
2445       return;
2446 
2447    if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2448       return;
2449 
2450    const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
2451                                            V3DV_DYNAMIC_STENCIL_WRITE_MASK |
2452                                            V3DV_DYNAMIC_STENCIL_REFERENCE;
2453 
2454 
2455    /* If front != back or we have dynamic stencil state we can't emit a single
2456     * packet for both faces.
2457     */
2458    bool needs_front_and_back = false;
2459    if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
2460        memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
2461       needs_front_and_back = true;
2462 
2463    /* If the front and back configurations are the same we can emit both with
2464     * a single packet.
2465     */
2466    pipeline->emit_stencil_cfg[0] = true;
2467    if (!needs_front_and_back) {
2468       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
2469                               true, true, &ds_info->front);
2470    } else {
2471       pipeline->emit_stencil_cfg[1] = true;
2472       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
2473                               true, false, &ds_info->front);
2474       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
2475                               false, true, &ds_info->back);
2476    }
2477 }
2478 
2479 static bool
2480 stencil_op_is_no_op(const VkStencilOpState *stencil)
2481 {
2482    return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2483           stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2484 }
2485 
2486 static void
2487 enable_depth_bias(struct v3dv_pipeline *pipeline,
2488                   const VkPipelineRasterizationStateCreateInfo *rs_info)
2489 {
2490    pipeline->depth_bias.enabled = false;
2491    pipeline->depth_bias.is_z16 = false;
2492 
2493    if (!rs_info || !rs_info->depthBiasEnable)
2494       return;
2495 
2496    /* Check the depth/stencil attachment description for the subpass used with
2497     * this pipeline.
2498     */
2499    assert(pipeline->pass && pipeline->subpass);
2500    struct v3dv_render_pass *pass = pipeline->pass;
2501    struct v3dv_subpass *subpass = pipeline->subpass;
2502 
2503    if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2504       return;
2505 
2506    assert(subpass->ds_attachment.attachment < pass->attachment_count);
2507    struct v3dv_render_pass_attachment *att =
2508       &pass->attachments[subpass->ds_attachment.attachment];
2509 
2510    if (att->desc.format == VK_FORMAT_D16_UNORM)
2511       pipeline->depth_bias.is_z16 = true;
2512 
2513    pipeline->depth_bias.enabled = true;
2514 }
2515 
2516 static void
2517 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2518                       const VkPipelineDepthStencilStateCreateInfo *ds_info)
2519 {
2520    if (!ds_info || !ds_info->depthTestEnable) {
2521       pipeline->ez_state = VC5_EZ_DISABLED;
2522       return;
2523    }
2524 
2525    switch (ds_info->depthCompareOp) {
2526    case VK_COMPARE_OP_LESS:
2527    case VK_COMPARE_OP_LESS_OR_EQUAL:
2528       pipeline->ez_state = VC5_EZ_LT_LE;
2529       break;
2530    case VK_COMPARE_OP_GREATER:
2531    case VK_COMPARE_OP_GREATER_OR_EQUAL:
2532       pipeline->ez_state = VC5_EZ_GT_GE;
2533       break;
2534    case VK_COMPARE_OP_NEVER:
2535    case VK_COMPARE_OP_EQUAL:
2536       pipeline->ez_state = VC5_EZ_UNDECIDED;
2537       break;
2538    default:
2539       pipeline->ez_state = VC5_EZ_DISABLED;
2540       break;
2541    }
2542 
2543    /* If stencil is enabled and is not a no-op, we need to disable EZ */
2544    if (ds_info->stencilTestEnable &&
2545        (!stencil_op_is_no_op(&ds_info->front) ||
2546         !stencil_op_is_no_op(&ds_info->back))) {
2547          pipeline->ez_state = VC5_EZ_DISABLED;
2548    }
2549 }
2550 
2551 static void
2552 pack_shader_state_record(struct v3dv_pipeline *pipeline)
2553 {
2554    assert(sizeof(pipeline->shader_state_record) ==
2555           cl_packet_length(GL_SHADER_STATE_RECORD));
2556 
2557    struct v3d_fs_prog_data *prog_data_fs =
2558       pipeline->fs->current_variant->prog_data.fs;
2559 
2560    struct v3d_vs_prog_data *prog_data_vs =
2561       pipeline->vs->current_variant->prog_data.vs;
2562 
2563    struct v3d_vs_prog_data *prog_data_vs_bin =
2564       pipeline->vs_bin->current_variant->prog_data.vs;
2565 
2566 
2567    /* Note: we are not packing addresses, as we need the job (see
2568     * cl_pack_emit_reloc). Additionally, uniforms can't be filled up at this
2569     * point as they depend on dynamic info that can be set after creating the
2570     * pipeline (like the viewport). They would need to be filled later, so we
2571     * are doing a partial prepacking.
2572     */
2573    v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
2574       shader.enable_clipping = true;
2575 
2576       shader.point_size_in_shaded_vertex_data =
2577          pipeline->vs->topology == PIPE_PRIM_POINTS;
2578 
2579       /* Must be set if the shader modifies Z, discards, or modifies
2580        * the sample mask.  For any of these cases, the fragment
2581        * shader needs to write the Z value (even just discards).
2582        */
2583       shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
2584       /* Set if the EZ test must be disabled (due to shader side
2585        * effects and the early_z flag not being present in the
2586        * shader).
2587        */
2588       shader.turn_off_early_z_test = prog_data_fs->disable_ez;
2589 
2590       shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
2591          prog_data_fs->uses_center_w;
2592 
2593       /* The description for gl_SampleID states that if a fragment shader reads
2594        * it, then we should automatically activate per-sample shading. However,
2595        * the Vulkan spec also states that if a framebuffer has no attachments:
2596        *
2597        *    "The subpass continues to use the width, height, and layers of the
2598        *     framebuffer to define the dimensions of the rendering area, and the
2599        *     rasterizationSamples from each pipeline’s
2600        *     VkPipelineMultisampleStateCreateInfo to define the number of
2601        *     samples used in rasterization multisample rasterization."
2602        *
2603        * So in this scenario, if the pipeline doesn't enable multiple samples
2604        * but the fragment shader accesses gl_SampleID we would be requested
2605        * to do per-sample shading in single sample rasterization mode, which
2606        * is pointless, so just disable it in that case.
2607        */
2608       shader.enable_sample_rate_shading =
2609          pipeline->sample_rate_shading ||
2610          (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
2611 
2612       shader.any_shader_reads_hardware_written_primitive_id = false;
2613 
2614       shader.do_scoreboard_wait_on_first_thread_switch =
2615          prog_data_fs->lock_scoreboard_on_first_thrsw;
2616       shader.disable_implicit_point_line_varyings =
2617          !prog_data_fs->uses_implicit_point_line_varyings;
2618 
2619       shader.number_of_varyings_in_fragment_shader =
2620          prog_data_fs->num_inputs;
2621 
2622       shader.coordinate_shader_propagate_nans = true;
2623       shader.vertex_shader_propagate_nans = true;
2624       shader.fragment_shader_propagate_nans = true;
2625 
2626       /* Note: see previous note about addresses */
2627       /* shader.coordinate_shader_code_address */
2628       /* shader.vertex_shader_code_address */
2629       /* shader.fragment_shader_code_address */
2630 
2631       /* FIXME: Use combined input/output size flag in the common case (also
2632        * on v3d, see v3dx_draw).
2633        */
2634       shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
2635          prog_data_vs_bin->separate_segments;
2636       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
2637          prog_data_vs->separate_segments;
2638 
2639       shader.coordinate_shader_input_vpm_segment_size =
2640          prog_data_vs_bin->separate_segments ?
2641          prog_data_vs_bin->vpm_input_size : 1;
2642       shader.vertex_shader_input_vpm_segment_size =
2643          prog_data_vs->separate_segments ?
2644          prog_data_vs->vpm_input_size : 1;
2645 
2646       shader.coordinate_shader_output_vpm_segment_size =
2647          prog_data_vs_bin->vpm_output_size;
2648       shader.vertex_shader_output_vpm_segment_size =
2649          prog_data_vs->vpm_output_size;
2650 
2651       /* Note: see previous note about addresses */
2652       /* shader.coordinate_shader_uniforms_address */
2653       /* shader.vertex_shader_uniforms_address */
2654       /* shader.fragment_shader_uniforms_address */
2655 
2656       shader.min_coord_shader_input_segments_required_in_play =
2657          pipeline->vpm_cfg_bin.As;
2658       shader.min_vertex_shader_input_segments_required_in_play =
2659          pipeline->vpm_cfg.As;
2660 
2661       shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
2662          pipeline->vpm_cfg_bin.Ve;
2663       shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
2664          pipeline->vpm_cfg.Ve;
2665 
2666       shader.coordinate_shader_4_way_threadable =
2667          prog_data_vs_bin->base.threads == 4;
2668       shader.vertex_shader_4_way_threadable =
2669          prog_data_vs->base.threads == 4;
2670       shader.fragment_shader_4_way_threadable =
2671          prog_data_fs->base.threads == 4;
2672 
2673       shader.coordinate_shader_start_in_final_thread_section =
2674          prog_data_vs_bin->base.single_seg;
2675       shader.vertex_shader_start_in_final_thread_section =
2676          prog_data_vs->base.single_seg;
2677       shader.fragment_shader_start_in_final_thread_section =
2678          prog_data_fs->base.single_seg;
2679 
2680       shader.vertex_id_read_by_coordinate_shader =
2681          prog_data_vs_bin->uses_vid;
2682       shader.base_instance_id_read_by_coordinate_shader =
2683          prog_data_vs_bin->uses_biid;
2684       shader.instance_id_read_by_coordinate_shader =
2685          prog_data_vs_bin->uses_iid;
2686       shader.vertex_id_read_by_vertex_shader =
2687          prog_data_vs->uses_vid;
2688       shader.base_instance_id_read_by_vertex_shader =
2689          prog_data_vs->uses_biid;
2690       shader.instance_id_read_by_vertex_shader =
2691          prog_data_vs->uses_iid;
2692 
2693       /* Note: see previous note about addresses */
2694       /* shader.address_of_default_attribute_values */
2695    }
2696 }
2697 
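/* Packs the VCM_CACHE_SIZE packet into the pipeline. The Vc values below
 * come from the VPM configurations computed for the binning and rendering
 * passes and give the number of 16-vertex batches that may be cached.
 */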
2698 static void
2699 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
2700 {
2701    assert(sizeof(pipeline->vcm_cache_size) ==
2702           cl_packet_length(VCM_CACHE_SIZE));
2703 
2704    v3dv_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
2705       vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
2706       vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
2707    }
2708 }
2709 
2710 /* Returns the attribute type as defined in GL_SHADER_STATE_ATTRIBUTE_RECORD */
2711 static uint8_t
2712 get_attr_type(const struct util_format_description *desc)
2713 {
2714    uint32_t r_size = desc->channel[0].size;
2715    uint8_t attr_type = ATTRIBUTE_FLOAT;
2716 
2717    switch (desc->channel[0].type) {
2718    case UTIL_FORMAT_TYPE_FLOAT:
2719       if (r_size == 32) {
2720          attr_type = ATTRIBUTE_FLOAT;
2721       } else {
2722          assert(r_size == 16);
2723          attr_type = ATTRIBUTE_HALF_FLOAT;
2724       }
2725       break;
2726 
2727    case UTIL_FORMAT_TYPE_SIGNED:
2728    case UTIL_FORMAT_TYPE_UNSIGNED:
2729       switch (r_size) {
2730       case 32:
2731          attr_type = ATTRIBUTE_INT;
2732          break;
2733       case 16:
2734          attr_type = ATTRIBUTE_SHORT;
2735          break;
2736       case 10:
2737          attr_type = ATTRIBUTE_INT2_10_10_10;
2738          break;
2739       case 8:
2740          attr_type = ATTRIBUTE_BYTE;
2741          break;
2742       default:
2743          fprintf(stderr,
2744                  "format %s unsupported\n",
2745                  desc->name);
2746          attr_type = ATTRIBUTE_BYTE;
2747          abort();
2748       }
2749       break;
2750 
2751    default:
2752       fprintf(stderr,
2753               "format %s unsupported\n",
2754               desc->name);
2755       abort();
2756    }
2757 
2758    return attr_type;
2759 }
2760 
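/* Lazily allocates a BO with default values for all MAX_VERTEX_ATTRIBS
 * attributes: (0, 0, 0, 1), where the w component is an integer 1 for
 * integer vertex formats and a float 1.0 otherwise.
 */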
2761 static bool
2762 create_default_attribute_values(struct v3dv_pipeline *pipeline,
2763                                 const VkPipelineVertexInputStateCreateInfo *vi_info)
2764 {
2765    uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2766 
2767    if (pipeline->default_attribute_values == NULL) {
2768       pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size,
2769                                                          "default_vi_attributes",
2770                                                          true);
2771 
2772       if (!pipeline->default_attribute_values) {
2773          fprintf(stderr, "failed to allocate memory for the default "
2774                  "attribute values\n");
2775          return false;
2776       }
2777    }
2778 
2779    bool ok = v3dv_bo_map(pipeline->device,
2780                          pipeline->default_attribute_values, size);
2781    if (!ok) {
2782       fprintf(stderr, "failed to map default attribute values buffer\n");
2783       return false;
2784    }
2785 
2786    uint32_t *attrs = pipeline->default_attribute_values->map;
2787 
2788    for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2789       attrs[i * 4 + 0] = 0;
2790       attrs[i * 4 + 1] = 0;
2791       attrs[i * 4 + 2] = 0;
2792       if (i < pipeline->va_count && vk_format_is_int(pipeline->va[i].vk_format)) {
2793          attrs[i * 4 + 3] = 1;
2794       } else {
2795          attrs[i * 4 + 3] = fui(1.0);
2796       }
2797    }
2798 
2799    v3dv_bo_unmap(pipeline->device, pipeline->default_attribute_values);
2800 
2801    return true;
2802 }
2803 
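/* Packs one GL_SHADER_STATE_ATTRIBUTE_RECORD into the pipeline's
 * vertex_attrs buffer at slot 'index', describing the format, stride and
 * instance divisor of a single vertex attribute.
 */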
2804 static void
2805 pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
2806                                    uint32_t index,
2807                                    const VkVertexInputAttributeDescription *vi_desc)
2808 {
2809    const uint32_t packet_length =
2810       cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
2811 
2812    const struct util_format_description *desc =
2813       vk_format_description(vi_desc->format);
2814 
2815    uint32_t binding = vi_desc->binding;
2816 
2817    v3dv_pack(&pipeline->vertex_attrs[index * packet_length],
2818              GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
2819 
2820       /* vec_size == 0 means 4 */
2821       attr.vec_size = desc->nr_channels & 3;
2822       attr.signed_int_type = (desc->channel[0].type ==
2823                               UTIL_FORMAT_TYPE_SIGNED);
2824       attr.normalized_int_type = desc->channel[0].normalized;
2825       attr.read_as_int_uint = desc->channel[0].pure_integer;
2826 
2827       attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
2828                                    0xffff);
2829       attr.stride = pipeline->vb[binding].stride;
2830       attr.type = get_attr_type(desc);
2831    }
2832 }
2833 
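/* V3D supports up to 4 samples (V3D_MAX_SAMPLES), so the full sample mask
 * computed below is 0xf.
 */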
2834 static void
2835 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2836                          const VkPipelineMultisampleStateCreateInfo *ms_info)
2837 {
2838    pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2839 
2840    /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2841     * requires this to be 0xf or 0x0 if using a single sample.
2842     */
2843    if (ms_info && ms_info->pSampleMask &&
2844        ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2845       pipeline->sample_mask &= ms_info->pSampleMask[0];
2846    }
2847 }
2848 
2849 static void
2850 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2851                                  const VkPipelineMultisampleStateCreateInfo *ms_info)
2852 {
2853    pipeline->sample_rate_shading =
2854       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2855       ms_info->sampleShadingEnable;
2856 }
2857 
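/* Initializes a graphics pipeline: packs the static hardware state (blend,
 * configuration bits, stencil, early-Z, sample mask/shading), compiles the
 * shader stages and sets up the vertex binding/attribute tables.
 */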
2858 static VkResult
2859 pipeline_init(struct v3dv_pipeline *pipeline,
2860               struct v3dv_device *device,
2861               struct v3dv_pipeline_cache *cache,
2862               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2863               const VkAllocationCallbacks *pAllocator)
2864 {
2865    VkResult result = VK_SUCCESS;
2866 
2867    pipeline->device = device;
2868 
2869    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2870    pipeline->layout = layout;
2871 
2872    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2873    assert(pCreateInfo->subpass < render_pass->subpass_count);
2874    pipeline->pass = render_pass;
2875    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2876 
2877    /* If rasterization is disabled, the viewport, multisample, depth/stencil
2878     * and color blend create info must be ignored.
2879     */
2880    const bool raster_enabled =
2881       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2882 
2883    const VkPipelineViewportStateCreateInfo *vp_info =
2884       raster_enabled ? pCreateInfo->pViewportState : NULL;
2885 
2886    const VkPipelineDepthStencilStateCreateInfo *ds_info =
2887       raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2888 
2889    const VkPipelineRasterizationStateCreateInfo *rs_info =
2890       raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2891 
2892    const VkPipelineColorBlendStateCreateInfo *cb_info =
2893       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2894 
2895    const VkPipelineMultisampleStateCreateInfo *ms_info =
2896       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2897 
2898    pipeline_init_dynamic_state(pipeline,
2899                                pCreateInfo->pDynamicState,
2900                                vp_info, ds_info, cb_info, rs_info);
2901 
2902    /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2903     * feature and it shouldn't be used by any pipeline.
2904     */
2905    assert(!ds_info || !ds_info->depthBoundsTestEnable);
2906 
2907    pack_blend(pipeline, cb_info);
2908    pack_cfg_bits(pipeline, ds_info, rs_info, ms_info);
2909    pack_stencil_cfg(pipeline, ds_info);
2910    pipeline_set_ez_state(pipeline, ds_info);
2911    enable_depth_bias(pipeline, rs_info);
2912    pipeline_set_sample_mask(pipeline, ms_info);
2913    pipeline_set_sample_rate_shading(pipeline, ms_info);
2914 
2915    pipeline->primitive_restart =
2916       pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2917 
2918    result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2919 
2920    if (result != VK_SUCCESS) {
2921       /* The caller will destroy the pipeline and we didn't allocate any
2922        * extra info, so there is nothing else to clean up here.
2923        */
2924       return result;
2925    }
2926 
2927    pack_shader_state_record(pipeline);
2928    pack_vcm_cache_size(pipeline);
2929 
2930    const VkPipelineVertexInputStateCreateInfo *vi_info =
2931       pCreateInfo->pVertexInputState;
2932 
2933    pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
2934    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2935       const VkVertexInputBindingDescription *desc =
2936          &vi_info->pVertexBindingDescriptions[i];
2937 
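      /* Note: the input rate is used directly as the instance divisor: per
       * the Vulkan enums, VK_VERTEX_INPUT_RATE_VERTEX is 0 and
       * VK_VERTEX_INPUT_RATE_INSTANCE is 1.
       */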
2938       pipeline->vb[desc->binding].stride = desc->stride;
2939       pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
2940    }
2941 
2942    pipeline->va_count = 0;
2943    nir_shader *shader = pipeline->vs->nir;
2944 
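   /* Walk the attribute descriptions and match each one to a vertex shader
    * input variable by location; only attributes the shader actually reads
    * get an attribute record packed for them.
    */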
2945    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2946       const VkVertexInputAttributeDescription *desc =
2947          &vi_info->pVertexAttributeDescriptions[i];
2948       uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
2949 
2950       nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_in, location);
2951 
2952       if (var != NULL) {
2953          unsigned driver_location = var->data.driver_location;
2954 
2955          assert(driver_location < MAX_VERTEX_ATTRIBS);
2956          pipeline->va[driver_location].offset = desc->offset;
2957          pipeline->va[driver_location].binding = desc->binding;
2958          pipeline->va[driver_location].vk_format = desc->format;
2959 
2960          pack_shader_state_attribute_record(pipeline, driver_location, desc);
2961 
2962          pipeline->va_count++;
2963       }
2964    }
2965 
2966    if (!create_default_attribute_values(pipeline, vi_info))
2967       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2968 
2969    return result;
2970 }
2971 
2972 static VkResult
2973 graphics_pipeline_create(VkDevice _device,
2974                          VkPipelineCache _cache,
2975                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
2976                          const VkAllocationCallbacks *pAllocator,
2977                          VkPipeline *pPipeline)
2978 {
2979    V3DV_FROM_HANDLE(v3dv_device, device, _device);
2980    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2981 
2982    struct v3dv_pipeline *pipeline;
2983    VkResult result;
2984 
2985    /* Use the default pipeline cache if none is specified */
2986    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2987        cache = &device->default_pipeline_cache;
2988 
2989    pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
2990                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2991    if (pipeline == NULL)
2992       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2993 
2994    result = pipeline_init(pipeline, device, cache,
2995                           pCreateInfo,
2996                           pAllocator);
2997 
2998    if (result != VK_SUCCESS) {
2999       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3000       return result;
3001    }
3002 
3003    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3004 
3005    return VK_SUCCESS;
3006 }
3007 
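/* Creation of the remaining pipelines is still attempted when one of them
 * fails; failed entries are set to VK_NULL_HANDLE and the last error seen
 * is returned.
 */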
3008 VkResult
3009 v3dv_CreateGraphicsPipelines(VkDevice _device,
3010                              VkPipelineCache pipelineCache,
3011                              uint32_t count,
3012                              const VkGraphicsPipelineCreateInfo *pCreateInfos,
3013                              const VkAllocationCallbacks *pAllocator,
3014                              VkPipeline *pPipelines)
3015 {
3016    VkResult result = VK_SUCCESS;
3017 
3018    for (uint32_t i = 0; i < count; i++) {
3019       VkResult local_result;
3020 
3021       local_result = graphics_pipeline_create(_device,
3022                                               pipelineCache,
3023                                               &pCreateInfos[i],
3024                                               pAllocator,
3025                                               &pPipelines[i]);
3026 
3027       if (local_result != VK_SUCCESS) {
3028          result = local_result;
3029          pPipelines[i] = VK_NULL_HANDLE;
3030       }
3031    }
3032 
3033    return result;
3034 }
3035 
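/* Size/alignment callback for shared memory variables: booleans take 4
 * bytes and a 3-component vector is aligned like a 4-component one.
 */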
3036 static void
3037 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3038 {
3039    assert(glsl_type_is_vector_or_scalar(type));
3040 
3041    uint32_t comp_size = glsl_type_is_boolean(type)
3042       ? 4 : glsl_get_bit_size(type) / 8;
3043    unsigned length = glsl_get_vector_elements(type);
3044    *size = comp_size * length;
3045    *align = comp_size * (length == 3 ? 4 : length);
3046 }
3047 
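/* Lowers compute shared-memory variables to explicit types and 32-bit byte
 * offsets so the backend can address the shared segment directly.
 */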
3048 static void
3049 lower_cs_shared(struct nir_shader *nir)
3050 {
3051    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
3052               nir_var_mem_shared, shared_type_info);
3053    NIR_PASS_V(nir, nir_lower_explicit_io,
3054               nir_var_mem_shared, nir_address_format_32bit_offset);
3055 }
3056 
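/* Builds the single compute stage: hashes the shader, gets its NIR
 * (possibly from the cache), lowers it, and pre-generates the shader
 * variants for a default v3d key.
 */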
3057 static VkResult
3058 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3059                          struct v3dv_pipeline_cache *cache,
3060                          const VkComputePipelineCreateInfo *info,
3061                          const VkAllocationCallbacks *alloc)
3062 {
3063    struct v3dv_device *device = pipeline->device;
3064    struct v3dv_physical_device *physical_device =
3065       &device->instance->physicalDevice;
3066 
3067    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3068    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3069 
3070    struct v3dv_pipeline_stage *p_stage =
3071       vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8,
3072                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3073    if (!p_stage)
3074       return VK_ERROR_OUT_OF_HOST_MEMORY;
3075 
3076    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3077    p_stage->compiled_variant_count = 0;
3078    p_stage->pipeline = pipeline;
3079    p_stage->stage = stage;
3080    p_stage->entrypoint = sinfo->pName;
3081    p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
3082    p_stage->spec_info = sinfo->pSpecializationInfo;
3083 
3084    pipeline_hash_shader(p_stage->module,
3085                         p_stage->entrypoint,
3086                         stage,
3087                         p_stage->spec_info,
3088                         p_stage->shader_sha1);
3089 
3090    p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3091 
3092    pipeline->active_stages |= sinfo->stage;
3093    st_nir_opts(p_stage->nir);
3094    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3095    lower_cs_shared(p_stage->nir);
3096 
3097    pipeline->cs = p_stage;
3098 
3099    struct v3d_key *key = &p_stage->key.base;
3100    memset(key, 0, sizeof(*key));
3101    pipeline_populate_v3d_key(key, p_stage, 0,
3102                              pipeline->device->features.robustBufferAccess);
3103 
3104    VkResult result;
3105    p_stage->current_variant =
3106       pregenerate_shader_variants(p_stage, cache, key, sizeof(*key), alloc, &result);
3107    return result;
3108 }
3109 
3110 static VkResult
3111 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3112                       struct v3dv_device *device,
3113                       struct v3dv_pipeline_cache *cache,
3114                       const VkComputePipelineCreateInfo *info,
3115                       const VkAllocationCallbacks *alloc)
3116 {
3117    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3118 
3119    pipeline->device = device;
3120    pipeline->layout = layout;
3121 
3122    VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3123 
3124    return result;
3125 }
3126 
3127 static VkResult
3128 compute_pipeline_create(VkDevice _device,
3129                          VkPipelineCache _cache,
3130                          const VkComputePipelineCreateInfo *pCreateInfo,
3131                          const VkAllocationCallbacks *pAllocator,
3132                          VkPipeline *pPipeline)
3133 {
3134    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3135    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3136 
3137    struct v3dv_pipeline *pipeline;
3138    VkResult result;
3139 
3140    /* Use the default pipeline cache if none is specified */
3141    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3142        cache = &device->default_pipeline_cache;
3143 
3144    pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
3145                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3146    if (pipeline == NULL)
3147       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3148 
3149    result = compute_pipeline_init(pipeline, device, cache,
3150                                   pCreateInfo, pAllocator);
3151    if (result != VK_SUCCESS) {
3152       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3153       return result;
3154    }
3155 
3156    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3157 
3158    return VK_SUCCESS;
3159 }
3160 
3161 VkResult
3162 v3dv_CreateComputePipelines(VkDevice _device,
3163                             VkPipelineCache pipelineCache,
3164                             uint32_t createInfoCount,
3165                             const VkComputePipelineCreateInfo *pCreateInfos,
3166                             const VkAllocationCallbacks *pAllocator,
3167                             VkPipeline *pPipelines)
3168 {
3169    VkResult result = VK_SUCCESS;
3170 
3171    for (uint32_t i = 0; i < createInfoCount; i++) {
3172       VkResult local_result;
3173       local_result = compute_pipeline_create(_device,
3174                                               pipelineCache,
3175                                               &pCreateInfos[i],
3176                                               pAllocator,
3177                                               &pPipelines[i]);
3178 
3179       if (local_result != VK_SUCCESS) {
3180          result = local_result;
3181          pPipelines[i] = VK_NULL_HANDLE;
3182       }
3183    }
3184 
3185    return result;
3186 }
3187