/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_shader.h"

#include "nvk_cmd_buffer.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_sampler.h"
#include "nvk_shader.h"

#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "vk_shader_module.h"
#include "vk_ycbcr_conversion.h"

#include "nak.h"
#include "nir.h"
#include "nir_builder.h"
#include "compiler/spirv/nir_spirv.h"

#include "nv50_ir_driver.h"

#include "util/mesa-sha1.h"
#include "util/u_debug.h"

#include "cla097.h"
#include "clb097.h"
#include "clc397.h"
#include "clc597.h"

static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length, *align = comp_size;
}

VkShaderStageFlags
nvk_nak_stages(const struct nv_device_info *info)
{
   const VkShaderStageFlags all =
      VK_SHADER_STAGE_VERTEX_BIT |
      VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
      VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
      VK_SHADER_STAGE_GEOMETRY_BIT |
      VK_SHADER_STAGE_FRAGMENT_BIT |
      VK_SHADER_STAGE_COMPUTE_BIT;

   const struct debug_control flags[] = {
      { "vs", BITFIELD64_BIT(MESA_SHADER_VERTEX) },
      { "tcs", BITFIELD64_BIT(MESA_SHADER_TESS_CTRL) },
      { "tes", BITFIELD64_BIT(MESA_SHADER_TESS_EVAL) },
      { "gs", BITFIELD64_BIT(MESA_SHADER_GEOMETRY) },
      { "fs", BITFIELD64_BIT(MESA_SHADER_FRAGMENT) },
      { "cs", BITFIELD64_BIT(MESA_SHADER_COMPUTE) },
      { "all", all },
      { NULL, 0 },
   };

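   /* NVK_USE_NAK selects which stages use NAK: a debug-flag list of the
    * stage names above (e.g. "vs,fs") or "all".  When it is unset, NAK is
    * used for every stage on Volta and later and for nothing on older GPUs.
    */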
   const char *env_str = getenv("NVK_USE_NAK");
   if (env_str == NULL)
      return info->cls_eng3d >= VOLTA_A ? all : 0;
   else
      return parse_debug_string(env_str, flags);
}

static bool
use_nak(const struct nvk_physical_device *pdev, gl_shader_stage stage)
{
   return nvk_nak_stages(&pdev->info) & mesa_to_vk_shader_stage(stage);
}

uint64_t
nvk_physical_device_compiler_flags(const struct nvk_physical_device *pdev)
{
   bool no_cbufs = pdev->debug_flags & NVK_DEBUG_NO_CBUF;
   uint64_t prog_debug = nvk_cg_get_prog_debug();
   uint64_t prog_optimize = nvk_cg_get_prog_optimize();
   uint64_t nak_stages = nvk_nak_stages(&pdev->info);
   uint64_t nak_flags = nak_debug_flags(pdev->nak);

   assert(prog_debug <= UINT8_MAX);
   assert(prog_optimize < 16);
   assert(nak_stages <= UINT32_MAX);
   assert(nak_flags <= UINT16_MAX);

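   /* Pack everything into a single 64-bit key:
    *   bits  0..7   prog_debug
    *   bits  8..11  prog_optimize
    *   bit   12     no_cbufs
    *   bits 16..47  nak_stages
    *   bits 48..63  nak_flags
    */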
   return prog_debug
      | (prog_optimize << 8)
      | ((uint64_t)no_cbufs << 12)
      | (nak_stages << 16)
      | (nak_flags << 48);
}

static const nir_shader_compiler_options *
nvk_get_nir_options(struct vk_physical_device *vk_pdev,
                    gl_shader_stage stage,
                    UNUSED const struct vk_pipeline_robustness_state *rs)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   if (use_nak(pdev, stage))
      return nak_nir_options(pdev->nak);
   else
      return nvk_cg_nir_options(pdev, stage);
}

static struct spirv_to_nir_options
nvk_get_spirv_options(struct vk_physical_device *vk_pdev,
                      UNUSED gl_shader_stage stage,
                      const struct vk_pipeline_robustness_state *rs)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   return (struct spirv_to_nir_options) {
      .caps = {
         .demote_to_helper_invocation = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float_controls = true,
         .float64 = true,
         .fragment_barycentric = true,
         .geometry_streams = true,
         .image_atomic_int64 = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = true,
         .runtime_descriptor_array = true,
         .shader_clock = true,
         .shader_sm_builtins_nv = true,
         .shader_viewport_index_layer = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_ballot = true,
         .subgroup_basic = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = true,
         .variable_pointers = true,
         .vk_memory_model_device_scope = true,
         .vk_memory_model = true,
         .workgroup_memory_explicit_layout = true,
      },
      .ssbo_addr_format = nvk_buffer_addr_format(rs->storage_buffers),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
      .shared_addr_format = nir_address_format_32bit_offset,
      .min_ssbo_alignment = NVK_MIN_SSBO_ALIGNMENT,
      .min_ubo_alignment = nvk_min_cbuf_alignment(&pdev->info),
   };
}

static void
nvk_preprocess_nir(struct vk_physical_device *vk_pdev, nir_shader *nir)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   if (use_nak(pdev, nir->info.stage))
      nak_preprocess_nir(nir, pdev->nak);
   else
      nvk_cg_preprocess_nir(nir);
}

static void
nvk_populate_fs_key(struct nak_fs_key *key,
                    const struct vk_graphics_pipeline_state *state)
{
   memset(key, 0, sizeof(*key));

   key->sample_locations_cb = 0;
   key->sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations);

   if (state == NULL)
      return;

   if (state->pipeline_flags &
       VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
      key->zs_self_dep = true;

   const struct vk_multisample_state *ms = state->ms;
   if (ms == NULL || ms->rasterization_samples <= 1)
      return;

   if (ms->sample_shading_enable &&
       (ms->rasterization_samples * ms->min_sample_shading) > 1.0)
      key->force_sample_shading = true;
}

static void
nvk_hash_graphics_state(struct vk_physical_device *device,
                        const struct vk_graphics_pipeline_state *state,
                        VkShaderStageFlags stages,
                        blake3_hash blake3_out)
{
   struct mesa_blake3 blake3_ctx;
   _mesa_blake3_init(&blake3_ctx);
   if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
      struct nak_fs_key key;
      nvk_populate_fs_key(&key, state);
      _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));

      const bool is_multiview = state->rp->view_mask != 0;
      _mesa_blake3_update(&blake3_ctx, &is_multiview, sizeof(is_multiview));
   }
   _mesa_blake3_final(&blake3_ctx, blake3_out);
}

static bool
lower_load_global_constant_offset_instr(nir_builder *b,
                                        nir_intrinsic_instr *intrin,
                                        UNUSED void *_data)
{
   if (intrin->intrinsic != nir_intrinsic_load_global_constant_offset &&
       intrin->intrinsic != nir_intrinsic_load_global_constant_bounded)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *base_addr = intrin->src[0].ssa;
   nir_def *offset = intrin->src[1].ssa;

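   /* For the bounded variant, the load is wrapped in an in-bounds check below
    * and out-of-bounds results are replaced with zero via an if/phi.
    */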
   nir_def *zero = NULL;
   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
      nir_def *bound = intrin->src[2].ssa;

      unsigned bit_size = intrin->def.bit_size;
      assert(bit_size >= 8 && bit_size % 8 == 0);
      unsigned byte_size = bit_size / 8;

      zero = nir_imm_zero(b, intrin->num_components, bit_size);

      unsigned load_size = byte_size * intrin->num_components;

      nir_def *sat_offset =
         nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
      nir_def *in_bounds =
         nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);

      nir_push_if(b, in_bounds);
   }

   nir_def *val =
      nir_build_load_global_constant(b, intrin->def.num_components,
                                     intrin->def.bit_size,
                                     nir_iadd(b, base_addr, nir_u2u64(b, offset)),
                                     .align_mul = nir_intrinsic_align_mul(intrin),
                                     .align_offset = nir_intrinsic_align_offset(intrin));

   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
      nir_pop_if(b, NULL);
      val = nir_if_phi(b, val, zero);
   }

   nir_def_rewrite_uses(&intrin->def, val);

   return true;
}

struct lower_ycbcr_state {
   uint32_t set_layout_count;
   struct vk_descriptor_set_layout * const *set_layouts;
};

static const struct vk_ycbcr_conversion_state *
lookup_ycbcr_conversion(const void *_state, uint32_t set,
                        uint32_t binding, uint32_t array_index)
{
   const struct lower_ycbcr_state *state = _state;
   assert(set < state->set_layout_count);
   assert(state->set_layouts[set] != NULL);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(state->set_layouts[set]);
   assert(binding < set_layout->binding_count);

   const struct nvk_descriptor_set_binding_layout *bind_layout =
      &set_layout->binding[binding];

   if (bind_layout->immutable_samplers == NULL)
      return NULL;

   array_index = MIN2(array_index, bind_layout->array_size - 1);

   const struct nvk_sampler *sampler =
      bind_layout->immutable_samplers[array_index];

   return sampler && sampler->vk.ycbcr_conversion ?
          &sampler->vk.ycbcr_conversion->state : NULL;
}

static inline bool
nir_has_image_var(nir_shader *nir)
{
   nir_foreach_image_variable(_, nir)
      return true;

   return false;
}

void
nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
              const struct vk_pipeline_robustness_state *rs,
              bool is_multiview,
              uint32_t set_layout_count,
              struct vk_descriptor_set_layout * const *set_layouts,
              struct nvk_cbuf_map *cbuf_map_out)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_input_attachments,
               &(nir_input_attachment_options) {
                  .use_fragcoord_sysval = use_nak(pdev, nir->info.stage),
                  .use_layer_id_sysval = use_nak(pdev, nir->info.stage) ||
                                         is_multiview,
                  .use_view_id_for_layer = is_multiview,
               });
   }

   if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS(_, nir, nir_lower_patch_vertices,
               nir->info.tess.tcs_vertices_out, NULL);
   }

   const struct lower_ycbcr_state ycbcr_state = {
      .set_layout_count = set_layout_count,
      .set_layouts = set_layouts,
   };
   NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex,
            lookup_ycbcr_conversion, &ycbcr_state);

   nir_lower_compute_system_values_options csv_options = {
      .has_base_workgroup_id = true,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);

   /* Lower push constants before lower_descriptors */
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
            nir_address_format_32bit_offset);

   /* Lower non-uniform access before lower_descriptors */
   enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
      nir_lower_non_uniform_ubo_access;

   if (pdev->info.cls_eng3d < TURING_A) {
      lower_non_uniform_access_types |= nir_lower_non_uniform_texture_access |
                                        nir_lower_non_uniform_image_access;
   }

   /* In practice, most shaders do not have non-uniform-qualified accesses,
    * so we first run a cheaper check that will usually find nothing before
    * invoking the more expensive lowering passes.
    */
   if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
      struct nir_lower_non_uniform_access_options opts = {
         .types = lower_non_uniform_access_types,
         .callback = NULL,
      };
      NIR_PASS(_, nir, nir_opt_non_uniform_access);
      NIR_PASS(_, nir, nir_lower_non_uniform_access, &opts);
   }

   /* TODO: Kepler image lowering requires image params to be loaded from the
    * descriptor set which we don't currently support.
    */
   assert(dev->pdev->info.cls_eng3d >= MAXWELL_A || !nir_has_image_var(nir));

   struct nvk_cbuf_map *cbuf_map = NULL;
   if (use_nak(pdev, nir->info.stage) &&
       !(pdev->debug_flags & NVK_DEBUG_NO_CBUF)) {
      cbuf_map = cbuf_map_out;

      /* Large constant support assumes cbufs */
      NIR_PASS(_, nir, nir_opt_large_constants, NULL, 32);
   } else {
      /* Codegen sometimes puts stuff in cbuf 1 and adds 1 to our cbuf indices
       * so we can't really rely on it for lowering to cbufs and instead place
       * the root descriptors in both cbuf 0 and cbuf 1.
       */
      *cbuf_map_out = (struct nvk_cbuf_map) {
         .cbuf_count = 2,
         .cbufs = {
            { .type = NVK_CBUF_TYPE_ROOT_DESC },
            { .type = NVK_CBUF_TYPE_ROOT_DESC },
         }
      };
   }

   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs,
            set_layout_count, set_layouts, cbuf_map);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
            nir_address_format_64bit_global);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
            nvk_buffer_addr_format(rs->storage_buffers));
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
            nvk_buffer_addr_format(rs->uniform_buffers));
   NIR_PASS(_, nir, nir_shader_intrinsics_pass,
            lower_load_global_constant_offset_instr, nir_metadata_none, NULL);

   if (!nir->info.shared_memory_explicit_layout) {
      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared, shared_var_info);
   }
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
            nir_address_format_32bit_offset);

   if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
      /* QMD::SHARED_MEMORY_SIZE requires an alignment of 256B so it's safe to
       * align everything up to 16B so we can write whole vec4s.
       */
      nir->info.shared_size = align(nir->info.shared_size, 16);
      NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
               nir->info.shared_size, 16);

      /* We need to call lower_compute_system_values again because
       * nir_zero_initialize_shared_memory generates load_invocation_id which
       * has to be lowered to load_invocation_index.
       */
      NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
   }
}

#ifndef NDEBUG
static void
nvk_shader_dump(struct nvk_shader *shader)
{
   unsigned pos;

   if (shader->info.stage != MESA_SHADER_COMPUTE) {
      _debug_printf("dumping HDR for %s shader\n",
                    _mesa_shader_stage_to_string(shader->info.stage));
      for (pos = 0; pos < ARRAY_SIZE(shader->info.hdr); ++pos)
         _debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
                      pos * sizeof(shader->info.hdr[0]), shader->info.hdr[pos]);
   }
   _debug_printf("shader binary code (0x%x bytes):", shader->code_size);
   for (pos = 0; pos < shader->code_size / 4; ++pos) {
      if ((pos % 8) == 0)
         _debug_printf("\n");
      _debug_printf("%08x ", ((const uint32_t *)shader->code_ptr)[pos]);
   }
   _debug_printf("\n");
}
#endif

static VkResult
nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
                         nir_shader *nir,
                         VkShaderCreateFlagsEXT shader_flags,
                         const struct vk_pipeline_robustness_state *rs,
                         const struct nak_fs_key *fs_key,
                         struct nvk_shader *shader)
{
   const bool dump_asm =
      shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;

   nir_variable_mode robust2_modes = 0;
   if (rs->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
      robust2_modes |= nir_var_mem_ubo;
   if (rs->storage_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
      robust2_modes |= nir_var_mem_ssbo;

   shader->nak = nak_compile_shader(nir, dump_asm, pdev->nak, robust2_modes, fs_key);
   shader->info = shader->nak->info;
   shader->code_ptr = shader->nak->code;
   shader->code_size = shader->nak->code_size;

   return VK_SUCCESS;
}

static VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
                VkShaderCreateFlagsEXT shader_flags,
                const struct vk_pipeline_robustness_state *rs,
                const struct nak_fs_key *fs_key,
                struct nvk_shader *shader)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   if (use_nak(pdev, nir->info.stage)) {
      result = nvk_compile_nir_with_nak(pdev, nir, shader_flags, rs,
                                        fs_key, shader);
   } else {
      result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
   }
   if (result != VK_SUCCESS)
      return result;

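   /* Shader constant data (nir->constant_data) is copied out here, padded to
    * the minimum cbuf alignment, and uploaded next to the code by
    * nvk_shader_upload().
    */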
   if (nir->constant_data_size > 0) {
      uint32_t data_align = nvk_min_cbuf_alignment(&pdev->info);
      uint32_t data_size = align(nir->constant_data_size, data_align);

      void *data = malloc(data_size);
      if (data == NULL)
         return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

      memcpy(data, nir->constant_data, nir->constant_data_size);

      assert(nir->constant_data_size <= data_size);
      memset(data + nir->constant_data_size, 0,
             data_size - nir->constant_data_size);

      shader->data_ptr = data;
      shader->data_size = data_size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
{
   uint32_t hdr_size = 0;
   if (shader->info.stage != MESA_SHADER_COMPUTE) {
      if (dev->pdev->info.cls_eng3d >= TURING_A)
         hdr_size = TU102_SHADER_HEADER_SIZE;
      else
         hdr_size = GF100_SHADER_HEADER_SIZE;
   }

   /* Fermi   needs 0x40 alignment
    * Kepler+ needs the first instruction to be 0x80 aligned, so we waste 0x30 bytes
    */
   int alignment = dev->pdev->info.cls_eng3d >= KEPLER_A ? 0x80 : 0x40;

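   /* Upload layout: [pad][header][code][data], where the code must land on
    * the instruction alignment above and the data is aligned for use as a
    * cbuf.  All offsets below are relative to the start of the upload.
    */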
   uint32_t total_size = 0;
   if (dev->pdev->info.cls_eng3d >= KEPLER_A &&
       dev->pdev->info.cls_eng3d < TURING_A &&
       hdr_size > 0) {
      /* The instructions are what has to be aligned so we need to start at a
       * small offset (0x30 B) into the upload area.
       */
      total_size = alignment - hdr_size;
   }

   const uint32_t hdr_offset = total_size;
   total_size += hdr_size;

   const uint32_t code_offset = total_size;
   assert(code_offset % alignment == 0);
   total_size += shader->code_size;

   uint32_t data_offset = 0;
   if (shader->data_size > 0) {
      total_size = align(total_size, nvk_min_cbuf_alignment(&dev->pdev->info));
      data_offset = total_size;
      total_size += shader->data_size;
   }

   char *data = malloc(total_size);
   if (data == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   assert(hdr_size <= sizeof(shader->info.hdr));
   memcpy(data + hdr_offset, shader->info.hdr, hdr_size);
   memcpy(data + code_offset, shader->code_ptr, shader->code_size);
   if (shader->data_size > 0)
      memcpy(data + data_offset, shader->data_ptr, shader->data_size);

#ifndef NDEBUG
   if (debug_get_bool_option("NV50_PROG_DEBUG", false))
      nvk_shader_dump(shader);
#endif

   VkResult result = nvk_heap_upload(dev, &dev->shader_heap, data,
                                     total_size, alignment,
                                     &shader->upload_addr);
   if (result == VK_SUCCESS) {
      shader->upload_size = total_size;

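      /* On pre-Volta hardware, shader headers are referenced relative to the
       * shader heap's base address (the offset must fit in 32 bits), so
       * rebase hdr_addr accordingly.
       */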
      shader->hdr_addr = shader->upload_addr + hdr_offset;
      if (dev->pdev->info.cls_eng3d < VOLTA_A) {
         const uint64_t heap_base_addr =
            nvk_heap_contiguous_base_address(&dev->shader_heap);
         assert(shader->upload_addr - heap_base_addr < UINT32_MAX);
         shader->hdr_addr -= heap_base_addr;
      }
      shader->data_addr = shader->upload_addr + data_offset;
   }
   free(data);

   return result;
}

static const struct vk_shader_ops nvk_shader_ops;

static void
nvk_shader_destroy(struct vk_device *vk_dev,
                   struct vk_shader *vk_shader,
                   const VkAllocationCallbacks* pAllocator)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);

   if (shader->upload_size > 0) {
      nvk_heap_free(dev, &dev->shader_heap,
                    shader->upload_addr,
                    shader->upload_size);
   }

   if (shader->nak) {
      nak_shader_bin_destroy(shader->nak);
   } else {
      /* This came from codegen or deserialize, just free it */
      free((void *)shader->code_ptr);
   }

   free((void *)shader->data_ptr);

   vk_shader_free(&dev->vk, pAllocator, &shader->vk);
}

static VkResult
nvk_compile_shader(struct nvk_device *dev,
                   struct vk_shader_compile_info *info,
                   const struct vk_graphics_pipeline_state *state,
                   const VkAllocationCallbacks* pAllocator,
                   struct vk_shader **shader_out)
{
   struct nvk_shader *shader;
   VkResult result;

   /* We consume the NIR, regardless of success or failure */
   nir_shader *nir = info->nir;

   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info->stage,
                             pAllocator, sizeof(*shader));
   if (shader == NULL) {
      ralloc_free(nir);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* TODO: Multiview with ESO */
   const bool is_multiview = state && state->rp->view_mask != 0;

   nvk_lower_nir(dev, nir, info->robustness, is_multiview,
                 info->set_layout_count, info->set_layouts,
                 &shader->cbuf_map);

   struct nak_fs_key fs_key_tmp, *fs_key = NULL;
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      nvk_populate_fs_key(&fs_key_tmp, state);
      fs_key = &fs_key_tmp;
   }

   result = nvk_compile_nir(dev, nir, info->flags, info->robustness,
                            fs_key, shader);
   ralloc_free(nir);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   if (info->stage == MESA_SHADER_FRAGMENT) {
      if (shader->info.fs.reads_sample_mask ||
          shader->info.fs.uses_sample_shading) {
         shader->min_sample_shading = 1;
      } else if (state != NULL && state->ms != NULL &&
                 state->ms->sample_shading_enable) {
         shader->min_sample_shading =
            CLAMP(state->ms->min_sample_shading, 0, 1);
      } else {
         shader->min_sample_shading = 0;
      }
   }

   *shader_out = &shader->vk;

   return VK_SUCCESS;
}

static VkResult
nvk_compile_shaders(struct vk_device *vk_dev,
                    uint32_t shader_count,
                    struct vk_shader_compile_info *infos,
                    const struct vk_graphics_pipeline_state *state,
                    const VkAllocationCallbacks* pAllocator,
                    struct vk_shader **shaders_out)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);

   for (uint32_t i = 0; i < shader_count; i++) {
      VkResult result = nvk_compile_shader(dev, &infos[i], state,
                                           pAllocator, &shaders_out[i]);
      if (result != VK_SUCCESS) {
         /* Clean up all the shaders before this point */
         for (uint32_t j = 0; j < i; j++)
            nvk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);

         /* Clean up all the NIR after this point */
         for (uint32_t j = i + 1; j < shader_count; j++)
            ralloc_free(infos[j].nir);

         /* Memset the output array */
         memset(shaders_out, 0, shader_count * sizeof(*shaders_out));

         return result;
      }
   }

   return VK_SUCCESS;
}

static VkResult
nvk_deserialize_shader(struct vk_device *vk_dev,
                       struct blob_reader *blob,
                       uint32_t binary_version,
                       const VkAllocationCallbacks* pAllocator,
                       struct vk_shader **shader_out)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
   struct nvk_shader *shader;
   VkResult result;

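   /* The blob layout must match nvk_shader_serialize() exactly:
    * info, cbuf_map, min_sample_shading, code_size, data_size, code, data.
    */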
   struct nak_shader_info info;
   blob_copy_bytes(blob, &info, sizeof(info));

   struct nvk_cbuf_map cbuf_map;
   blob_copy_bytes(blob, &cbuf_map, sizeof(cbuf_map));

   float min_sample_shading;
   blob_copy_bytes(blob, &min_sample_shading, sizeof(min_sample_shading));

   const uint32_t code_size = blob_read_uint32(blob);
   const uint32_t data_size = blob_read_uint32(blob);
   if (blob->overrun)
      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info.stage,
                             pAllocator, sizeof(*shader));
   if (shader == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   shader->info = info;
   shader->cbuf_map = cbuf_map;
   shader->min_sample_shading = min_sample_shading;
   shader->code_size = code_size;
   shader->data_size = data_size;

   shader->code_ptr = malloc(code_size);
   if (shader->code_ptr == NULL) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   shader->data_ptr = malloc(data_size);
   if (shader->data_ptr == NULL) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   blob_copy_bytes(blob, (void *)shader->code_ptr, shader->code_size);
   blob_copy_bytes(blob, (void *)shader->data_ptr, shader->data_size);
   if (blob->overrun) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
   }

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   *shader_out = &shader->vk;

   return VK_SUCCESS;
}

static bool
nvk_shader_serialize(struct vk_device *vk_dev,
                     const struct vk_shader *vk_shader,
                     struct blob *blob)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);

   /* We can't currently cache assembly */
   if (shader->nak != NULL && shader->nak->asm_str != NULL)
      return false;

   blob_write_bytes(blob, &shader->info, sizeof(shader->info));
   blob_write_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
   blob_write_bytes(blob, &shader->min_sample_shading,
                    sizeof(shader->min_sample_shading));

   blob_write_uint32(blob, shader->code_size);
   blob_write_uint32(blob, shader->data_size);
   blob_write_bytes(blob, shader->code_ptr, shader->code_size);
   blob_write_bytes(blob, shader->data_ptr, shader->data_size);

   return !blob->out_of_memory;
}

#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})

static VkResult
nvk_shader_get_executable_properties(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t *executable_count,
   VkPipelineExecutablePropertiesKHR *properties)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          properties, executable_count);

   vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
      props->stages = mesa_to_vk_shader_stage(shader->info.stage);
      props->subgroupSize = 32;
      WRITE_STR(props->name, "%s",
                _mesa_shader_stage_to_string(shader->info.stage));
      WRITE_STR(props->description, "%s shader",
                _mesa_shader_stage_to_string(shader->info.stage));
   }

   return vk_outarray_status(&out);
}

static VkResult
nvk_shader_get_executable_statistics(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t executable_index,
   uint32_t *statistic_count,
   VkPipelineExecutableStatisticKHR *statistics)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          statistics, statistic_count);

   assert(executable_index == 0);

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Code Size");
      WRITE_STR(stat->description,
                "Size of the compiled shader binary, in bytes");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->code_size;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Number of GPRs");
      WRITE_STR(stat->description, "Number of GPRs used by this pipeline");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->info.num_gprs;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "SLM Size");
      WRITE_STR(stat->description,
                "Size of shader local (scratch) memory, in bytes");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->info.slm_size;
   }

   return vk_outarray_status(&out);
}

static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
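   /* Standard Vulkan two-call idiom: when pData is NULL we only report the
    * required size; otherwise we copy as much as fits and return false if
    * the text was truncated.
    */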
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

static VkResult
nvk_shader_get_executable_internal_representations(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t executable_index,
   uint32_t *internal_representation_count,
   VkPipelineExecutableInternalRepresentationKHR *internal_representations)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          internal_representations,
                          internal_representation_count);
   bool incomplete_text = false;

   assert(executable_index == 0);

   if (shader->nak != NULL && shader->nak->asm_str != NULL) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "NAK assembly");
         WRITE_STR(ir->description, "NAK assembly");
         if (!write_ir_text(ir, shader->nak->asm_str))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

static const struct vk_shader_ops nvk_shader_ops = {
   .destroy = nvk_shader_destroy,
   .serialize = nvk_shader_serialize,
   .get_executable_properties = nvk_shader_get_executable_properties,
   .get_executable_statistics = nvk_shader_get_executable_statistics,
   .get_executable_internal_representations =
      nvk_shader_get_executable_internal_representations,
};

const struct vk_device_shader_ops nvk_device_shader_ops = {
   .get_nir_options = nvk_get_nir_options,
   .get_spirv_options = nvk_get_spirv_options,
   .preprocess_nir = nvk_preprocess_nir,
   .hash_graphics_state = nvk_hash_graphics_state,
   .compile = nvk_compile_shaders,
   .deserialize = nvk_deserialize_shader,
   .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
   .cmd_bind_shaders = nvk_cmd_bind_shaders,
};
960