/*
 * Copyright © 2021 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#ifndef PANVK_SHADER_H
#define PANVK_SHADER_H

#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif

#include "util/pan_ir.h"

#include "pan_desc.h"

#include "panvk_cmd_push_constant.h"
#include "panvk_descriptor_set.h"
#include "panvk_macros.h"
#include "panvk_mempool.h"

#include "vk_pipeline_layout.h"
#include "vk_shader.h"

extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);

#define MAX_VS_ATTRIBS 16

struct nir_shader;
struct pan_blend_state;
struct panvk_device;

enum panvk_varying_buf_id {
   PANVK_VARY_BUF_GENERAL,
   PANVK_VARY_BUF_POSITION,
   PANVK_VARY_BUF_PSIZ,

   /* Keep last */
   PANVK_VARY_BUF_MAX,
};

#if PAN_ARCH <= 7
enum panvk_desc_table_id {
   PANVK_DESC_TABLE_USER = 0,

   PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
   PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,

   PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
   PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
   PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
};
#endif

#define FAU_WORD_SIZE sizeof(uint64_t)

#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t

struct panvk_graphics_sysvals {
   struct {
      float constants[4];
   } blend;

   struct {
      struct {
         float x, y, z;
      } scale, offset;
   } viewport;

   struct {
#if PAN_ARCH <= 7
      int32_t raw_vertex_offset;
#endif
      int32_t first_vertex;
      int32_t base_instance;
      uint32_t noperspective_varyings;
   } vs;

   aligned_u64 push_consts;

#if PAN_ARCH <= 7
   /* gl_Layer on Bifrost is a bit of a hack. We have to issue one draw per
    * layer, and filter primitives at the VS level.
    */
   int32_t layer_id;

   struct {
      aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
   } desc;
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));

static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
              "struct panvk_graphics_sysvals must be 8-byte aligned");
static_assert((offsetof(struct panvk_graphics_sysvals, push_consts) %
               FAU_WORD_SIZE) == 0,
              "panvk_graphics_sysvals::push_consts must be 8-byte aligned");
#if PAN_ARCH <= 7
static_assert((offsetof(struct panvk_graphics_sysvals, desc) %
               FAU_WORD_SIZE) == 0,
              "panvk_graphics_sysvals::desc must be 8-byte aligned");
#endif

struct panvk_compute_sysvals {
   struct {
      uint32_t x, y, z;
   } base;
   struct {
      uint32_t x, y, z;
   } num_work_groups;
   struct {
      uint32_t x, y, z;
   } local_group_size;

   aligned_u64 push_consts;

#if PAN_ARCH <= 7
   struct {
      aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
   } desc;
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));

static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
              "struct panvk_compute_sysvals must be 8-byte aligned");
static_assert((offsetof(struct panvk_compute_sysvals, push_consts) %
               FAU_WORD_SIZE) == 0,
              "panvk_compute_sysvals::push_consts must be 8-byte aligned");
#if PAN_ARCH <= 7
static_assert((offsetof(struct panvk_compute_sysvals, desc) %
               FAU_WORD_SIZE) == 0,
              "panvk_compute_sysvals::desc must be 8-byte aligned");
#endif
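/* Worked example (illustrative only, not referenced by the code below): with
 * FAU_WORD_SIZE == 8, panvk_compute_sysvals::num_work_groups starts at byte
 * offset 12 and is 12 bytes long, so it covers bytes 12..23 and therefore
 * 64-bit FAU words 1..2:
 *
 *    sysval_fau_start(compute, num_work_groups) == 12 / 8            == 1
 *    sysval_fau_end(compute, num_work_groups)   == (12 + 12 - 1) / 8 == 2
 *
 * The sysval_fau_* and shader_use_sysval* macros defined below operate on
 * these FAU word indices, not on byte offsets.
 */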
/* This is not the final offset in the push constant buffer (AKA FAU), but
 * just a magic offset we use before packing push constants so we can easily
 * identify the type of push constant (driver sysvals vs user push constants).
 */
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE

#define sysval_size(__ptype, __name)                                          \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name)

#define sysval_offset(__ptype, __name)                                        \
   offsetof(struct panvk_##__ptype##_sysvals, __name)

#define sysval_entry_size(__ptype, __name)                                    \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])

#define sysval_entry_offset(__ptype, __name, __idx)                           \
   (sysval_offset(__ptype, __name) +                                          \
    (sysval_entry_size(__ptype, __name) * __idx))

#define sysval_fau_start(__ptype, __name)                                     \
   (sysval_offset(__ptype, __name) / FAU_WORD_SIZE)

#define sysval_fau_end(__ptype, __name)                                       \
   ((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) /     \
    FAU_WORD_SIZE)

#define sysval_fau_entry_start(__ptype, __name, __idx)                        \
   (sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)

#define sysval_fau_entry_end(__ptype, __name, __idx)                          \
   ((sysval_entry_offset(__ptype, __name, __idx + 1) - 1) / FAU_WORD_SIZE)

#define shader_remapped_fau_offset(__shader, __kind, __offset)                \
   ((FAU_WORD_SIZE * BITSET_PREFIX_SUM((__shader)->fau.used_##__kind,         \
                                       (__offset) / FAU_WORD_SIZE)) +         \
    ((__offset) % FAU_WORD_SIZE))

#define shader_remapped_sysval_offset(__shader, __offset)                     \
   shader_remapped_fau_offset(__shader, sysvals, __offset)

#define shader_remapped_push_const_offset(__shader, __offset)                 \
   (((__shader)->fau.sysval_count * FAU_WORD_SIZE) +                          \
    shader_remapped_fau_offset(__shader, push_consts, __offset))

#define shader_use_sysval(__shader, __ptype, __name)                          \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals,                             \
                    sysval_fau_start(__ptype, __name),                        \
                    sysval_fau_end(__ptype, __name))

#define shader_uses_sysval(__shader, __ptype, __name)                         \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals,                            \
                     sysval_fau_start(__ptype, __name),                       \
                     sysval_fau_end(__ptype, __name))

#define shader_uses_sysval_entry(__shader, __ptype, __name, __idx)            \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals,                            \
                     sysval_fau_entry_start(__ptype, __name, __idx),          \
                     sysval_fau_entry_end(__ptype, __name, __idx))

#define shader_use_sysval_range(__shader, __base, __range)                    \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals, (__base) / FAU_WORD_SIZE,   \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)

#define shader_use_push_const_range(__shader, __base, __range)                \
   BITSET_SET_RANGE((__shader)->fau.used_push_consts,                         \
                    (__base) / FAU_WORD_SIZE,                                 \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)

#define load_sysval(__b, __ptype, __bitsz, __name)                            \
   nir_load_push_constant(                                                    \
      __b, sysval_size(__ptype, __name) / ((__bitsz) / 8), __bitsz,           \
      nir_imm_int(__b, sysval_offset(__ptype, __name)),                       \
      .base = SYSVALS_PUSH_CONST_BASE)

#define load_sysval_entry(__b, __ptype, __bitsz, __name, __dyn_idx)           \
   nir_load_push_constant(                                                    \
      __b, sysval_entry_size(__ptype, __name) / ((__bitsz) / 8), __bitsz,     \
      nir_iadd_imm(                                                           \
         __b,                                                                 \
         nir_imul_imm(__b, __dyn_idx, sysval_entry_size(__ptype, __name)),    \
         sysval_offset(__ptype, __name)),                                     \
      .base = SYSVALS_PUSH_CONST_BASE)
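/* Usage sketch (illustrative, assuming a lowering pass with a nir_builder *b
 * and the struct panvk_shader *shader being compiled at hand). Loading the
 * viewport scale and recording the corresponding FAU words as used would
 * look roughly like:
 *
 *    nir_def *scale = load_sysval(b, graphics, 32, viewport.scale);
 *    shader_use_sysval(shader, graphics, viewport.scale);
 *
 * load_sysval() expands to a nir_load_push_constant() at
 * SYSVALS_PUSH_CONST_BASE plus the sysval's byte offset;
 * shader_remapped_sysval_offset() then maps that byte offset onto the
 * compacted FAU layout implied by fau.used_sysvals.
 */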
#if PAN_ARCH <= 7
enum panvk_bifrost_desc_table_type {
   PANVK_BIFROST_DESC_TABLE_INVALID = -1,

   /* UBO is encoded on 8 bytes */
   PANVK_BIFROST_DESC_TABLE_UBO = 0,

   /* Images use a <3DAttributeBuffer,Attribute> pair, each of them being
    * stored in a separate table.
    */
   PANVK_BIFROST_DESC_TABLE_IMG,

   /* Texture and sampler are encoded on 32 bytes */
   PANVK_BIFROST_DESC_TABLE_TEXTURE,
   PANVK_BIFROST_DESC_TABLE_SAMPLER,

   PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif

#define COPY_DESC_HANDLE(table, idx)           (((table) << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)

#define MAX_COMPUTE_SYSVAL_FAUS                                               \
   (sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)
#define MAX_GFX_SYSVAL_FAUS                                                   \
   (sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)
#define MAX_SYSVAL_FAUS     MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)
#define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)

struct panvk_shader_fau_info {
   BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
   BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
   uint32_t sysval_count;
   uint32_t total_count;
};

struct panvk_shader {
   struct vk_shader vk;
   struct pan_shader_info info;
   struct pan_compute_dim local_size;

   struct {
      uint32_t used_set_mask;

#if PAN_ARCH <= 7
      struct {
         uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
         uint32_t count;
      } dyn_ubos;
      struct {
         uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
         uint32_t count;
      } dyn_ssbos;
      struct {
         struct panvk_priv_mem map;
         uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
      } others;
#else
      struct {
         uint32_t map[MAX_DYNAMIC_BUFFERS];
         uint32_t count;
      } dyn_bufs;
#endif
   } desc_info;

   struct panvk_shader_fau_info fau;

   const void *bin_ptr;
   uint32_t bin_size;

   struct panvk_priv_mem code_mem;

#if PAN_ARCH <= 7
   struct panvk_priv_mem rsd;
#else
   union {
      struct panvk_priv_mem spd;
      struct {
         struct panvk_priv_mem pos_points;
         struct panvk_priv_mem pos_triangles;
         struct panvk_priv_mem var;
      } spds;
   };
#endif

   const char *nir_str;
   const char *asm_str;
};

static inline uint64_t
panvk_shader_get_dev_addr(const struct panvk_shader *shader)
{
   return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
}

#if PAN_ARCH <= 7
struct panvk_shader_link {
   struct {
      struct panvk_priv_mem attribs;
   } vs, fs;
   unsigned buf_strides[PANVK_VARY_BUF_MAX];
};

VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
                                      const struct panvk_shader *vs,
                                      const struct panvk_shader *fs,
                                      struct panvk_shader_link *link);

static inline void
panvk_shader_link_cleanup(struct panvk_shader_link *link)
{
   panvk_pool_free_mem(&link->vs.attribs);
   panvk_pool_free_mem(&link->fs.attribs);
}
#endif

void panvk_per_arch(nir_lower_descriptors)(
   nir_shader *nir, struct panvk_device *dev,
   const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
   struct vk_descriptor_set_layout *const *set_layouts,
   struct panvk_shader *shader);

/* This is a stripped-down version of panvk_shader for internal shaders that
 * are managed by vk_meta (blend and preload shaders). Those don't need the
 * complexity inherent to user-provided shaders as they're not exposed.
 */
struct panvk_internal_shader {
   struct vk_shader vk;
   struct pan_shader_info info;
   struct panvk_priv_mem code_mem;

#if PAN_ARCH <= 7
   struct panvk_priv_mem rsd;
#else
   struct panvk_priv_mem spd;
#endif
};

VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader, vk.base, VkShaderEXT,
                               VK_OBJECT_TYPE_SHADER_EXT)

VkResult panvk_per_arch(create_internal_shader)(
   struct panvk_device *dev, nir_shader *nir,
   struct panfrost_compile_inputs *compiler_inputs,
   struct panvk_internal_shader **shader_out);

#endif /* PANVK_SHADER_H */