1 /*
2 * Copyright © 2021 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #ifndef PANVK_SHADER_H
7 #define PANVK_SHADER_H
8
9 #ifndef PAN_ARCH
10 #error "PAN_ARCH must be defined"
11 #endif
12
13 #include "util/pan_ir.h"
14
15 #include "pan_desc.h"
16
17 #include "panvk_cmd_push_constant.h"
18 #include "panvk_descriptor_set.h"
19 #include "panvk_macros.h"
20 #include "panvk_mempool.h"
21
22 #include "vk_pipeline_layout.h"
23
24 #include "vk_shader.h"
25
/* Shader ops table, declared through panvk_per_arch(). Only the declaration
 * lives in this header. */
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);

/* Upper bound on the number of vertex shader attributes.
 * NOTE(review): the users of this limit are not visible in this header. */
#define MAX_VS_ATTRIBS 16

/* Forward declarations, so this header does not have to pull in the full
 * definitions. */
struct nir_shader;
struct pan_blend_state;
struct panvk_device;
33
/* Identifiers of the varying buffers. PANVK_VARY_BUF_MAX is not a buffer
 * itself: it is the number of identifiers, so it must remain the last
 * enumerator. */
enum panvk_varying_buf_id {
   PANVK_VARY_BUF_GENERAL = 0,
   PANVK_VARY_BUF_POSITION = 1,
   PANVK_VARY_BUF_PSIZ = 2,

   /* Keep last */
   PANVK_VARY_BUF_MAX,
};
42
43 #if PAN_ARCH <= 7
44 enum panvk_desc_table_id {
45 PANVK_DESC_TABLE_USER = 0,
46 PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
47 PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,
48 PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
49 PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
50 PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
51 };
52 #endif
53
/* Size, in bytes, of one FAU word. */
#define FAU_WORD_SIZE (sizeof(uint64_t))

/* A uint64_t whose alignment is explicitly forced to sizeof(uint64_t), so
 * that fields of this type always start on an FAU word boundary. */
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
57
58 struct panvk_graphics_sysvals {
59 struct {
60 float constants[4];
61 } blend;
62
63 struct {
64 struct {
65 float x, y, z;
66 } scale, offset;
67 } viewport;
68
69 struct {
70 #if PAN_ARCH <= 7
71 int32_t raw_vertex_offset;
72 #endif
73 int32_t first_vertex;
74 int32_t base_instance;
75 uint32_t noperspective_varyings;
76 } vs;
77
78 aligned_u64 push_consts;
79
80 #if PAN_ARCH <= 7
81 /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
82 * layer, and filter primitives at the VS level.
83 */
84 int32_t layer_id;
85
86 struct {
87 aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
88 } desc;
89 #endif
90 } __attribute__((aligned(FAU_WORD_SIZE)));
91
92 static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
93 "struct panvk_graphics_sysvals must be 8-byte aligned");
94 static_assert((offsetof(struct panvk_graphics_sysvals, push_consts) %
95 FAU_WORD_SIZE) == 0,
96 "panvk_graphics_sysvals::push_consts must be 8-byte aligned");
97 #if PAN_ARCH <= 7
98 static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
99 0,
100 "panvk_graphics_sysvals::desc must be 8-byte aligned");
101 #endif
102
103 struct panvk_compute_sysvals {
104 struct {
105 uint32_t x, y, z;
106 } base;
107 struct {
108 uint32_t x, y, z;
109 } num_work_groups;
110 struct {
111 uint32_t x, y, z;
112 } local_group_size;
113
114 aligned_u64 push_consts;
115
116 #if PAN_ARCH <= 7
117 struct {
118 aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
119 } desc;
120 #endif
121 } __attribute__((aligned(FAU_WORD_SIZE)));
122
123 static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
124 "struct panvk_compute_sysvals must be 8-byte aligned");
125 static_assert((offsetof(struct panvk_compute_sysvals, push_consts) %
126 FAU_WORD_SIZE) == 0,
127 "panvk_compute_sysvals::push_consts must be 8-byte aligned");
128 #if PAN_ARCH <= 7
129 static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
130 0,
131 "panvk_compute_sysvals::desc must be 8-byte aligned");
132 #endif
133
/* This is not the final offset in the push constant buffer (AKA FAU), but
 * just a magic offset we use before packing push constants so we can easily
 * identify the type of push constant (driver sysvals vs user push constants).
 */
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE

/* Helpers describing a field of struct panvk_<__ptype>_sysvals, where
 * __ptype is `graphics` or `compute` and __name is a member designator.
 *
 * The plain helpers describe the whole field. The *_entry_* helpers expect
 * __name to designate an array and describe its element __idx. The *_fau_*
 * helpers return FAU word indices rather than byte offsets, and the *_end
 * variants return the index of the last word touched (inclusive).
 */

/* Size in bytes of the whole field. */
#define sysval_size(__ptype, __name) \
   (sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name))

/* Byte offset of the field from the start of the sysvals structure. */
#define sysval_offset(__ptype, __name) \
   offsetof(struct panvk_##__ptype##_sysvals, __name)

/* Size in bytes of one element of an array field. */
#define sysval_entry_size(__ptype, __name) \
   (sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0]))

/* Byte offset of element __idx of an array field. __idx is parenthesized so
 * that expressions such as `i + 1` are scaled as a whole. */
#define sysval_entry_offset(__ptype, __name, __idx) \
   (sysval_offset(__ptype, __name) + \
    (sysval_entry_size(__ptype, __name) * (__idx)))

/* Index of the first FAU word covered by the field. */
#define sysval_fau_start(__ptype, __name) \
   (sysval_offset(__ptype, __name) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by the field. */
#define sysval_fau_end(__ptype, __name) \
   ((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) / \
    FAU_WORD_SIZE)

/* Index of the first FAU word covered by element __idx. */
#define sysval_fau_entry_start(__ptype, __name, __idx) \
   (sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by element __idx: one byte before the
 * start of the next element. */
#define sysval_fau_entry_end(__ptype, __name, __idx) \
   ((sysval_entry_offset(__ptype, __name, (__idx) + 1) - 1) / FAU_WORD_SIZE)
165
/* Translates a byte offset into its byte offset after remapping through the
 * shader's fau.used_<__kind> bitset: the position inside the FAU word is
 * kept, and the word index is replaced by
 * BITSET_PREFIX_SUM(used_<__kind>, word index).
 * NOTE(review): BITSET_PREFIX_SUM presumably counts the bits set below the
 * given index; confirm against its definition.
 * __offset is evaluated twice. */
#define shader_remapped_fau_offset(__shader, __kind, __offset) \
   (((__offset) % FAU_WORD_SIZE) + \
    (FAU_WORD_SIZE * \
     BITSET_PREFIX_SUM((__shader)->fau.used_##__kind, \
                       (__offset) / FAU_WORD_SIZE)))

/* Remapped byte offset of a sysval. */
#define shader_remapped_sysval_offset(__shader, __offset) \
   shader_remapped_fau_offset(__shader, sysvals, __offset)

/* Remapped byte offset of a user push constant. Push constants are offset
 * by fau.sysval_count FAU words. */
#define shader_remapped_push_const_offset(__shader, __offset) \
   (((__shader)->fau.sysval_count * FAU_WORD_SIZE) + \
    shader_remapped_fau_offset(__shader, push_consts, __offset))
177
/* Bookkeeping of the FAU words a shader accesses. All ranges are expressed
 * in FAU words and are inclusive on both ends. */

/* Flags every FAU word overlapped by sysval field __name as used. */
#define shader_use_sysval(__shader, __ptype, __name) \
   shader_use_sysval_range(__shader, sysval_offset(__ptype, __name), \
                           sysval_size(__ptype, __name))

/* Evaluates BITSET_TEST_RANGE() over the FAU words overlapped by sysval
 * field __name. */
#define shader_uses_sysval(__shader, __ptype, __name) \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
                     sysval_fau_start(__ptype, __name), \
                     sysval_fau_end(__ptype, __name))

/* Same as shader_uses_sysval(), restricted to element __idx of an array
 * field. */
#define shader_uses_sysval_entry(__shader, __ptype, __name, __idx) \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
                     sysval_fau_entry_start(__ptype, __name, __idx), \
                     sysval_fau_entry_end(__ptype, __name, __idx))

/* Flags the sysval FAU words overlapped by the byte range
 * [__base, __base + __range) as used. */
#define shader_use_sysval_range(__shader, __base, __range) \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals, (__base) / FAU_WORD_SIZE, \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)

/* Flags the push constant FAU words overlapped by the byte range
 * [__base, __base + __range) as used. */
#define shader_use_push_const_range(__shader, __base, __range) \
   BITSET_SET_RANGE((__shader)->fau.used_push_consts, \
                    (__base) / FAU_WORD_SIZE, \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)
201
/* Emits a nir_load_push_constant() that reads the whole sysval field
 * __name. The field is read as components of __bitsz bits, so the component
 * count is the field size divided by (__bitsz / 8). The offset is the
 * immediate byte offset of the field, and the base is
 * SYSVALS_PUSH_CONST_BASE. */
#define load_sysval(__b, __ptype, __bitsz, __name) \
   nir_load_push_constant( \
      __b, sysval_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
      nir_imm_int(__b, sysval_offset(__ptype, __name)), \
      .base = SYSVALS_PUSH_CONST_BASE)

/* Same as load_sysval(), for one element of an array field selected by the
 * NIR value __dyn_idx. The offset is computed in the shader as
 * __dyn_idx * entry size + field offset. */
#define load_sysval_entry(__b, __ptype, __bitsz, __name, __dyn_idx) \
   nir_load_push_constant( \
      __b, sysval_entry_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
      nir_iadd_imm( \
         __b, \
         nir_imul_imm(__b, __dyn_idx, sysval_entry_size(__ptype, __name)), \
         sysval_offset(__ptype, __name)), \
      .base = SYSVALS_PUSH_CONST_BASE)
216
#if PAN_ARCH <= 7
/* Kinds of descriptor tables (PAN_ARCH <= 7 only).
 * PANVK_BIFROST_DESC_TABLE_COUNT is the number of valid kinds and must
 * remain the last enumerator. */
enum panvk_bifrost_desc_table_type {
   PANVK_BIFROST_DESC_TABLE_INVALID = -1,

   /* UBO is encoded on 8 bytes */
   PANVK_BIFROST_DESC_TABLE_UBO = 0,

   /* Images are using a <3DAttributeBuffer,Attribute> pair, each
    * of them being stored in a separate table. */
   PANVK_BIFROST_DESC_TABLE_IMG = 1,

   /* Texture and sampler are encoded on 32 bytes */
   PANVK_BIFROST_DESC_TABLE_TEXTURE = 2,
   PANVK_BIFROST_DESC_TABLE_SAMPLER = 3,

   PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif
235
/* Copy-descriptor handles pack a table identifier and an index in a single
 * word: the table goes in bits 28 and above, the index below.
 *
 * Every argument is parenthesized so that compound expressions (e.g.
 * `a | b`) are shifted or masked as a whole.
 */
#define COPY_DESC_HANDLE(table, idx) (((table) << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
239
240 #define MAX_COMPUTE_SYSVAL_FAUS \
241 (sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)
242 #define MAX_GFX_SYSVAL_FAUS \
243 (sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)
244 #define MAX_SYSVAL_FAUS MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)
245 #define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)
246
247 struct panvk_shader_fau_info {
248 BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
249 BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
250 uint32_t sysval_count;
251 uint32_t total_count;
252 };
253
254 struct panvk_shader {
255 struct vk_shader vk;
256 struct pan_shader_info info;
257 struct pan_compute_dim local_size;
258
259 struct {
260 uint32_t used_set_mask;
261
262 #if PAN_ARCH <= 7
263 struct {
264 uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
265 uint32_t count;
266 } dyn_ubos;
267 struct {
268 uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
269 uint32_t count;
270 } dyn_ssbos;
271 struct {
272 struct panvk_priv_mem map;
273 uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
274 } others;
275 #else
276 struct {
277 uint32_t map[MAX_DYNAMIC_BUFFERS];
278 uint32_t count;
279 } dyn_bufs;
280 #endif
281 } desc_info;
282
283 struct panvk_shader_fau_info fau;
284
285 const void *bin_ptr;
286 uint32_t bin_size;
287
288 struct panvk_priv_mem code_mem;
289
290 #if PAN_ARCH <= 7
291 struct panvk_priv_mem rsd;
292 #else
293 union {
294 struct panvk_priv_mem spd;
295 struct {
296 struct panvk_priv_mem pos_points;
297 struct panvk_priv_mem pos_triangles;
298 struct panvk_priv_mem var;
299 } spds;
300 };
301 #endif
302
303 const char *nir_str;
304 const char *asm_str;
305 };
306
307 static inline uint64_t
panvk_shader_get_dev_addr(const struct panvk_shader * shader)308 panvk_shader_get_dev_addr(const struct panvk_shader *shader)
309 {
310 return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
311 }
312
313 #if PAN_ARCH <= 7
314 struct panvk_shader_link {
315 struct {
316 struct panvk_priv_mem attribs;
317 } vs, fs;
318 unsigned buf_strides[PANVK_VARY_BUF_MAX];
319 };
320
321 VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
322 const struct panvk_shader *vs,
323 const struct panvk_shader *fs,
324 struct panvk_shader_link *link);
325
326 static inline void
panvk_shader_link_cleanup(struct panvk_shader_link * link)327 panvk_shader_link_cleanup(struct panvk_shader_link *link)
328 {
329 panvk_pool_free_mem(&link->vs.attribs);
330 panvk_pool_free_mem(&link->fs.attribs);
331 }
332 #endif
333
334 void panvk_per_arch(nir_lower_descriptors)(
335 nir_shader *nir, struct panvk_device *dev,
336 const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
337 struct vk_descriptor_set_layout *const *set_layouts,
338 struct panvk_shader *shader);
339
340 /* This a stripped-down version of panvk_shader for internal shaders that
341 * are managed by vk_meta (blend and preload shaders). Those don't need the
342 * complexity inherent to user provided shaders as they're not exposed. */
343 struct panvk_internal_shader {
344 struct vk_shader vk;
345 struct pan_shader_info info;
346 struct panvk_priv_mem code_mem;
347
348 #if PAN_ARCH <= 7
349 struct panvk_priv_mem rsd;
350 #else
351 struct panvk_priv_mem spd;
352 #endif
353 };
354
355 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader, vk.base, VkShaderEXT,
356 VK_OBJECT_TYPE_SHADER_EXT)
357
358 VkResult panvk_per_arch(create_internal_shader)(
359 struct panvk_device *dev, nir_shader *nir,
360 struct panfrost_compile_inputs *compiler_inputs,
361 struct panvk_internal_shader **shader_out);
362
363 #endif
364