/*
 * Copyright © 2021 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */
5 
6 #ifndef PANVK_SHADER_H
7 #define PANVK_SHADER_H
8 
9 #ifndef PAN_ARCH
10 #error "PAN_ARCH must be defined"
11 #endif
12 
13 #include "util/pan_ir.h"
14 
15 #include "pan_desc.h"
16 
17 #include "panvk_cmd_push_constant.h"
18 #include "panvk_descriptor_set.h"
19 #include "panvk_macros.h"
20 #include "panvk_mempool.h"
21 
22 #include "vk_pipeline_layout.h"
23 
24 #include "vk_shader.h"
25 
/* Per-arch shader ops table. Only declared here; the definition lives in
 * another translation unit. */
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);

/* Presumably the maximum number of vertex shader attributes -- TODO confirm
 * against the users of this constant. */
#define MAX_VS_ATTRIBS 16

/* Forward declarations of types defined in other headers. */
struct nir_shader;
struct pan_blend_state;
struct panvk_device;
33 
/* Identifiers of the varying buffers. Values are consecutive and start at
 * zero so they can be used as array indices. */
enum panvk_varying_buf_id {
   PANVK_VARY_BUF_GENERAL,
   PANVK_VARY_BUF_POSITION,
   PANVK_VARY_BUF_PSIZ,

   /* Keep last: number of varying buffers, used to size per-buffer arrays. */
   PANVK_VARY_BUF_MAX,
};
42 
43 #if PAN_ARCH <= 7
44 enum panvk_desc_table_id {
45    PANVK_DESC_TABLE_USER = 0,
46    PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
47    PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,
48    PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
49    PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
50    PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
51 };
52 #endif
53 
/* Size in bytes of one FAU word (64 bits). */
#define FAU_WORD_SIZE sizeof(uint64_t)

/* A uint64_t whose alignment is forced to sizeof(uint64_t), so that fields
 * declared with it always start on a FAU word boundary regardless of the
 * ABI's default alignment for 64-bit integers. */
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
57 
58 struct panvk_graphics_sysvals {
59    struct {
60       float constants[4];
61    } blend;
62 
63    struct {
64       struct {
65          float x, y, z;
66       } scale, offset;
67    } viewport;
68 
69    struct {
70 #if PAN_ARCH <= 7
71       int32_t raw_vertex_offset;
72 #endif
73       int32_t first_vertex;
74       int32_t base_instance;
75       uint32_t noperspective_varyings;
76    } vs;
77 
78    aligned_u64 push_consts;
79 
80 #if PAN_ARCH <= 7
81    /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
82     * layer, and filter primitives at the VS level.
83     */
84    int32_t layer_id;
85 
86    struct {
87       aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
88    } desc;
89 #endif
90 } __attribute__((aligned(FAU_WORD_SIZE)));
91 
92 static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
93               "struct panvk_graphics_sysvals must be 8-byte aligned");
94 static_assert((offsetof(struct panvk_graphics_sysvals, push_consts) %
95                FAU_WORD_SIZE) == 0,
96               "panvk_graphics_sysvals::push_consts must be 8-byte aligned");
97 #if PAN_ARCH <= 7
98 static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
99                  0,
100               "panvk_graphics_sysvals::desc must be 8-byte aligned");
101 #endif
102 
103 struct panvk_compute_sysvals {
104    struct {
105       uint32_t x, y, z;
106    } base;
107    struct {
108       uint32_t x, y, z;
109    } num_work_groups;
110    struct {
111       uint32_t x, y, z;
112    } local_group_size;
113 
114    aligned_u64 push_consts;
115 
116 #if PAN_ARCH <= 7
117    struct {
118       aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
119    } desc;
120 #endif
121 } __attribute__((aligned(FAU_WORD_SIZE)));
122 
123 static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
124               "struct panvk_compute_sysvals must be 8-byte aligned");
125 static_assert((offsetof(struct panvk_compute_sysvals, push_consts) %
126                FAU_WORD_SIZE) == 0,
127               "panvk_compute_sysvals::push_consts must be 8-byte aligned");
128 #if PAN_ARCH <= 7
129 static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
130                  0,
131               "panvk_compute_sysvals::desc must be 8-byte aligned");
132 #endif
133 
/* This is not the final offset in the push constant buffer (AKA FAU), but
 * just a magic offset we use before packing push constants so we can easily
 * identify the type of push constant (driver sysvals vs user push constants).
 */
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE
139 
/* Helpers computing sizes/offsets inside struct panvk_<__ptype>_sysvals,
 * where __ptype is "graphics" or "compute" and __name is a member path.
 */

/* Size in bytes of member __name. */
#define sysval_size(__ptype, __name)                                           \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name)

/* Byte offset of member __name. */
#define sysval_offset(__ptype, __name)                                         \
   offsetof(struct panvk_##__ptype##_sysvals, __name)

/* Size in bytes of one element of the array member __name. */
#define sysval_entry_size(__ptype, __name)                                     \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])

/* Byte offset of element __idx of the array member __name.
 *
 * __idx is parenthesized so that compound expressions such as `i + 1` are
 * multiplied as a whole by the entry size.
 */
#define sysval_entry_offset(__ptype, __name, __idx)                            \
   (sysval_offset(__ptype, __name) +                                           \
    (sysval_entry_size(__ptype, __name) * (__idx)))

/* Index of the first FAU word covered by member __name. */
#define sysval_fau_start(__ptype, __name)                                      \
   (sysval_offset(__ptype, __name) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by member __name (inclusive). */
#define sysval_fau_end(__ptype, __name)                                        \
   ((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) /      \
    FAU_WORD_SIZE)

/* Index of the first FAU word covered by element __idx of array __name. */
#define sysval_fau_entry_start(__ptype, __name, __idx)                         \
   (sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by element __idx of array __name
 * (inclusive): last byte before the start of element __idx + 1. */
#define sysval_fau_entry_end(__ptype, __name, __idx)                           \
   ((sysval_entry_offset(__ptype, __name, (__idx) + 1) - 1) / FAU_WORD_SIZE)
165 
/* Remap byte offset __offset to its position once only the FAU words flagged
 * in (__shader)->fau.used_<__kind> are kept: the word index is replaced by
 * BITSET_PREFIX_SUM() of the bitset at that index, and the offset inside the
 * word is preserved.
 */
#define shader_remapped_fau_offset(__shader, __kind, __offset)                 \
   ((FAU_WORD_SIZE * BITSET_PREFIX_SUM((__shader)->fau.used_##__kind,          \
                                       (__offset) / FAU_WORD_SIZE)) +          \
    ((__offset) % FAU_WORD_SIZE))

/* Remapped offset of a sysval located at byte offset __offset. */
#define shader_remapped_sysval_offset(__shader, __offset)                      \
   shader_remapped_fau_offset(__shader, sysvals, __offset)

/* Remapped offset of a user push constant: push constants are placed after
 * the (__shader)->fau.sysval_count sysval FAU words. */
#define shader_remapped_push_const_offset(__shader, __offset)                  \
   (((__shader)->fau.sysval_count * FAU_WORD_SIZE) +                           \
    shader_remapped_fau_offset(__shader, push_consts, __offset))

/* Flag every FAU word covered by sysval __name as used by __shader. */
#define shader_use_sysval(__shader, __ptype, __name)                           \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals,                              \
                    sysval_fau_start(__ptype, __name),                         \
                    sysval_fau_end(__ptype, __name))

/* Test the used_sysvals bits covering sysval __name (BITSET_TEST_RANGE). */
#define shader_uses_sysval(__shader, __ptype, __name)                          \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals,                             \
                     sysval_fau_start(__ptype, __name),                        \
                     sysval_fau_end(__ptype, __name))

/* Same as shader_uses_sysval(), restricted to element __idx of array
 * sysval __name. */
#define shader_uses_sysval_entry(__shader, __ptype, __name, __idx)             \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals,                             \
                     sysval_fau_entry_start(__ptype, __name, __idx),           \
                     sysval_fau_entry_end(__ptype, __name, __idx))

/* Flag the FAU words covering sysval bytes [__base, __base + __range) as
 * used. */
#define shader_use_sysval_range(__shader, __base, __range)                     \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals, (__base) / FAU_WORD_SIZE,    \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)

/* Flag the FAU words covering push constant bytes
 * [__base, __base + __range) as used. */
#define shader_use_push_const_range(__shader, __base, __range)                 \
   BITSET_SET_RANGE((__shader)->fau.used_push_consts,                          \
                    (__base) / FAU_WORD_SIZE,                                  \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)
201 
/* Emit a NIR push-constant load of sysval __name, read as components of
 * __bitsz bits each. The load is tagged with .base = SYSVALS_PUSH_CONST_BASE
 * so it can be told apart from user push constant loads before packing.
 */
#define load_sysval(__b, __ptype, __bitsz, __name)                             \
   nir_load_push_constant(                                                     \
      __b, sysval_size(__ptype, __name) / ((__bitsz) / 8), __bitsz,            \
      nir_imm_int(__b, sysval_offset(__ptype, __name)),                        \
      .base = SYSVALS_PUSH_CONST_BASE)

/* Same as load_sysval(), but loads one element of the array sysval __name,
 * selected by the dynamic (NIR value) index __dyn_idx.
 */
#define load_sysval_entry(__b, __ptype, __bitsz, __name, __dyn_idx)            \
   nir_load_push_constant(                                                     \
      __b, sysval_entry_size(__ptype, __name) / ((__bitsz) / 8), __bitsz,      \
      nir_iadd_imm(                                                            \
         __b,                                                                  \
         nir_imul_imm(__b, __dyn_idx, sysval_entry_size(__ptype, __name)),     \
         sysval_offset(__ptype, __name)),                                      \
      .base = SYSVALS_PUSH_CONST_BASE)
216 
#if PAN_ARCH <= 7
/* Kinds of descriptor tables (PAN_ARCH <= 7 only). Valid values are
 * consecutive and start at zero so they can index per-table arrays. */
enum panvk_bifrost_desc_table_type {
   PANVK_BIFROST_DESC_TABLE_INVALID = -1,

   /* UBO is encoded on 8 bytes */
   PANVK_BIFROST_DESC_TABLE_UBO = 0,

   /* Images are using a <3DAttributeBuffer,Attribute> pair, each
    * of them being stored in a separate table. */
   PANVK_BIFROST_DESC_TABLE_IMG,

   /* Texture and sampler are encoded on 32 bytes */
   PANVK_BIFROST_DESC_TABLE_TEXTURE,
   PANVK_BIFROST_DESC_TABLE_SAMPLER,

   /* Keep last: number of valid table types. */
   PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif
235 
/* A copy-descriptor handle packs a table id in bits [31:28] and an index in
 * bits [27:0]. Both arguments are parenthesized so compound expressions
 * (e.g. `a | b`) are shifted/ORed as a whole.
 */
#define COPY_DESC_HANDLE(table, idx)           (((table) << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
239 
/* Number of FAU words needed to hold each sysval struct, the larger of the
 * two, and the number of FAU words needed for MAX_PUSH_CONSTANTS_SIZE bytes
 * of user push constants. */
#define MAX_COMPUTE_SYSVAL_FAUS                                                \
   (sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)
#define MAX_GFX_SYSVAL_FAUS                                                    \
   (sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)
#define MAX_SYSVAL_FAUS     MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)
#define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)
246 
247 struct panvk_shader_fau_info {
248    BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
249    BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
250    uint32_t sysval_count;
251    uint32_t total_count;
252 };
253 
254 struct panvk_shader {
255    struct vk_shader vk;
256    struct pan_shader_info info;
257    struct pan_compute_dim local_size;
258 
259    struct {
260       uint32_t used_set_mask;
261 
262 #if PAN_ARCH <= 7
263       struct {
264          uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
265          uint32_t count;
266       } dyn_ubos;
267       struct {
268          uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
269          uint32_t count;
270       } dyn_ssbos;
271       struct {
272          struct panvk_priv_mem map;
273          uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
274       } others;
275 #else
276       struct {
277          uint32_t map[MAX_DYNAMIC_BUFFERS];
278          uint32_t count;
279       } dyn_bufs;
280 #endif
281    } desc_info;
282 
283    struct panvk_shader_fau_info fau;
284 
285    const void *bin_ptr;
286    uint32_t bin_size;
287 
288    struct panvk_priv_mem code_mem;
289 
290 #if PAN_ARCH <= 7
291    struct panvk_priv_mem rsd;
292 #else
293    union {
294       struct panvk_priv_mem spd;
295       struct {
296          struct panvk_priv_mem pos_points;
297          struct panvk_priv_mem pos_triangles;
298          struct panvk_priv_mem var;
299       } spds;
300    };
301 #endif
302 
303    const char *nir_str;
304    const char *asm_str;
305 };
306 
307 static inline uint64_t
panvk_shader_get_dev_addr(const struct panvk_shader * shader)308 panvk_shader_get_dev_addr(const struct panvk_shader *shader)
309 {
310    return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
311 }
312 
313 #if PAN_ARCH <= 7
314 struct panvk_shader_link {
315    struct {
316       struct panvk_priv_mem attribs;
317    } vs, fs;
318    unsigned buf_strides[PANVK_VARY_BUF_MAX];
319 };
320 
321 VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
322                                       const struct panvk_shader *vs,
323                                       const struct panvk_shader *fs,
324                                       struct panvk_shader_link *link);
325 
326 static inline void
panvk_shader_link_cleanup(struct panvk_shader_link * link)327 panvk_shader_link_cleanup(struct panvk_shader_link *link)
328 {
329    panvk_pool_free_mem(&link->vs.attribs);
330    panvk_pool_free_mem(&link->fs.attribs);
331 }
332 #endif
333 
334 void panvk_per_arch(nir_lower_descriptors)(
335    nir_shader *nir, struct panvk_device *dev,
336    const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
337    struct vk_descriptor_set_layout *const *set_layouts,
338    struct panvk_shader *shader);
339 
340 /* This a stripped-down version of panvk_shader for internal shaders that
341  * are managed by vk_meta (blend and preload shaders). Those don't need the
342  * complexity inherent to user provided shaders as they're not exposed. */
343 struct panvk_internal_shader {
344    struct vk_shader vk;
345    struct pan_shader_info info;
346    struct panvk_priv_mem code_mem;
347 
348 #if PAN_ARCH <= 7
349    struct panvk_priv_mem rsd;
350 #else
351    struct panvk_priv_mem spd;
352 #endif
353 };
354 
355 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader, vk.base, VkShaderEXT,
356                                VK_OBJECT_TYPE_SHADER_EXT)
357 
358 VkResult panvk_per_arch(create_internal_shader)(
359    struct panvk_device *dev, nir_shader *nir,
360    struct panfrost_compile_inputs *compiler_inputs,
361    struct panvk_internal_shader **shader_out);
362 
363 #endif
364