/*
 * Copyright © 2024 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#ifndef PANVK_CMD_DRAW_H
#define PANVK_CMD_DRAW_H

#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif

#include "panvk_blend.h"
#include "panvk_cmd_oq.h"
#include "panvk_entrypoints.h"
#include "panvk_image.h"
#include "panvk_image_view.h"
#include "panvk_physical_device.h"

#include "vk_command_buffer.h"
#include "vk_format.h"

#include "pan_props.h"

#define MAX_VBS 16
#define MAX_RTS 8

struct panvk_cmd_buffer;

struct panvk_attrib_buf {
   uint64_t address;
   unsigned size;
};

struct panvk_resolve_attachment {
   VkResolveModeFlagBits mode;
   struct panvk_image_view *dst_iview;
};

struct panvk_rendering_state {
   VkRenderingFlags flags;
   uint32_t layer_count;
   uint32_t view_mask;

   enum vk_rp_attachment_flags bound_attachments;
   struct {
      struct panvk_image_view *iviews[MAX_RTS];
      VkFormat fmts[MAX_RTS];
      uint8_t samples[MAX_RTS];
      struct panvk_resolve_attachment resolve[MAX_RTS];
   } color_attachments;

   struct pan_image_view zs_pview;
   struct pan_image_view s_pview;

   struct {
      struct panvk_image_view *iview;
      VkFormat fmt;
      struct panvk_resolve_attachment resolve;
   } z_attachment, s_attachment;

   struct {
      struct pan_fb_info info;
      bool crc_valid[MAX_RTS];

#if PAN_ARCH <= 7
      uint32_t bo_count;
      struct pan_kmod_bo *bos[MAX_RTS + 2];
#endif
   } fb;

#if PAN_ARCH >= 10
   struct panfrost_ptr fbds;
   uint64_t tiler;

   /* When a secondary command buffer has to flush draws, it disturbs the
    * inherited context, and the primary command buffer needs to know. */
   bool invalidate_inherited_ctx;

   struct {
      /* != 0 if the render pass contains one or more occlusion queries to
       * signal. */
      uint64_t chain;

      /* Points to the syncobj of the last occlusion query that was passed
       * to a draw. */
      uint64_t last;
   } oq;
#endif
};

enum panvk_cmd_graphics_dirty_state {
   PANVK_CMD_GRAPHICS_DIRTY_VS,
   PANVK_CMD_GRAPHICS_DIRTY_FS,
   PANVK_CMD_GRAPHICS_DIRTY_VB,
   PANVK_CMD_GRAPHICS_DIRTY_IB,
   PANVK_CMD_GRAPHICS_DIRTY_OQ,
   PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};

struct panvk_cmd_graphics_state {
   struct panvk_descriptor_state desc_state;

   struct {
      struct vk_vertex_input_state vi;
      struct vk_sample_locations_state sl;
   } dynamic;

   struct panvk_occlusion_query_state occlusion_query;
   struct panvk_graphics_sysvals sysvals;

#if PAN_ARCH <= 7
   struct panvk_shader_link link;
#endif

   struct {
      const struct panvk_shader *shader;
      struct panvk_shader_desc_state desc;
      uint64_t push_uniforms;
      bool required;
#if PAN_ARCH <= 7
      uint64_t rsd;
#endif
   } fs;

   struct {
      const struct panvk_shader *shader;
      struct panvk_shader_desc_state desc;
      uint64_t push_uniforms;
#if PAN_ARCH <= 7
      uint64_t attribs;
      uint64_t attrib_bufs;
#endif
   } vs;

   struct {
      struct panvk_attrib_buf bufs[MAX_VBS];
      unsigned count;
   } vb;

   /* Index buffer */
   struct {
      struct panvk_buffer *buffer;
      uint64_t offset;
      uint8_t index_size;
   } ib;

   struct {
      struct panvk_blend_info info;
   } cb;

   struct panvk_rendering_state render;

#if PAN_ARCH <= 7
   uint64_t vpd;
#endif

#if PAN_ARCH >= 10
   uint64_t tsd;
#endif

   BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
};

#define dyn_gfx_state_dirty(__cmdbuf, __name)                                 \
   BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty,                   \
               MESA_VK_DYNAMIC_##__name)

#define gfx_state_dirty(__cmdbuf, __name)                                     \
   BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

#define gfx_state_set_dirty(__cmdbuf, __name)                                 \
   BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

#define gfx_state_clear_all_dirty(__cmdbuf)                                   \
   BITSET_ZERO((__cmdbuf)->state.gfx.dirty)

#define gfx_state_set_all_dirty(__cmdbuf)                                     \
   BITSET_ONES((__cmdbuf)->state.gfx.dirty)
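
/*
 * Usage sketch (hypothetical call site; emit_vertex_buffers() is an
 * illustrative helper, not part of this header): state emission is gated on
 * the relevant dirty bits, which draws clear once the state is consumed.
 *
 *    if (gfx_state_dirty(cmdbuf, VB) || dyn_gfx_state_dirty(cmdbuf, VI))
 *       emit_vertex_buffers(cmdbuf);
 */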

#define set_gfx_sysval(__cmdbuf, __dirty, __name, __val)                      \
   do {                                                                       \
      struct panvk_graphics_sysvals __new_sysval;                             \
      __new_sysval.__name = __val;                                            \
      if (memcmp(&(__cmdbuf)->state.gfx.sysvals.__name,                       \
                 &__new_sysval.__name, sizeof(__new_sysval.__name))) {        \
         (__cmdbuf)->state.gfx.sysvals.__name = __new_sysval.__name;          \
         BITSET_SET_RANGE(__dirty, sysval_fau_start(graphics, __name),        \
                          sysval_fau_end(graphics, __name));                  \
      }                                                                       \
   } while (0)
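
/*
 * Usage sketch (illustrative only: the bitset size and the viewport sysval
 * member are assumptions, not taken from this header): the macro compares
 * old and new values, so callers can invoke it unconditionally and only the
 * FAU ranges that actually changed get dirtied.
 *
 *    BITSET_DECLARE(fau_dirty, 64) = {0};
 *    set_gfx_sysval(cmdbuf, fau_dirty, viewport.offset.x, new_offset_x);
 */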

static inline uint32_t
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
                                  const struct panvk_cmd_graphics_state *state)
{
   struct panfrost_tiler_features tiler_features =
      panfrost_query_tiler_features(&phys_dev->kmod.props);
   uint32_t max_fb_wh =
      MAX2(state->render.fb.info.width, state->render.fb.info.height);
   uint32_t last_hierarchy_bit = util_last_bit(DIV_ROUND_UP(max_fb_wh, 16));
   uint32_t hierarchy_mask = BITFIELD_MASK(tiler_features.max_levels);

   /* Always enable the level covering the whole FB, and disable the finest
    * levels if we don't have enough to cover everything.
    * This is suboptimal for small primitives, since it might force
    * primitives to be walked multiple times even if they don't cover the
    * tile being processed. On the other hand, it's hard to guess the draw
    * pattern, so it's probably good enough for now.
    */
   if (last_hierarchy_bit > tiler_features.max_levels)
      hierarchy_mask <<= last_hierarchy_bit - tiler_features.max_levels;

   /* For an effective tile size larger than 16x16, disable the first
    * level. */
   if (state->render.fb.info.tile_size > 16 * 16)
      hierarchy_mask &= ~1;

   return hierarchy_mask;
}
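
/*
 * Worked example (illustrative numbers): for a 4096x2160 framebuffer with
 * tiler_features.max_levels == 8, max_fb_wh is 4096, DIV_ROUND_UP(4096, 16)
 * is 256, and util_last_bit(256) is 9. The initial mask is
 * BITFIELD_MASK(8) == 0xff; since 9 > 8, the mask is shifted left by one to
 * 0x1fe, dropping the finest (16x16) level so that the coarsest enabled
 * level still covers the whole framebuffer.
 */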

static inline bool
fs_required(const struct panvk_cmd_graphics_state *state,
            const struct vk_dynamic_graphics_state *dyn_state)
{
   const struct pan_shader_info *fs_info =
      state->fs.shader ? &state->fs.shader->info : NULL;
   const struct vk_color_blend_state *cb = &dyn_state->cb;
   const struct vk_rasterization_state *rs = &dyn_state->rs;

   if (rs->rasterizer_discard_enable || !fs_info)
      return false;

   /* If the fragment shader has side effects, it must always run. */
   if (fs_info->fs.sidefx)
      return true;

   /* If colour is written, we need to execute. */
   for (unsigned i = 0; i < cb->attachment_count; ++i) {
      if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
          cb->attachments[i].write_mask)
         return true;
   }

   /* If alpha-to-coverage is enabled, we need to run the fragment shader
    * even if we don't have a color attachment, so depth/stencil updates can
    * be discarded if alpha, and thus coverage, is 0. */
   if (dyn_state->ms.alpha_to_coverage_enable)
      return true;

   /* If the sample mask is updated, we need to run the fragment shader,
    * otherwise the fixed-function depth/stencil results will apply to all
    * samples. */
   if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
      return true;

   /* If depth or stencil is written and not implied, we need to execute.
    * TODO: Predicate on Z/S writes being enabled. */
   return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
}

static inline bool
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
                   ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
                   bool cached_value)
{
   /* Make sure the cached value was properly initialized. */
   assert(fs_required(state, dyn_state) == cached_value);
   return cached_value;
}

#define get_fs(__cmdbuf)                                                      \
   (cached_fs_required(&(__cmdbuf)->state.gfx,                                \
                       &(__cmdbuf)->vk.dynamic_graphics_state,                \
                       (__cmdbuf)->state.gfx.fs.required)                     \
       ? (__cmdbuf)->state.gfx.fs.shader                                      \
       : NULL)

/* Anything that might change the value returned by get_fs() makes users of
 * the fragment shader dirty, because not using the fragment shader (when
 * fs_required() returns false) impacts various other things, like VS -> FS
 * linking in the JM backend, or the update of the fragment shader pointer in
 * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
 * fragment shader updates. */

#define fs_user_dirty(__cmdbuf)                                               \
   (gfx_state_dirty(__cmdbuf, FS) ||                                          \
    dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||            \
    dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) ||                     \
    dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) ||                  \
    dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) ||                          \
    dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
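
/*
 * Usage sketch (hypothetical call site; prepare_fs() is an illustrative
 * helper, not part of this header): anything that can flip the get_fs()
 * result should be gated on this check rather than on
 * gfx_state_dirty(cmdbuf, FS) alone.
 *
 *    if (fs_user_dirty(cmdbuf))
 *       prepare_fs(cmdbuf);
 */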

/* After a draw, all dirty flags are cleared except the FS dirty flag, which
 * needs to be set again if the draw didn't use the fragment shader. */

#define clear_dirty_after_draw(__cmdbuf)                                      \
   do {                                                                       \
      bool __set_fs_dirty =                                                   \
         (__cmdbuf)->state.gfx.fs.shader != get_fs(__cmdbuf);                 \
      bool __set_fs_push_dirty =                                              \
         __set_fs_dirty && gfx_state_dirty(__cmdbuf, FS_PUSH_UNIFORMS);       \
      vk_dynamic_graphics_state_clear_dirty(                                  \
         &(__cmdbuf)->vk.dynamic_graphics_state);                             \
      gfx_state_clear_all_dirty(__cmdbuf);                                    \
      if (__set_fs_dirty)                                                     \
         gfx_state_set_dirty(__cmdbuf, FS);                                   \
      if (__set_fs_push_dirty)                                                \
         gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS);                     \
   } while (0)
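
/*
 * Usage sketch (hypothetical draw path; emit_draw() is an illustrative
 * helper): clear everything once a draw has consumed the state; the macro
 * re-flags FS (and FS push uniforms) when the draw ran without a fragment
 * shader, so the next draw re-evaluates it.
 *
 *    emit_draw(cmdbuf, info);
 *    clear_dirty_after_draw(cmdbuf);
 */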

void
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
                                      const VkRenderingInfo *pRenderingInfo);

void
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
                                     const VkRenderingInfo *render_info);

void
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
                                               const VkRenderingInfo *render_info);

void panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf);

struct panvk_draw_info {
   struct {
      uint32_t size;
      uint32_t offset;
   } index;

   struct {
#if PAN_ARCH <= 7
      int32_t raw_offset;
#endif
      int32_t base;
      uint32_t count;
   } vertex;

   struct {
      int32_t base;
      uint32_t count;
   } instance;

   struct {
      uint64_t buffer_dev_addr;
      uint32_t draw_count;
      uint32_t stride;
   } indirect;

#if PAN_ARCH <= 7
   uint32_t layer_id;
#endif
};
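
/*
 * Example (illustrative; field semantics inferred from the names above): a
 * direct, non-indexed draw of `vertex_count` vertices and a single instance
 * could be described as:
 *
 *    struct panvk_draw_info info = {
 *       .vertex.base = 0,
 *       .vertex.count = vertex_count,
 *       .instance.base = 0,
 *       .instance.count = 1,
 *    };
 *
 * For indirect draws, the per-draw parameters are instead fetched from the
 * GPU buffer described by the indirect sub-struct.
 */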

void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
                                         const struct panvk_draw_info *info);

#endif