1 /*
2  * Copyright © 2024 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #ifndef PANVK_CMD_DRAW_H
7 #define PANVK_CMD_DRAW_H
8 
9 #ifndef PAN_ARCH
10 #error "PAN_ARCH must be defined"
11 #endif
12 
13 #include "panvk_blend.h"
14 #include "panvk_cmd_oq.h"
15 #include "panvk_entrypoints.h"
16 #include "panvk_image.h"
17 #include "panvk_image_view.h"
18 #include "panvk_physical_device.h"
19 
20 #include "vk_command_buffer.h"
21 #include "vk_format.h"
22 
23 #include "pan_props.h"
24 
/* Number of entries in panvk_cmd_graphics_state::vb.bufs. */
#define MAX_VBS 16
/* Size of the per-color-attachment arrays in panvk_rendering_state. */
#define MAX_RTS 8

/* Only used through pointers in this header, so a forward declaration is
 * enough. */
struct panvk_cmd_buffer;
29 
/* Address/size pair; panvk_cmd_graphics_state::vb.bufs holds MAX_VBS of
 * these. */
struct panvk_attrib_buf {
   /* NOTE(review): presumably a GPU virtual address -- confirm. */
   uint64_t address;
   /* NOTE(review): presumably a size in bytes -- confirm. */
   unsigned size;
};
34 
/* Resolve information attached to a color, depth or stencil attachment of
 * panvk_rendering_state: the resolve mode and the destination image view. */
struct panvk_resolve_attachment {
   VkResolveModeFlagBits mode;
   struct panvk_image_view *dst_iview;
};
39 
/* Rendering state, embedded in panvk_cmd_graphics_state as its `render`
 * member. The layout depends on PAN_ARCH. */
struct panvk_rendering_state {
   VkRenderingFlags flags;
   uint32_t layer_count;
   uint32_t view_mask;

   enum vk_rp_attachment_flags bound_attachments;

   /* Per-color-attachment data; every array has MAX_RTS entries. */
   struct {
      struct panvk_image_view *iviews[MAX_RTS];
      VkFormat fmts[MAX_RTS];
      uint8_t samples[MAX_RTS];
      struct panvk_resolve_attachment resolve[MAX_RTS];
   } color_attachments;

   /* NOTE(review): presumably the pan_image_view copies used for the
    * depth/stencil and stencil attachments -- confirm against the users. */
   struct pan_image_view zs_pview;
   struct pan_image_view s_pview;

   /* Depth and stencil attachments share one anonymous struct type. */
   struct {
      struct panvk_image_view *iview;
      VkFormat fmt;
      struct panvk_resolve_attachment resolve;
   } z_attachment, s_attachment;

   struct {
      struct pan_fb_info info;
      bool crc_valid[MAX_RTS];

#if PAN_ARCH <= 7
      /* NOTE(review): MAX_RTS + 2 slots, presumably one per color attachment
       * plus depth and stencil -- confirm. */
      uint32_t bo_count;
      struct pan_kmod_bo *bos[MAX_RTS + 2];
#endif
   } fb;

#if PAN_ARCH >= 10
   struct panfrost_ptr fbds;
   uint64_t tiler;

   /* When a secondary command buffer has to flush draws, it disturbs the
    * inherited context, and the primary command buffer needs to know. */
   bool invalidate_inherited_ctx;

   struct {
      /* != 0 if the render pass contains one or more occlusion queries to
       * signal. */
      uint64_t chain;

      /* Point to the syncobj of the last occlusion query that was passed
       * to a draw. */
      uint64_t last;
   } oq;
#endif
};
91 
/* Bit indices into panvk_cmd_graphics_state::dirty. The gfx_state_*dirty()
 * macros build the enumerator name by pasting their __name argument after
 * PANVK_CMD_GRAPHICS_DIRTY_. */
enum panvk_cmd_graphics_dirty_state {
   PANVK_CMD_GRAPHICS_DIRTY_VS,
   PANVK_CMD_GRAPHICS_DIRTY_FS,
   PANVK_CMD_GRAPHICS_DIRTY_VB,
   PANVK_CMD_GRAPHICS_DIRTY_IB,
   PANVK_CMD_GRAPHICS_DIRTY_OQ,
   PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,
   /* Must stay last: used as the size of the dirty bitset. */
   PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};
104 
/* Graphics state of a command buffer. The macros in this header reach it as
 * (cmdbuf)->state.gfx. The layout depends on PAN_ARCH. */
struct panvk_cmd_graphics_state {
   struct panvk_descriptor_state desc_state;

   struct {
      struct vk_vertex_input_state vi;
      struct vk_sample_locations_state sl;
   } dynamic;

   struct panvk_occlusion_query_state occlusion_query;

   /* Updated field by field through set_gfx_sysval(). */
   struct panvk_graphics_sysvals sysvals;

#if PAN_ARCH <= 7
   struct panvk_shader_link link;
#endif

   /* Fragment shader state. */
   struct {
      const struct panvk_shader *shader;
      struct panvk_shader_desc_state desc;
      uint64_t push_uniforms;
      /* Cached fs_required() result; get_fs() reads it and, in builds with
       * assertions enabled, cross-checks it against fs_required(). */
      bool required;
#if PAN_ARCH <= 7
      uint64_t rsd;
#endif
   } fs;

   /* Vertex shader state. */
   struct {
      const struct panvk_shader *shader;
      struct panvk_shader_desc_state desc;
      uint64_t push_uniforms;
#if PAN_ARCH <= 7
      uint64_t attribs;
      uint64_t attrib_bufs;
#endif
   } vs;

   /* Vertex buffers. */
   struct {
      struct panvk_attrib_buf bufs[MAX_VBS];
      unsigned count;
   } vb;

   /* Index buffer */
   struct {
      struct panvk_buffer *buffer;
      uint64_t offset;
      uint8_t index_size;
   } ib;

   struct {
      struct panvk_blend_info info;
   } cb;

   struct panvk_rendering_state render;

#if PAN_ARCH <= 7
   uint64_t vpd;
#endif

#if PAN_ARCH >= 10
   uint64_t tsd;
#endif

   /* One bit per enum panvk_cmd_graphics_dirty_state value; manipulated
    * through the gfx_state_*dirty() macros. */
   BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
};
168 
/* Test the MESA_VK_DYNAMIC_<__name> bit in
 * (__cmdbuf)->vk.dynamic_graphics_state.dirty. */
#define dyn_gfx_state_dirty(__cmdbuf, __name)                                  \
   BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty,                    \
               MESA_VK_DYNAMIC_##__name)

/* Test the PANVK_CMD_GRAPHICS_DIRTY_<__name> bit in
 * (__cmdbuf)->state.gfx.dirty. */
#define gfx_state_dirty(__cmdbuf, __name)                                      \
   BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

/* Set the PANVK_CMD_GRAPHICS_DIRTY_<__name> bit in
 * (__cmdbuf)->state.gfx.dirty. */
#define gfx_state_set_dirty(__cmdbuf, __name)                                  \
   BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

/* Clear every bit of (__cmdbuf)->state.gfx.dirty. */
#define gfx_state_clear_all_dirty(__cmdbuf)                                    \
   BITSET_ZERO((__cmdbuf)->state.gfx.dirty)

/* Set every bit of (__cmdbuf)->state.gfx.dirty. */
#define gfx_state_set_all_dirty(__cmdbuf)                                      \
   BITSET_ONES((__cmdbuf)->state.gfx.dirty)
184 
/* Store a new value in one graphics sysval and, only if the stored bytes
 * differ from the new ones, mark the matching range dirty.
 *
 *  __cmdbuf: command buffer whose state.gfx.sysvals is updated (expanded more
 *            than once).
 *  __dirty:  bitset passed to BITSET_SET_RANGE() with the bounds returned by
 *            sysval_fau_start(graphics, __name) and
 *            sysval_fau_end(graphics, __name).
 *  __name:   member path inside struct panvk_graphics_sysvals.
 *  __val:    new value; it is first assigned to a temporary of the field's
 *            type, so it is expanded exactly once.
 *
 * The old and new values are compared with memcmp() over sizeof(field).
 * NOTE(review): the temporary is not zero-initialized; if a field is an
 * aggregate with padding, the padding bytes could make the comparison report
 * a change -- confirm that no such field is passed here. */
#define set_gfx_sysval(__cmdbuf, __dirty, __name, __val)                       \
   do {                                                                        \
      struct panvk_graphics_sysvals __new_sysval;                              \
      __new_sysval.__name = __val;                                             \
      if (memcmp(&(__cmdbuf)->state.gfx.sysvals.__name, &__new_sysval.__name,  \
                 sizeof(__new_sysval.__name))) {                               \
         (__cmdbuf)->state.gfx.sysvals.__name = __new_sysval.__name;           \
         BITSET_SET_RANGE(__dirty, sysval_fau_start(graphics, __name),         \
                          sysval_fau_end(graphics, __name));                   \
      }                                                                        \
   } while (0)
196 
197 static inline uint32_t
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device * phys_dev,const struct panvk_cmd_graphics_state * state)198 panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
199                                   const struct panvk_cmd_graphics_state *state)
200 {
201    struct panfrost_tiler_features tiler_features =
202       panfrost_query_tiler_features(&phys_dev->kmod.props);
203    uint32_t max_fb_wh = MAX2(state->render.fb.info.width,
204                              state->render.fb.info.height);
205    uint32_t last_hierarchy_bit = util_last_bit(DIV_ROUND_UP(max_fb_wh, 16));
206    uint32_t hierarchy_mask = BITFIELD_MASK(tiler_features.max_levels);
207 
208    /* Always enable the level covering the whole FB, and disable the finest
209     * levels if we don't have enough to cover everything.
210     * This is suboptimal for small primitives, since it might force
211     * primitives to be walked multiple times even if they don't cover the
212     * the tile being processed. On the other hand, it's hard to guess
213     * the draw pattern, so it's probably good enough for now.
214     */
215    if (last_hierarchy_bit > tiler_features.max_levels)
216       hierarchy_mask <<= last_hierarchy_bit - tiler_features.max_levels;
217 
218    /* For effective tile size larger than 16x16, disable first level */
219    if (state->render.fb.info.tile_size > 16 * 16)
220       hierarchy_mask &= ~1;
221 
222    return hierarchy_mask;
223 }
224 
225 static inline bool
fs_required(const struct panvk_cmd_graphics_state * state,const struct vk_dynamic_graphics_state * dyn_state)226 fs_required(const struct panvk_cmd_graphics_state *state,
227             const struct vk_dynamic_graphics_state *dyn_state)
228 {
229    const struct pan_shader_info *fs_info =
230       state->fs.shader ? &state->fs.shader->info : NULL;
231    const struct vk_color_blend_state *cb = &dyn_state->cb;
232    const struct vk_rasterization_state *rs = &dyn_state->rs;
233 
234    if (rs->rasterizer_discard_enable || !fs_info)
235       return false;
236 
237    /* If we generally have side effects */
238    if (fs_info->fs.sidefx)
239       return true;
240 
241    /* If colour is written we need to execute */
242    for (unsigned i = 0; i < cb->attachment_count; ++i) {
243       if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
244           cb->attachments[i].write_mask)
245          return true;
246    }
247 
248    /* If alpha-to-coverage is enabled, we need to run the fragment shader even
249     * if we don't have a color attachment, so depth/stencil updates can be
250     * discarded if alpha, and thus coverage, is 0. */
251    if (dyn_state->ms.alpha_to_coverage_enable)
252       return true;
253 
254    /* If the sample mask is updated, we need to run the fragment shader,
255     * otherwise the fixed-function depth/stencil results will apply to all
256     * samples. */
257    if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
258       return true;
259 
260    /* If depth is written and not implied we need to execute.
261     * TODO: Predicate on Z/S writes being enabled */
262    return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
263 }
264 
265 static inline bool
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state * state,ASSERTED const struct vk_dynamic_graphics_state * dyn_state,bool cached_value)266 cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
267                    ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
268                    bool cached_value)
269 {
270    /* Make sure the cached value was properly initialized. */
271    assert(fs_required(state, dyn_state) == cached_value);
272    return cached_value;
273 }
274 
/* Fragment shader to use for a draw: (__cmdbuf)->state.gfx.fs.shader when the
 * cached state.gfx.fs.required flag is true, NULL otherwise. The flag goes
 * through cached_fs_required(), which asserts that it matches fs_required().
 * __cmdbuf is expanded several times, so it must have no side effects. */
#define get_fs(__cmdbuf)                                                       \
   (cached_fs_required(&(__cmdbuf)->state.gfx,                                 \
                       &(__cmdbuf)->vk.dynamic_graphics_state,                 \
                       (__cmdbuf)->state.gfx.fs.required)                      \
       ? (__cmdbuf)->state.gfx.fs.shader                                       \
       : NULL)
281 
/* Anything that might change the value returned by get_fs() makes users of the
 * fragment shader dirty, because not using the fragment shader (when
 * fs_required() returns false) impacts various other things, like VS -> FS
 * linking in the JM backend, or the update of the fragment shader pointer in
 * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
 * fragment shader updates.
 *
 * Evaluates to non-zero if the FS dirty bit or any of the dynamic states read
 * by fs_required() is dirty on __cmdbuf. The macro expands its own __cmdbuf
 * argument (several times, so it must be side-effect free) and does not
 * depend on the name of the caller's variable. */

#define fs_user_dirty(__cmdbuf)                                                \
   (gfx_state_dirty(__cmdbuf, FS) ||                                           \
    dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||             \
    dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) ||                      \
    dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) ||                   \
    dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) ||                           \
    dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
296 
/* After a draw, all dirty flags are cleared except the FS dirty flag which
 * needs to be set again if the draw didn't use the fragment shader.
 *
 * Both decisions are captured before anything is cleared:
 *  - FS is re-flagged when get_fs() differs from the bound shader;
 *  - FS_PUSH_UNIFORMS is re-flagged only if it was dirty and FS is re-flagged.
 * The dynamic graphics state dirty bits and the panvk dirty bitset are then
 * cleared, and the captured flags are set again. */

#define clear_dirty_after_draw(__cmdbuf)                                       \
   do {                                                                        \
      bool __set_fs_dirty =                                                    \
         (__cmdbuf)->state.gfx.fs.shader != get_fs(__cmdbuf);                  \
      bool __set_fs_push_dirty =                                               \
         __set_fs_dirty && gfx_state_dirty(__cmdbuf, FS_PUSH_UNIFORMS);        \
      vk_dynamic_graphics_state_clear_dirty(                                   \
         &(__cmdbuf)->vk.dynamic_graphics_state);                              \
      gfx_state_clear_all_dirty(__cmdbuf);                                     \
      if (__set_fs_dirty)                                                      \
         gfx_state_set_dirty(__cmdbuf, FS);                                    \
      if (__set_fs_push_dirty)                                                 \
         gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS);                      \
   } while (0)
314 
/* Declarations of render-pass helpers; none of them is defined in this
 * header. NOTE(review): panvk_per_arch() presumably decorates each name with
 * the PAN_ARCH version -- confirm against its definition. */

/* NOTE(review): presumably fills cmdbuf's rendering state from
 * pRenderingInfo -- confirm against the implementation. */
void
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
                                      const VkRenderingInfo *pRenderingInfo);

void
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
                                     const VkRenderingInfo *render_info);

void
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
                                               const VkRenderingInfo *render_info);

void panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf);
328 
/* Parameters of a draw, passed by pointer to cmd_prepare_draw_sysvals().
 * Some fields only exist when PAN_ARCH <= 7. */
struct panvk_draw_info {
   /* Index data. NOTE(review): units of size/offset are not visible here --
    * confirm against the code filling this struct. */
   struct {
      uint32_t size;
      uint32_t offset;
   } index;

   struct {
#if PAN_ARCH <= 7
      int32_t raw_offset;
#endif
      int32_t base;
      uint32_t count;
   } vertex;

   struct {
      int32_t base;
      uint32_t count;
   } instance;

   /* Indirect draw parameters. NOTE(review): presumably unused for direct
    * draws -- confirm. */
   struct {
      uint64_t buffer_dev_addr;
      uint32_t draw_count;
      uint32_t stride;
   } indirect;

#if PAN_ARCH <= 7
   uint32_t layer_id;
#endif
};
358 
/* Declared here, defined elsewhere. NOTE(review): presumably updates the
 * graphics sysvals of cmdbuf from the draw parameters in info -- confirm
 * against the implementation. */
void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
                                         const struct panvk_draw_info *info);
362 
363 #endif
364