/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */
10 
11 #ifndef RADV_CMD_BUFFER_H
12 #define RADV_CMD_BUFFER_H
13 
14 #include "ac_vcn.h"
15 
16 #include "vk_command_buffer.h"
17 
18 #include "radv_device.h"
19 #include "radv_physical_device.h"
20 #include "radv_pipeline_graphics.h"
21 #include "radv_video.h"
22 
23 extern const struct vk_command_buffer_ops radv_cmd_buffer_ops;
24 
/**
 * Bitmask of dynamic states, one bit per state.
 *
 * The flags go past bit 31, so any mask built from them needs a 64-bit
 * integer. RADV_DYNAMIC_ALL has every bit below bit 54 set, which covers all
 * the flags defined here.
 */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_DYNAMIC_POLYGON_MODE = 1ull << 30,
   RADV_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
   RADV_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
   RADV_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
   RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
   RADV_DYNAMIC_SAMPLE_MASK = 1ull << 35,
   RADV_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
   RADV_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
   RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
   RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
   RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
   RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
   RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
   RADV_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
   RADV_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
   RADV_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
   RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
   RADV_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
   RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
   RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
   RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE = 1ull << 50,
   RADV_DYNAMIC_COLOR_ATTACHMENT_MAP = 1ull << 51,
   RADV_DYNAMIC_INPUT_ATTACHMENT_MAP = 1ull << 52,
   RADV_DYNAMIC_DEPTH_CLAMP_RANGE = 1ull << 53,
   /* Union of all the bits above. */
   RADV_DYNAMIC_ALL = (1ull << 54) - 1,
};
82 
/**
 * Bitmask of dirty command-buffer state, one bit per state group.
 *
 * RADV_CMD_DIRTY_ALL has every bit below bit 18 set, which covers all the
 * individual flags. RADV_CMD_DIRTY_SHADER_QUERY is an alias for the NGG and
 * task state bits combined.
 */
enum radv_cmd_dirty_bits {
   RADV_CMD_DIRTY_PIPELINE = 1ull << 0,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 1,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 2,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 3,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 4,
   RADV_CMD_DIRTY_GUARDBAND = 1ull << 5,
   RADV_CMD_DIRTY_RBPLUS = 1ull << 6,
   RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 7,
   RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 8,
   RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 9,
   RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 10,
   RADV_CMD_DIRTY_FRAGMENT_OUTPUT = 1ull << 11,
   RADV_CMD_DIRTY_FBFETCH_OUTPUT = 1ull << 12,
   RADV_CMD_DIRTY_FS_STATE = 1ull << 13,
   RADV_CMD_DIRTY_NGG_STATE = 1ull << 14,
   RADV_CMD_DIRTY_TASK_STATE = 1ull << 15,
   RADV_CMD_DIRTY_DEPTH_STENCIL_STATE = 1ull << 16,
   RADV_CMD_DIRTY_RASTER_STATE = 1ull << 17,
   RADV_CMD_DIRTY_ALL = (1ull << 18) - 1,

   RADV_CMD_DIRTY_SHADER_QUERY = RADV_CMD_DIRTY_NGG_STATE | RADV_CMD_DIRTY_TASK_STATE,
};
106 
/**
 * Bitmask of cache flush/invalidate and synchronization requests.
 *
 * The last two enumerators are convenience combinations of the single-bit
 * flags above them.
 */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   /* All four framebuffer cache flags (CB, CB meta, DB, DB meta). */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
                                         RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),

   /* I/scalar/vector cache and L2 invalidation, L2 writeback and a CS partial flush. */
   RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
                                 RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
145 
146 struct radv_vertex_binding {
147    VkDeviceSize offset;
148    VkDeviceSize size;
149    VkDeviceSize stride;
150 };
151 
152 struct radv_streamout_binding {
153    struct radv_buffer *buffer;
154    VkDeviceSize offset;
155    VkDeviceSize size;
156 };
157 
/* Streamout state, stored as radv_cmd_state::streamout. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};
168 
169 /**
170  * Attachment state when recording a renderpass instance.
171  *
172  * The clear value is valid only if there exists a pending clear.
173  */
174 struct radv_attachment {
175    VkFormat format;
176    struct radv_image_view *iview;
177    VkImageLayout layout;
178    VkImageLayout stencil_layout;
179 
180    union {
181       struct radv_color_buffer_info cb;
182       struct radv_ds_buffer_info ds;
183    };
184 
185    struct radv_image_view *resolve_iview;
186    VkResolveModeFlagBits resolve_mode;
187    VkResolveModeFlagBits stencil_resolve_mode;
188    VkImageLayout resolve_layout;
189    VkImageLayout stencil_resolve_layout;
190 };
191 
192 struct radv_rendering_state {
193    bool active;
194    bool has_image_views;
195    bool has_input_attachment_no_concurrent_writes;
196    VkRect2D area;
197    uint32_t layer_count;
198    uint32_t view_mask;
199    uint32_t color_samples;
200    uint32_t ds_samples;
201    uint32_t max_samples;
202    struct radv_sample_locations_state sample_locations;
203    uint32_t color_att_count;
204    struct radv_attachment color_att[MAX_RTS];
205    struct radv_attachment ds_att;
206    VkImageAspectFlags ds_att_aspects;
207    struct radv_attachment vrs_att;
208    VkExtent2D vrs_texel_size;
209 };
210 
211 struct radv_descriptor_state {
212    struct radv_descriptor_set *sets[MAX_SETS];
213    uint32_t dirty;
214    uint32_t valid;
215    struct radv_push_descriptor_set push_set;
216    uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
217    uint64_t descriptor_buffers[MAX_SETS];
218    bool need_indirect_descriptor_sets;
219    uint64_t indirect_descriptor_sets_va;
220 };
221 
/* Push constant state; radv_cmd_buffer keeps one instance per bind point. */
struct radv_push_constant_state {
   uint32_t size;
   uint32_t dynamic_offset_count;
};
226 
/* Single-bit flush flags; radv_cmd_state::sqtt_flush_bits has this type. */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};
245 
/**
 * Indices of the tracked registers.
 *
 * Values are assigned sequentially from 0, so RADV_NUM_ALL_TRACKED_REGS is
 * the number of entries; struct radv_tracked_regs uses it to size its arrays.
 * Groups marked "consecutive registers" must stay adjacent and in this order.
 */
enum radv_tracked_reg {
   RADV_TRACKED_DB_COUNT_CONTROL,
   RADV_TRACKED_DB_SHADER_CONTROL,
   RADV_TRACKED_DB_VRS_OVERRIDE_CNTL,

   /* 2 consecutive registers */
   RADV_TRACKED_DB_DEPTH_BOUNDS_MIN,
   RADV_TRACKED_DB_DEPTH_BOUNDS_MAX,

   /* 2 consecutive registers */
   RADV_TRACKED_DB_STENCILREFMASK,    /* GFX6-11.5 */
   RADV_TRACKED_DB_STENCILREFMASK_BF, /* GFX6-11.5 */

   /* 2 consecutive registers */
   RADV_TRACKED_DB_STENCIL_READ_MASK,  /* GFX12 */
   RADV_TRACKED_DB_STENCIL_WRITE_MASK, /* GFX12 */

   RADV_TRACKED_DB_DEPTH_CONTROL,
   RADV_TRACKED_DB_STENCIL_CONTROL,
   RADV_TRACKED_DB_STENCIL_REF, /* GFX12 */

   RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
   RADV_TRACKED_GE_NGG_SUBGRP_CNTL,

   RADV_TRACKED_PA_CL_CLIP_CNTL,
   RADV_TRACKED_PA_CL_VRS_CNTL,
   RADV_TRACKED_PA_CL_VS_OUT_CNTL,

   RADV_TRACKED_PA_SC_BINNER_CNTL_0,
   RADV_TRACKED_PA_SC_SHADER_CONTROL,
   RADV_TRACKED_PA_SC_LINE_CNTL,
   RADV_TRACKED_PA_SC_LINE_STIPPLE,
   RADV_TRACKED_PA_SC_LINE_STIPPLE_RESET, /* GFX12 */

   /* 2 consecutive registers */
   RADV_TRACKED_SPI_PS_INPUT_ENA,
   RADV_TRACKED_SPI_PS_INPUT_ADDR,

   RADV_TRACKED_SPI_PS_IN_CONTROL,

   /* 2 consecutive registers */
   RADV_TRACKED_SPI_SHADER_IDX_FORMAT,
   RADV_TRACKED_SPI_SHADER_POS_FORMAT,

   RADV_TRACKED_SPI_SHADER_Z_FORMAT,
   RADV_TRACKED_SPI_VS_OUT_CONFIG,

   /* 3 consecutive registers */
   RADV_TRACKED_SX_PS_DOWNCONVERT,
   RADV_TRACKED_SX_BLEND_OPT_EPSILON,
   RADV_TRACKED_SX_BLEND_OPT_CONTROL,

   RADV_TRACKED_VGT_DRAW_PAYLOAD_CNTL,
   RADV_TRACKED_VGT_ESGS_RING_ITEMSIZE, /* GFX6-8 */
   RADV_TRACKED_VGT_GS_MODE,
   RADV_TRACKED_VGT_GS_INSTANCE_CNT,
   RADV_TRACKED_VGT_GS_ONCHIP_CNTL,
   RADV_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
   RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
   RADV_TRACKED_VGT_GS_OUT_PRIM_TYPE,

   /* 4 consecutive registers */
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_1,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_2,
   RADV_TRACKED_VGT_GS_VERT_ITEMSIZE_3,

   RADV_TRACKED_VGT_GSVS_RING_ITEMSIZE,

   /* 3 consecutive registers */
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_1,
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_2,
   RADV_TRACKED_VGT_GSVS_RING_OFFSET_3,

   RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX, /* GFX6-7 */
   RADV_TRACKED_VGT_PRIMITIVEID_EN,
   RADV_TRACKED_VGT_REUSE_OFF,
   RADV_TRACKED_VGT_SHADER_STAGES_EN,
   RADV_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,

   RADV_TRACKED_PA_SU_LINE_CNTL,
   RADV_TRACKED_PA_SU_SC_MODE_CNTL,

   /* Number of tracked registers; must remain the last enumerator. */
   RADV_NUM_ALL_TRACKED_REGS,
};
331 
332 struct radv_tracked_regs {
333    BITSET_DECLARE(reg_saved_mask, RADV_NUM_ALL_TRACKED_REGS);
334    uint32_t reg_value[RADV_NUM_ALL_TRACKED_REGS];
335    uint32_t spi_ps_input_cntl[32];
336 };
337 
338 struct radv_cmd_state {
339    /* Vertex descriptors */
340    uint64_t vb_va;
341    unsigned vb_size;
342 
343    bool predicating;
344    uint64_t dirty_dynamic;
345    uint32_t dirty;
346 
347    VkShaderStageFlags active_stages;
348    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
349    struct radv_shader *gs_copy_shader;
350    struct radv_shader *last_vgt_shader;
351    struct radv_shader *rt_prolog;
352 
353    struct radv_shader_object *shader_objs[MESA_VULKAN_SHADER_STAGES];
354 
355    uint32_t prefetch_L2_mask;
356 
357    struct radv_graphics_pipeline *graphics_pipeline;
358    struct radv_graphics_pipeline *emitted_graphics_pipeline;
359    struct radv_compute_pipeline *compute_pipeline;
360    struct radv_compute_pipeline *emitted_compute_pipeline;
361    struct radv_ray_tracing_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
362    struct radv_dynamic_state dynamic;
363    struct radv_vertex_input_state vertex_input;
364    struct radv_streamout_state streamout;
365 
366    struct radv_rendering_state render;
367 
368    /* Index buffer */
369    uint32_t index_type;
370    uint32_t max_index_count;
371    uint64_t index_va;
372    int32_t last_index_type;
373 
374    enum radv_cmd_flush_bits flush_bits;
375    unsigned active_occlusion_queries;
376    bool perfect_occlusion_queries_enabled;
377    unsigned active_pipeline_queries;
378    unsigned active_emulated_pipeline_queries;
379    unsigned active_pipeline_ace_queries; /* Task shader invocations query */
380    unsigned active_prims_gen_queries;
381    unsigned active_prims_xfb_queries;
382    unsigned active_emulated_prims_gen_queries;
383    unsigned active_emulated_prims_xfb_queries;
384    uint32_t trace_id;
385    uint32_t last_ia_multi_vgt_param;
386    uint32_t last_ge_cntl;
387 
388    uint32_t last_num_instances;
389    uint32_t last_first_instance;
390    bool last_vertex_offset_valid;
391    uint32_t last_vertex_offset;
392    uint32_t last_drawid;
393    uint32_t last_subpass_color_count;
394 
395    /* Whether CP DMA is busy/idle. */
396    bool dma_is_busy;
397 
398    /* Whether any images that are not L2 coherent are dirty from the CB. */
399    bool rb_noncoherent_dirty;
400 
401    /* Conditional rendering info. */
402    uint8_t predication_op; /* 32-bit or 64-bit predicate value */
403    int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
404    uint64_t predication_va;
405    uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
406    bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
407 
408    /* Inheritance info. */
409    VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
410    bool inherited_occlusion_queries;
411    VkQueryControlFlags inherited_query_control_flags;
412 
413    bool context_roll_without_scissor_emitted;
414 
415    /* SQTT related state. */
416    uint32_t current_event_type;
417    uint32_t num_events;
418    uint32_t num_layout_transitions;
419    bool in_barrier;
420    bool pending_sqtt_barrier_end;
421    enum rgp_flush_bits sqtt_flush_bits;
422 
423    /* NGG culling state. */
424    bool has_nggc;
425 
426    /* Mesh shading state. */
427    bool mesh_shading;
428 
429    uint8_t cb_mip[MAX_RTS];
430    uint8_t ds_mip;
431 
432    /* Whether DRAW_{INDEX}_INDIRECT_{MULTI} is emitted. */
433    bool uses_draw_indirect;
434 
435    uint32_t rt_stack_size;
436 
437    struct radv_shader_part *emitted_vs_prolog;
438    uint32_t vbo_misaligned_mask;
439    uint32_t vbo_unaligned_mask;
440    uint32_t vbo_misaligned_mask_invalid;
441    uint32_t vbo_bound_mask;
442 
443    struct radv_shader_part *emitted_ps_epilog;
444 
445    /* Per-vertex VRS state. */
446    uint32_t last_vrs_rates;
447    int32_t last_force_vrs_rates_offset;
448 
449    /* Whether to suspend streamout for internal driver operations. */
450    bool suspend_streamout;
451 
452    /* Whether this commandbuffer uses performance counters. */
453    bool uses_perf_counters;
454 
455    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
456 
457    /* Tessellation info when patch control points is dynamic. */
458    unsigned tess_num_patches;
459    unsigned tess_lds_size;
460 
461    unsigned spi_shader_col_format;
462    unsigned spi_shader_z_format;
463    unsigned cb_shader_mask;
464 
465    struct radv_multisample_state ms;
466 
467    /* Custom blend mode for internal operations. */
468    unsigned custom_blend_mode;
469    unsigned db_render_control;
470 
471    unsigned last_cb_target_mask;
472 
473    unsigned rast_prim;
474 
475    uint32_t vtx_base_sgpr;
476    uint8_t vtx_emit_num;
477    bool uses_drawid;
478    bool uses_baseinstance;
479 
480    bool uses_out_of_order_rast;
481    bool uses_vrs;
482    bool uses_vrs_attachment;
483    bool uses_vrs_coarse_shading;
484    bool uses_dynamic_patch_control_points;
485    bool uses_fbfetch_output;
486 };
487 
/* Encoder state, stored as radv_cmd_buffer::video.enc. */
struct radv_enc_state {
   uint32_t task_size_offset;
   uint32_t total_task_size;
   unsigned shifter;
   unsigned bits_in_shifter;
   uint32_t num_zeros;
   uint32_t byte_index;
   unsigned bits_output;
   unsigned bits_size;
   bool emulation_prevention;
   bool is_even_frame;
   unsigned task_id;
   uint32_t copy_start_offset;
};
502 
503 struct radv_cmd_buffer_upload {
504    uint8_t *map;
505    unsigned offset;
506    uint64_t size;
507    struct radeon_winsys_bo *upload_bo;
508    struct list_head list;
509 };
510 
511 struct radv_cmd_buffer {
512    struct vk_command_buffer vk;
513 
514    struct radv_tracked_regs tracked_regs;
515 
516    VkCommandBufferUsageFlags usage_flags;
517    struct radeon_cmdbuf *cs;
518    struct radv_cmd_state state;
519    struct radv_buffer *vertex_binding_buffers[MAX_VBS];
520    struct radv_vertex_binding vertex_bindings[MAX_VBS];
521    uint32_t used_vertex_bindings;
522    struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
523    enum radv_queue_family qf;
524 
525    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
526    VkShaderStageFlags push_constant_stages;
527    struct radv_descriptor_set_header meta_push_descriptors;
528 
529    struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
530 
531    struct radv_push_constant_state push_constant_state[MAX_BIND_POINTS];
532 
533    uint64_t descriptor_buffers[MAX_SETS];
534 
535    struct radv_cmd_buffer_upload upload;
536 
537    uint32_t scratch_size_per_wave_needed;
538    uint32_t scratch_waves_wanted;
539    uint32_t compute_scratch_size_per_wave_needed;
540    uint32_t compute_scratch_waves_wanted;
541    uint32_t esgs_ring_size_needed;
542    uint32_t gsvs_ring_size_needed;
543    bool tess_rings_needed;
544    bool task_rings_needed;
545    bool mesh_scratch_ring_needed;
546    bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
547    bool gds_oa_needed; /* for GFX10 streamout */
548    bool sample_positions_needed;
549 
550    uint64_t gfx9_fence_va;
551    uint32_t gfx9_fence_idx;
552    uint64_t gfx9_eop_bug_va;
553 
554    struct set vs_prologs;
555    struct set ps_epilogs;
556 
557    /**
558     * Gang state.
559     * Used when the command buffer needs work done on a different queue
560     * (eg. when a graphics command buffer needs compute work).
561     * Currently only one follower is possible per command buffer.
562     */
563    struct {
564       /** Follower command stream. */
565       struct radeon_cmdbuf *cs;
566 
567       /** Flush bits for the follower cmdbuf. */
568       enum radv_cmd_flush_bits flush_bits;
569 
570       /**
571        * For synchronization between the follower and leader.
572        * The value of these semaphores are incremented whenever we
573        * encounter a barrier that affects the follower.
574        *
575        * DWORD 0: Leader to follower semaphore.
576        *          The leader writes the value and the follower waits.
577        * DWORD 1: Follower to leader semaphore.
578        *          The follower writes the value, and the leader waits.
579        */
580       struct {
581          uint64_t va;                     /* Virtual address of the semaphore. */
582          uint32_t leader_value;           /* Current value of the leader. */
583          uint32_t emitted_leader_value;   /* Last value emitted by the leader. */
584          uint32_t follower_value;         /* Current value of the follower. */
585          uint32_t emitted_follower_value; /* Last value emitted by the follower. */
586       } sem;
587    } gang;
588 
589    /**
590     * Whether a query pool has been reset and we have to flush caches.
591     */
592    bool pending_reset_query;
593 
594    /**
595     * Bitmask of pending active query flushes.
596     */
597    enum radv_cmd_flush_bits active_query_flush_bits;
598 
599    struct {
600       struct radv_video_session *vid;
601       struct radv_video_session_params *params;
602       struct rvcn_sq_var sq;
603       struct rvcn_decode_buffer_s *decode_buffer;
604       struct radv_enc_state enc;
605       uint64_t feedback_query_va;
606    } video;
607 
608    struct {
609       /* Temporary space for some transfer queue copy command workarounds. */
610       struct radeon_winsys_bo *copy_temp;
611    } transfer;
612 
613    uint64_t shader_upload_seq;
614 
615    uint32_t sqtt_cb_id;
616 
617    struct set *accel_struct_buffers;
618    struct util_dynarray ray_history;
619 };
620 
621 VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
622 
623 static inline struct radv_device *
radv_cmd_buffer_device(const struct radv_cmd_buffer * cmd_buffer)624 radv_cmd_buffer_device(const struct radv_cmd_buffer *cmd_buffer)
625 {
626    return (struct radv_device *)cmd_buffer->vk.base.device;
627 }
628 
629 ALWAYS_INLINE static bool
radv_is_streamout_enabled(struct radv_cmd_buffer * cmd_buffer)630 radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
631 {
632    struct radv_streamout_state *so = &cmd_buffer->state.streamout;
633 
634    /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
635    return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout;
636 }
637 
638 static inline unsigned
vk_to_bind_point(VkPipelineBindPoint bind_point)639 vk_to_bind_point(VkPipelineBindPoint bind_point)
640 {
641    return bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? 2 : bind_point;
642 }
643 
644 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)645 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
646 {
647    return &cmd_buffer->descriptors[vk_to_bind_point(bind_point)];
648 }
649 
650 static inline const struct radv_push_constant_state *
radv_get_push_constants_state(const struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)651 radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
652 {
653    return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)];
654 }
655 
656 static inline bool
radv_cmdbuf_has_stage(const struct radv_cmd_buffer * cmd_buffer,gl_shader_stage stage)657 radv_cmdbuf_has_stage(const struct radv_cmd_buffer *cmd_buffer, gl_shader_stage stage)
658 {
659    return !!(cmd_buffer->state.active_stages & mesa_to_vk_shader_stage(stage));
660 }
661 
662 static inline uint32_t
radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer * cmd_buffer)663 radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer *cmd_buffer)
664 {
665    /* SAMPLE_STREAMOUTSTATS also requires PIPELINESTAT_START to be enabled. */
666    return cmd_buffer->state.active_pipeline_queries + cmd_buffer->state.active_prims_gen_queries +
667           cmd_buffer->state.active_prims_xfb_queries;
668 }
669 
670 static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf * cs,unsigned sh_offset,unsigned pointer_count,bool use_32bit_pointers)671 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
672                               bool use_32bit_pointers)
673 {
674    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
675    radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
676 }
677 
678 static inline void
radv_emit_shader_pointer_body(const struct radv_device * device,struct radeon_cmdbuf * cs,uint64_t va,bool use_32bit_pointers)679 radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
680                               bool use_32bit_pointers)
681 {
682    const struct radv_physical_device *pdev = radv_device_physical(device);
683 
684    radeon_emit(cs, va);
685 
686    if (use_32bit_pointers) {
687       assert(va == 0 || (va >> 32) == pdev->info.address32_hi);
688    } else {
689       radeon_emit(cs, va >> 32);
690    }
691 }
692 
/**
 * Emits a complete SET_SH_REG packet that writes a single shader pointer to
 * register `sh_offset`.
 *
 * A `global` pointer is emitted as a full 64-bit address; otherwise the
 * 32-bit pointer form is used.
 */
static inline void
radv_emit_shader_pointer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va,
                         bool global)
{
   bool use_32bit_pointers = !global;

   radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
   radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
}
702 
703 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
704 
705 void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
706 
707 bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
708                                           unsigned *out_offset, void **ptr);
709 
710 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr);
711 
712 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
713                                  unsigned *out_offset);
714 
715 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
716 
717 void radv_cmd_buffer_annotate(struct radv_cmd_buffer *cmd_buffer, const char *annotation);
718 
719 void radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer);
720 
721 bool radv_gang_init(struct radv_cmd_buffer *cmd_buffer);
722 
723 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
724                              struct radv_descriptor_set *set, unsigned idx);
725 
726 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
727                                    VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);
728 
729 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
730                               const VkImageSubresourceRange *range, bool value);
731 
732 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
733                               const VkImageSubresourceRange *range, bool value);
734 
735 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
736                                       int cb_idx, uint32_t color_values[2]);
737 
738 unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
739 
740 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
741                                                VkAccessFlags2 src_flags, const struct radv_image *image,
742                                                const VkImageSubresourceRange *range);
743 
744 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 dst_stages,
745                                                VkAccessFlags2 dst_flags, const struct radv_image *image,
746                                                const VkImageSubresourceRange *range);
747 
748 struct radv_resolve_barrier {
749    VkPipelineStageFlags2 src_stage_mask;
750    VkPipelineStageFlags2 dst_stage_mask;
751    VkAccessFlags2 src_access_mask;
752    VkAccessFlags2 dst_access_mask;
753 };
754 
755 void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier);
756 
757 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
758                                    VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
759                                    const VkWriteDescriptorSet *pDescriptorWrites);
760 
/* Parameters of a compute dispatch, passed to radv_compute_dispatch(). */
struct radv_dispatch_info {
   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint32_t blocks[3];

   /**
    * A starting offset for the grid. If unaligned is set, the offset
    * must still be aligned.
    */
   uint32_t offsets[3];

   /**
    * Whether it's an unaligned compute dispatch.
    */
   bool unaligned;

   /**
    * Whether waves must be launched in order.
    */
   bool ordered;

   /**
    * Indirect compute parameters resource.
    */
   struct radeon_winsys_bo *indirect;
   uint64_t va;
};
789 
790 void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
791 
792 /*
793  * Takes x,y,z as exact numbers of invocations, instead of blocks.
794  *
795  * Limitations: Can't call normal dispatch functions without binding or rebinding
796  *              the compute pipeline.
797  */
798 void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z);
799 
800 void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va);
801 
802 uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
803                          const VkImageSubresourceRange *range);
804 
805 uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
806                        const VkImageSubresourceRange *range, uint32_t value);
807 
808 void radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
809 
810 void radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
811                                      uint64_t va);
812 
813 void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va, bool draw_visible);
814 
815 void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);
816 
817 uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);
818 
/* Vertex buffer information; radv_get_vbo_info() takes a pointer to one of these. */
struct radv_vbo_info {
   uint64_t va;

   uint32_t binding;
   uint32_t stride;
   uint32_t size;

   uint32_t attrib_offset;
   uint32_t attrib_index_offset;
   uint32_t attrib_format_size;

   uint32_t non_trivial_format;
};
832 
void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t vbo_idx, struct radv_vbo_info *vbo_info);

void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
                              const struct radv_shader *shader);

void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
                                          struct radv_descriptor_state *descriptors_state);
840 
841 #endif /* RADV_CMD_BUFFER_H */
842