/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef RADV_PRIVATE_H
#define RADV_PRIVATE_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_VALGRIND
#include <memcheck.h>
#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "c11/threads.h"
#ifndef _WIN32
#include <amdgpu.h>
#include <xf86drm.h>
#endif
#include "compiler/shader_enums.h"
#include "util/bitscan.h"
#include "util/detect_os.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/rwlock.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_buffer.h"
#include "vk_buffer_view.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_format.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_query_pool.h"
#include "vk_queue.h"
#include "vk_sampler.h"
#include "vk_shader_module.h"
#include "vk_texcompress_astc.h"
#include "vk_texcompress_etc2.h"
#include "vk_util.h"
#include "vk_video.h"
#include "vk_ycbcr_conversion.h"

#include "rmv/vk_rmv_common.h"
#include "rmv/vk_rmv_tokens.h"

#include "ac_binary.h"
#include "ac_gpu_info.h"
#include "ac_shader_util.h"
#include "ac_spm.h"
#include "ac_sqtt.h"
#include "ac_surface.h"
#include "ac_vcn.h"
#include "radv_constants.h"
#include "radv_descriptor_set.h"
#include "radv_radeon_winsys.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "sid.h"

#include "radix_sort/radix_sort_vk_devaddr.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>

#include "radv_entrypoints.h"

#include "wsi_common.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Helper to determine if we should compile
 * any of the Android AHB support.
 *
 * To actually enable the ext we also need
 * the necessary kernel support.
 */
#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
#include <vndk/hardware_buffer.h>
#else
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
#endif

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) ||   \
   defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID_STRICT
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif

#ifdef _WIN32
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
#else
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
#endif

#ifdef _WIN32
#define radv_printflike(a, b)
#else
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
#endif

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

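/* Round v up to the next multiple of a; a must be a power of two.
 * e.g. align_u32(13, 8) == 16. */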
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}

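/* Round v up to the next multiple of a, for alignments that are not
 * necessarily a power of two; e.g. align_u32_npot(10, 3) == 12. */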
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   return (v + a - 1) / a * a;
}

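/* 64-bit variant of align_u32; a must be a power of two. */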
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}

/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}

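/* Compute the size of a mip level by halving n once per level, clamped to 1;
 * e.g. radv_minify(13, 2) == 3. */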
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;
   else
      return MAX2(n >> levels, 1);
}

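/* Convert a float to signed (below: unsigned) fixed point with frac_bits
 * fractional bits; the implicit float-to-int conversion truncates toward
 * zero. */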
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}

static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}

/* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
 */

struct radv_image_view;
struct radv_instance;
struct rvcn_decode_buffer_s;

/* queue types */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,
   RADV_QUEUE_COMPUTE,
   RADV_QUEUE_TRANSFER,
   RADV_QUEUE_SPARSE,
   RADV_QUEUE_VIDEO_DEC,
   RADV_QUEUE_VIDEO_ENC,
   RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_IGNORED,
};

struct radv_perfcounter_desc;

struct radv_binning_settings {
   unsigned context_states_per_bin;    /* allowed range: [1, 6] */
   unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
};

struct radv_physical_device_cache_key {
   enum radeon_family family;
   uint32_t ptr_size;

   uint32_t conformant_trunc_coord : 1;
   uint32_t clear_lds : 1;
   uint32_t cs_wave32 : 1;
   uint32_t disable_aniso_single_level : 1;
   uint32_t disable_shrink_image_store : 1;
   uint32_t disable_sinking_load_input_fs : 1;
   uint32_t dual_color_blend_by_location : 1;
   uint32_t emulate_rt : 1;
   uint32_t ge_wave32 : 1;
   uint32_t invariant_geom : 1;
   uint32_t lower_discard_to_demote : 1;
   uint32_t mesh_fast_launch_2 : 1;
   uint32_t no_fmask : 1;
   uint32_t no_rt : 1;
   uint32_t ps_wave32 : 1;
   uint32_t rt_wave64 : 1;
   uint32_t split_fma : 1;
   uint32_t ssbo_non_uniform : 1;
   uint32_t tex_non_uniform : 1;
   uint32_t use_llvm : 1;
   uint32_t use_ngg : 1;
   uint32_t use_ngg_culling : 1;
};

struct radv_physical_device {
   struct vk_physical_device vk;

   struct radv_instance *instance;

   struct radeon_winsys *ws;
   struct radeon_info rad_info;
   char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   int local_fd;
   int master_fd;
   struct wsi_device wsi_device;

   /* Whether DCC should be enabled for MSAA textures. */
   bool dcc_msaa_allowed;

   /* Whether to enable FMASK compression for MSAA textures (GFX6-GFX10.3) */
   bool use_fmask;

   /* Whether to enable NGG. */
   bool use_ngg;

   /* Whether to enable NGG culling. */
   bool use_ngg_culling;

   /* Whether to enable NGG streamout. */
   bool use_ngg_streamout;

   /* Whether to emulate the number of primitives generated by GS. */
   bool emulate_ngg_gs_query_pipeline_stat;

   /* Whether to use GS_FAST_LAUNCH(2) for mesh shaders. */
   bool mesh_fast_launch_2;

   /* Whether to emulate mesh/task shader queries. */
   bool emulate_mesh_shader_queries;

   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;
   uint8_t ge_wave_size;
   uint8_t rt_wave_size;

   /* Maximum compute shared memory size. */
   uint32_t max_shared_size;

   /* Whether to use the LLVM compiler backend */
   bool use_llvm;

   /* Whether to emulate ETC2 image support on HW without support. */
   bool emulate_etc2;

   /* Whether to emulate ASTC image support on HW without support. */
   bool emulate_astc;

   VkPhysicalDeviceMemoryProperties memory_properties;
   enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
   enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
   unsigned heaps;

   /* Bitmask of memory types that use the 32-bit address space. */
   uint32_t memory_types_32bit;

#ifndef _WIN32
   int available_nodes;
   drmPciBusInfo bus_info;

   dev_t primary_devid;
   dev_t render_devid;
#endif

   nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];

   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
   uint32_t num_queues;

   uint32_t gs_table_depth;

   struct ac_hs_info hs;
   struct ac_task_info task_info;

   struct radv_binning_settings binning_settings;

   /* Performance counters. */
   struct ac_perfcounters ac_perfcounters;

   uint32_t num_perfcounters;
   struct radv_perfcounter_desc *perfcounters;

   struct {
      unsigned data0;
      unsigned data1;
      unsigned cmd;
      unsigned cntl;
   } vid_dec_reg;
   enum amd_ip_type vid_decode_ip;
   uint32_t vid_addr_gfx_mode;
   uint32_t stream_handle_base;
   uint32_t stream_handle_counter;
   uint32_t av1_version;

   struct radv_physical_device_cache_key cache_key;
};

uint32_t radv_find_memory_index(const struct radv_physical_device *pdevice, VkMemoryPropertyFlags flags);

VkResult create_null_physical_device(struct vk_instance *vk_instance);

VkResult create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device,
                                    struct vk_physical_device **out);

void radv_physical_device_destroy(struct vk_physical_device *vk_device);

enum radv_trace_mode {
   /** Radeon GPU Profiler */
   RADV_TRACE_MODE_RGP = 1 << VK_TRACE_MODE_COUNT,

   /** Radeon Raytracing Analyzer */
   RADV_TRACE_MODE_RRA = 1 << (VK_TRACE_MODE_COUNT + 1),

   /** Gather context rolls of submitted command buffers */
   RADV_TRACE_MODE_CTX_ROLLS = 1 << (VK_TRACE_MODE_COUNT + 2),
};

struct radv_instance {
   struct vk_instance vk;

   VkAllocationCallbacks alloc;

   uint64_t debug_flags;
   uint64_t perftest_flags;

   struct {
      struct driOptionCache options;
      struct driOptionCache available_options;

      bool enable_mrt_output_nan_fixup;
      bool disable_tc_compat_htile_in_general;
      bool disable_shrink_image_store;
      bool disable_aniso_single_level;
      bool disable_trunc_coord;
      bool zero_vram;
      bool disable_sinking_load_input_fs;
      bool flush_before_query_copy;
      bool enable_unified_heap_on_apu;
      bool tex_non_uniform;
      bool ssbo_non_uniform;
      bool flush_before_timestamp_write;
      bool force_rt_wave64;
      bool dual_color_blend_by_location;
      bool legacy_sparse_binding;
      bool clear_lds;
      bool enable_dgc;
      bool enable_khr_present_wait;
      bool report_llvm9_version_string;
      bool vk_require_etc2;
      bool vk_require_astc;
      bool force_active_accel_struct_leaves;
      char *app_layer;
      uint8_t override_graphics_shader_version;
      uint8_t override_compute_shader_version;
      uint8_t override_ray_tracing_shader_version;
      int override_vram_size;
      int override_uniform_offset_alignment;
   } drirc;
};

VkResult radv_init_wsi(struct radv_physical_device *physical_device);
void radv_finish_wsi(struct radv_physical_device *physical_device);

struct radv_shader_binary_part;

bool radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
                                struct radv_pipeline *pipeline, const unsigned char *sha1,
                                bool *found_in_application_cache);

void radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
                                struct radv_pipeline *pipeline, const unsigned char *sha1);

struct radv_ray_tracing_pipeline;
bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
                                            struct radv_ray_tracing_pipeline *pipeline,
                                            const VkRayTracingPipelineCreateInfoKHR *create_info);

void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
                                            struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
                                            const unsigned char *sha1);

nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
                                           gl_shader_stage stage, const blake3_hash key);

void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
                                    const nir_shader *nir);

struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device,
                                                                       struct vk_pipeline_cache *cache,
                                                                       const unsigned char *sha1);

struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device,
                                                                   struct vk_pipeline_cache *cache,
                                                                   struct nir_shader *nir, const unsigned char *sha1,
                                                                   bool cached);

struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device,
                                                     struct vk_pipeline_cache_object *object);

struct radv_meta_state {
   VkAllocationCallbacks alloc;

   VkPipelineCache cache;
   uint32_t initial_cache_entries;

   /*
    * For on-demand pipeline creation, makes sure that
    * only one thread tries to build a pipeline at the same time.
    */
   mtx_t mtx;

   /**
    * Use array element `i` for images with `2^i` samples.
    */
   struct {
      VkPipeline color_pipelines[NUM_META_FS_KEYS];
   } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];

   struct {
      VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];

      VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
   } ds_clear[MAX_SAMPLES_LOG2];

   VkPipelineLayout clear_color_p_layout;
   VkPipelineLayout clear_depth_p_layout;
   VkPipelineLayout clear_depth_unrestricted_p_layout;

   /* Optimized compute fast HTILE clear for stencil or depth only. */
   VkPipeline clear_htile_mask_pipeline;
   VkPipelineLayout clear_htile_mask_p_layout;
   VkDescriptorSetLayout clear_htile_mask_ds_layout;

   /* Copy VRS into HTILE. */
   VkPipeline copy_vrs_htile_pipeline;
   VkPipelineLayout copy_vrs_htile_p_layout;
   VkDescriptorSetLayout copy_vrs_htile_ds_layout;

   /* Clear DCC with comp-to-single. */
   VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
   VkPipelineLayout clear_dcc_comp_to_single_p_layout;
   VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;

   struct {
      /** Pipeline that blits from a 1D image. */
      VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 2D image. */
      VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 3D image. */
      VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

      VkPipeline depth_only_1d_pipeline;
      VkPipeline depth_only_2d_pipeline;
      VkPipeline depth_only_3d_pipeline;

      VkPipeline stencil_only_1d_pipeline;
      VkPipeline stencil_only_2d_pipeline;
      VkPipeline stencil_only_3d_pipeline;
      VkPipelineLayout pipeline_layout;
      VkDescriptorSetLayout ds_layout;
   } blit;

   struct {
      VkPipelineLayout p_layouts[5];
      VkDescriptorSetLayout ds_layouts[5];
      VkPipeline pipelines[5][NUM_META_FS_KEYS];

      VkPipeline depth_only_pipeline[5];

      VkPipeline stencil_only_pipeline[5];
   } blit2d[MAX_SAMPLES_LOG2];

   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } itob;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } btoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } btoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } itoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } itoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } cleari;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } cleari_r32g32b32;
   struct {
      VkPipelineLayout p_layout;
      VkDescriptorSetLayout ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_copy;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline pipeline[NUM_META_FS_KEYS];
   } resolve;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      struct {
         VkPipeline pipeline;
         VkPipeline i_pipeline;
         VkPipeline srgb_pipeline;
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_compute;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;

      struct {
         VkPipeline pipeline[NUM_META_FS_KEYS];
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_fragment;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline decompress_pipeline;
      VkPipeline resummarize_pipeline;
   } depth_decomp[MAX_SAMPLES_LOG2];

   VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
   VkPipelineLayout expand_depth_stencil_compute_p_layout;
   VkPipeline expand_depth_stencil_compute_pipeline;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline cmask_eliminate_pipeline;
      VkPipeline fmask_decompress_pipeline;
      VkPipeline dcc_decompress_pipeline;

      VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
      VkPipelineLayout dcc_decompress_compute_p_layout;
      VkPipeline dcc_decompress_compute_pipeline;
   } fast_clear_flush;

   struct {
      VkPipelineLayout fill_p_layout;
      VkPipelineLayout copy_p_layout;
      VkPipeline fill_pipeline;
      VkPipeline copy_pipeline;
   } buffer;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline occlusion_query_pipeline;
      VkPipeline pipeline_statistics_query_pipeline;
      VkPipeline tfb_query_pipeline;
      VkPipeline timestamp_query_pipeline;
      VkPipeline pg_query_pipeline;
      VkPipeline ms_prim_gen_query_pipeline;
   } query;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_expand;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[32];
   } dcc_retile;

   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout morton_p_layout;
      VkPipeline morton_pipeline;
      VkPipelineLayout lbvh_main_p_layout;
      VkPipeline lbvh_main_pipeline;
      VkPipelineLayout lbvh_generate_ir_p_layout;
      VkPipeline lbvh_generate_ir_pipeline;
      VkPipelineLayout ploc_p_layout;
      VkPipeline ploc_pipeline;
      VkPipelineLayout encode_p_layout;
      VkPipeline encode_pipeline;
      VkPipeline encode_compact_pipeline;
      VkPipelineLayout header_p_layout;
      VkPipeline header_pipeline;
      VkPipelineLayout update_p_layout;
      VkPipeline update_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;

      struct radix_sort_vk *radix_sort;

      struct {
         VkBuffer buffer;
         VkDeviceMemory memory;
         VkAccelerationStructureKHR accel_struct;
      } null;
   } accel_struct_build;

   struct vk_texcompress_etc2_state etc_decode;

   struct vk_texcompress_astc_state *astc_decode;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } dgc_prepare;
};

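/* One hardware context per context priority level. */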
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)

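/* Map a Vulkan queue family index (including the special EXTERNAL/FOREIGN
 * and IGNORED values) to the corresponding radv_queue_family. */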
static inline enum radv_queue_family
vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
{
   if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
      return RADV_QUEUE_FOREIGN;
   if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
      return RADV_QUEUE_IGNORED;

   assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
   return phys_dev->vk_queue_to_radv[queue_family_index];
}

enum amd_ip_type radv_queue_family_to_ring(const struct radv_physical_device *physical_device,
                                           enum radv_queue_family f);

static inline bool
radv_has_uvd(struct radv_physical_device *phys_dev)
{
   enum radeon_family family = phys_dev->rad_info.family;
   /* Only support UVD on TONGA+ */
   if (family < CHIP_TONGA)
      return false;
   return phys_dev->rad_info.ip[AMD_IP_UVD].num_queues > 0;
}

struct radv_queue_ring_info {
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   uint32_t attr_ring_size;
   bool tess_rings;
   bool task_rings;
   bool mesh_scratch_ring;
   bool gds;
   bool gds_oa;
   bool sample_positions;
};

struct radv_queue_state {
   enum radv_queue_family qf;
   struct radv_queue_ring_info ring_info;

   struct radeon_winsys_bo *scratch_bo;
   struct radeon_winsys_bo *descriptor_bo;
   struct radeon_winsys_bo *compute_scratch_bo;
   struct radeon_winsys_bo *esgs_ring_bo;
   struct radeon_winsys_bo *gsvs_ring_bo;
   struct radeon_winsys_bo *tess_rings_bo;
   struct radeon_winsys_bo *task_rings_bo;
   struct radeon_winsys_bo *mesh_scratch_ring_bo;
   struct radeon_winsys_bo *attr_ring_bo;
   struct radeon_winsys_bo *gds_bo;
   struct radeon_winsys_bo *gds_oa_bo;

   struct radeon_cmdbuf *initial_preamble_cs;
   struct radeon_cmdbuf *initial_full_flush_preamble_cs;
   struct radeon_cmdbuf *continue_preamble_cs;
   struct radeon_cmdbuf *gang_wait_preamble_cs;
   struct radeon_cmdbuf *gang_wait_postamble_cs;

   /* the uses_shadow_regs here will be set only for general queue */
   bool uses_shadow_regs;
   /* register state is saved in shadowed_regs buffer */
   struct radeon_winsys_bo *shadowed_regs;
   /* shadow regs preamble ib. This will be the first preamble ib.
    * This ib has the packets to start register shadowing.
    */
   struct radeon_winsys_bo *shadow_regs_ib;
   uint32_t shadow_regs_ib_size_dw;
};

struct radv_queue {
   struct vk_queue vk;
   struct radv_device *device;
   struct radeon_winsys_ctx *hw_ctx;
   enum radeon_ctx_priority priority;
   struct radv_queue_state state;
   struct radv_queue_state *follower_state;
   struct radeon_winsys_bo *gang_sem_bo;

   uint64_t last_shader_upload_seq;
   bool sqtt_present;
};

int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
                    const VkDeviceQueueCreateInfo *create_info,
                    const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);

void radv_queue_finish(struct radv_queue *queue);

enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj);

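/* Fixed-size pool of border colors shared device-wide; each slot holds one
 * VkClearColorValue and is handed out to a sampler that needs a custom
 * border color. */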
#define RADV_BORDER_COLOR_COUNT       4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)

struct radv_device_border_color_data {
   bool used[RADV_BORDER_COLOR_COUNT];

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr;

   /* Mutex is required to guarantee vkCreateSampler thread safety
    * given that we are writing to a buffer and checking color occupation */
   mtx_t mutex;
};

enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};

struct radv_notifier {
   int fd;
   int watch;
   bool quit;
   thrd_t thread;
};

struct radv_memory_trace_data {
   /* ID of the PTE update event in ftrace data */
   uint16_t ftrace_update_ptes_id;

   uint32_t num_cpus;
   int *pipe_fds;
};

struct radv_rra_accel_struct_data {
   VkEvent build_event;
   uint64_t va;
   uint64_t size;
   VkBuffer buffer;
   VkDeviceMemory memory;
   VkAccelerationStructureTypeKHR type;
   bool is_dead;
};

void radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data);

struct radv_ray_history_header {
   uint32_t offset;
   uint32_t dispatch_index;
   uint32_t submit_base_index;
};

enum radv_packed_token_type {
   radv_packed_token_end_trace,
};

struct radv_packed_token_header {
   uint32_t launch_index : 29;
   uint32_t hit : 1;
   uint32_t token_type : 2;
};

struct radv_packed_end_trace_token {
   struct radv_packed_token_header header;

   uint32_t accel_struct_lo;
   uint32_t accel_struct_hi;

   uint32_t flags : 16;
   uint32_t dispatch_index : 16;

   uint32_t sbt_offset : 4;
   uint32_t sbt_stride : 4;
   uint32_t miss_index : 16;
   uint32_t cull_mask : 8;

   float origin[3];
   float tmin;
   float direction[3];
   float tmax;

   uint32_t iteration_count : 16;
   uint32_t instance_count : 16;

   uint32_t ahit_count : 16;
   uint32_t isec_count : 16;

   uint32_t primitive_id;
   uint32_t geometry_id;

   uint32_t instance_id : 24;
   uint32_t hit_kind : 8;

   float t;
};
static_assert(sizeof(struct radv_packed_end_trace_token) == 76, "Unexpected radv_packed_end_trace_token size");

enum radv_rra_ray_history_metadata_type {
   RADV_RRA_COUNTER_INFO = 1,
   RADV_RRA_DISPATCH_SIZE = 2,
   RADV_RRA_TRAVERSAL_FLAGS = 3,
};

struct radv_rra_ray_history_metadata_info {
   enum radv_rra_ray_history_metadata_type type : 32;
   uint32_t padding;
   uint64_t size;
};

enum radv_rra_pipeline_type {
   RADV_RRA_PIPELINE_RAY_TRACING,
};

struct radv_rra_ray_history_counter {
   uint32_t dispatch_size[3];
   uint32_t hit_shader_count;
   uint32_t miss_shader_count;
   uint32_t shader_count;
   uint64_t pipeline_api_hash;
   uint32_t mode;
   uint32_t mask;
   uint32_t stride;
   uint32_t data_size;
   uint32_t lost_token_size;
   uint32_t ray_id_begin;
   uint32_t ray_id_end;
   enum radv_rra_pipeline_type pipeline_type : 32;
};

struct radv_rra_ray_history_dispatch_size {
   uint32_t size[3];
   uint32_t padding;
};

struct radv_rra_ray_history_traversal_flags {
   uint32_t box_sort_mode : 1;
   uint32_t node_ptr_flags : 1;
   uint32_t reserved : 30;
   uint32_t padding;
};

struct radv_rra_ray_history_metadata {
   struct radv_rra_ray_history_metadata_info counter_info;
   struct radv_rra_ray_history_counter counter;

   struct radv_rra_ray_history_metadata_info dispatch_size_info;
   struct radv_rra_ray_history_dispatch_size dispatch_size;

   struct radv_rra_ray_history_metadata_info traversal_flags_info;
   struct radv_rra_ray_history_traversal_flags traversal_flags;
};
static_assert(sizeof(struct radv_rra_ray_history_metadata) == 136,
              "radv_rra_ray_history_metadata does not match RRA expectations");

struct radv_rra_ray_history_data {
   struct radv_rra_ray_history_metadata metadata;
};

struct radv_rra_trace_data {
   struct hash_table *accel_structs;
   struct hash_table_u64 *accel_struct_vas;
   simple_mtx_t data_mtx;
   bool validate_as;
   bool copy_after_build;
   bool triggered;
   uint32_t copy_memory_index;

   struct util_dynarray ray_history;
   VkBuffer ray_history_buffer;
   VkDeviceMemory ray_history_memory;
   void *ray_history_data;
   uint64_t ray_history_addr;
   uint32_t ray_history_buffer_size;
   uint32_t ray_history_resolution_scale;
};

enum radv_dispatch_table {
   RADV_DEVICE_DISPATCH_TABLE,
   RADV_APP_DISPATCH_TABLE,
   RADV_RGP_DISPATCH_TABLE,
   RADV_RRA_DISPATCH_TABLE,
   RADV_RMV_DISPATCH_TABLE,
   RADV_CTX_ROLL_DISPATCH_TABLE,
   RADV_DISPATCH_TABLE_COUNT,
};

struct radv_layer_dispatch_tables {
   struct vk_device_dispatch_table app;
   struct vk_device_dispatch_table rgp;
   struct vk_device_dispatch_table rra;
   struct vk_device_dispatch_table rmv;
   struct vk_device_dispatch_table ctx_roll;
};

enum radv_buffer_robustness {
   RADV_BUFFER_ROBUSTNESS_DISABLED,
   RADV_BUFFER_ROBUSTNESS_1, /* robustBufferAccess */
   RADV_BUFFER_ROBUSTNESS_2, /* robustBufferAccess2 */
};

struct radv_sqtt_timestamp {
   uint8_t *map;
   unsigned offset;
   uint64_t size;
   struct radeon_winsys_bo *bo;
   struct list_head list;
};

struct radv_device_cache_key {
   uint32_t disable_trunc_coord : 1;
   uint32_t image_2d_view_of_3d : 1;
   uint32_t mesh_shader_queries : 1;
   uint32_t primitives_generated_query : 1;
};

struct radv_printf_format {
   char *string;
   uint32_t divergence_mask;
   uint8_t element_sizes[32];
};

struct radv_printf_data {
   uint32_t buffer_size;
   VkBuffer buffer;
   VkDeviceMemory memory;
   VkDeviceAddress buffer_addr;
   void *data;
   struct util_dynarray formats;
};

VkResult radv_printf_data_init(struct radv_device *device);

void radv_printf_data_finish(struct radv_device *device);

struct radv_printf_buffer_header {
   uint32_t offset;
   uint32_t size;
};

typedef struct nir_builder nir_builder;
typedef struct nir_def nir_def;

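/* Build a printf call into a shader with nir_builder; output lands in the
 * device printf buffer and is read back by radv_dump_printf_data(). If
 * `cond` is non-NULL, the print only happens when it is true (assumption:
 * the varargs are nir_def values matching the format specifiers). */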
1083 void radv_build_printf(nir_builder *b, nir_def *cond, const char *format, ...);
1084
1085 void radv_dump_printf_data(struct radv_device *device);
1086
1087 void radv_device_associate_nir(struct radv_device *device, nir_shader *nir);
1088
1089 struct radv_device {
1090 struct vk_device vk;
1091
1092 struct radv_instance *instance;
1093 struct radeon_winsys *ws;
1094
1095 struct radv_layer_dispatch_tables layer_dispatch;
1096
1097 struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
1098 struct radv_meta_state meta_state;
1099
1100 struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
1101 int queue_count[RADV_MAX_QUEUE_FAMILIES];
1102
1103 bool pbb_allowed;
1104 uint32_t scratch_waves;
1105 uint32_t dispatch_initiator;
1106 uint32_t dispatch_initiator_task;
1107
1108 /* MSAA sample locations.
1109 * The first index is the sample index.
1110 * The second index is the coordinate: X, Y. */
1111 float sample_locations_1x[1][2];
1112 float sample_locations_2x[2][2];
1113 float sample_locations_4x[4][2];
1114 float sample_locations_8x[8][2];
1115
1116 /* GFX7 and later */
1117 uint32_t gfx_init_size_dw;
1118 struct radeon_winsys_bo *gfx_init;
1119
1120 struct radeon_winsys_bo *trace_bo;
1121 uint32_t *trace_id_ptr;
1122
1123 /* Whether to keep shader debug info, for debugging. */
1124 bool keep_shader_info;
1125
1126 struct radv_physical_device *physical_device;
1127
1128 /* Backup in-memory cache to be used if the app doesn't provide one */
1129 struct vk_pipeline_cache *mem_cache;
1130
1131 /*
1132 * use different counters so MSAA MRTs get consecutive surface indices,
1133 * even if MASK is allocated in between.
1134 */
1135 uint32_t image_mrt_offset_counter;
1136 uint32_t fmask_mrt_offset_counter;
1137
1138 struct list_head shader_arenas;
1139 struct hash_table_u64 *capture_replay_arena_vas;
1140 unsigned shader_arena_shift;
1141 uint8_t shader_free_list_mask;
1142 struct radv_shader_free_list shader_free_list;
1143 struct radv_shader_free_list capture_replay_free_list;
1144 struct list_head shader_block_obj_pool;
1145 mtx_t shader_arena_mutex;
1146
1147 mtx_t shader_upload_hw_ctx_mutex;
1148 struct radeon_winsys_ctx *shader_upload_hw_ctx;
1149 VkSemaphore shader_upload_sem;
1150 uint64_t shader_upload_seq;
1151 struct list_head shader_dma_submissions;
1152 mtx_t shader_dma_submission_list_mutex;
1153 cnd_t shader_dma_submission_list_cond;
1154
1155 /* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
1156 bool shader_use_invisible_vram;
1157
1158 /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
1159 enum radv_buffer_robustness buffer_robustness;
1160
1161 /* Whether to inline the compute dispatch size in user sgprs. */
1162 bool load_grid_size_from_user_sgpr;
1163
1164 /* Whether the driver uses a global BO list. */
1165 bool use_global_bo_list;
1166
1167 /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
1168 int force_aniso;
1169
1170 /* Always disable TRUNC_COORD. */
1171 bool disable_trunc_coord;
1172
1173 struct radv_device_border_color_data border_color_data;
1174
1175 /* Thread trace. */
1176 struct ac_sqtt sqtt;
1177 bool sqtt_enabled;
1178 bool sqtt_triggered;
1179
1180 /* SQTT timestamps for queue events. */
1181 simple_mtx_t sqtt_timestamp_mtx;
1182 struct radv_sqtt_timestamp sqtt_timestamp;
1183
1184 /* SQTT timed cmd buffers. */
1185 simple_mtx_t sqtt_command_pool_mtx;
1186 struct vk_command_pool *sqtt_command_pool[2];
1187
1188 /* Memory trace. */
1189 struct radv_memory_trace_data memory_trace;
1190
1191 /* SPM. */
1192 struct ac_spm spm;
1193
1194 /* Radeon Raytracing Analyzer trace. */
1195 struct radv_rra_trace_data rra_trace;
1196
1197 FILE *ctx_roll_file;
1198 simple_mtx_t ctx_roll_mtx;
1199
1200 /* Trap handler. */
1201 struct radv_shader *trap_handler_shader;
1202 struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
1203 uint32_t *tma_ptr;
1204
1205 /* Overallocation. */
1206 bool overallocation_disallowed;
1207 uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
1208 mtx_t overallocation_mutex;
1209
1210 /* RADV_FORCE_VRS. */
1211 struct radv_notifier notifier;
1212 enum radv_force_vrs force_vrs;
1213
1214 /* Depth image for VRS when not bound by the app. */
1215 struct {
1216 struct radv_image *image;
1217 struct radv_buffer *buffer; /* HTILE */
1218 struct radv_device_memory *mem;
1219 } vrs;
1220
1221 /* Prime blit sdma queue */
1222 struct radv_queue *private_sdma_queue;
1223
1224 struct radv_shader_part_cache vs_prologs;
1225 struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
1226 struct radv_shader_part *instance_rate_vs_prologs[816];
1227
1228 struct radv_shader_part_cache ps_epilogs;
1229
1230 struct radv_shader_part_cache tcs_epilogs;
1231
1232 simple_mtx_t trace_mtx;
1233
1234 /* Whether per-vertex VRS is forced. */
1235 bool force_vrs_enabled;
1236
1237 simple_mtx_t pstate_mtx;
1238 unsigned pstate_cnt;
1239
1240 /* BO to contain some performance counter helpers:
1241 * - A lock for profiling cmdbuffers.
1242 * - a temporary fence for the end query synchronization.
1243 * - the pass to use for profiling. (as an array of bools)
1244 */
1245 struct radeon_winsys_bo *perf_counter_bo;
1246
1247 /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
1248 struct radeon_cmdbuf **perf_counter_lock_cs;
1249
1250 bool uses_shadow_regs;
1251
1252 struct hash_table *rt_handles;
1253 simple_mtx_t rt_handles_mtx;
1254
1255 struct radv_printf_data printf;
1256
1257 struct radv_device_cache_key cache_key;
1258 blake3_hash cache_hash;
1259
1260 /* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
1261 char *gpu_hang_report;
1262 };
1263
1264 bool radv_device_set_pstate(struct radv_device *device, bool enable);
1265 bool radv_device_acquire_performance_counters(struct radv_device *device);
1266 void radv_device_release_performance_counters(struct radv_device *device);
1267
1268 struct radv_device_memory {
1269 struct vk_object_base base;
1270 struct radeon_winsys_bo *bo;
1271 /* for dedicated allocations */
1272 struct radv_image *image;
1273 struct radv_buffer *buffer;
1274 uint32_t heap_index;
1275 uint64_t alloc_size;
1276 void *map;
1277 void *user_ptr;
1278
1279 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
1280 struct AHardwareBuffer *android_hardware_buffer;
1281 #endif
1282 };
1283
1284 void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo);
1285 void radv_device_memory_finish(struct radv_device_memory *mem);
1286
1287 struct radv_buffer {
1288 struct vk_buffer vk;
1289
1290 /* Set when bound */
1291 struct radeon_winsys_bo *bo;
1292 VkDeviceSize offset;
1293 };
1294
1295 void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo,
1296 uint64_t size, uint64_t offset);
1297 void radv_buffer_finish(struct radv_buffer *buffer);
1298
1299 enum radv_dynamic_state_bits {
1300 RADV_DYNAMIC_VIEWPORT = 1ull << 0,
1301 RADV_DYNAMIC_SCISSOR = 1ull << 1,
1302 RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
1303 RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1304 RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1305 RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1306 RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1307 RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1308 RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1309 RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1310 RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1311 RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1312 RADV_DYNAMIC_CULL_MODE = 1ull << 12,
1313 RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
1314 RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1315 RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1316 RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1317 RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1318 RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1319 RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1320 RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
1321 RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1322 RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1323 RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1324 RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1325 RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1326 RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
1327 RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1328 RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1329 RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
1330 RADV_DYNAMIC_POLYGON_MODE = 1ull << 30,
1331 RADV_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
1332 RADV_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
1333 RADV_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
1334 RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
1335 RADV_DYNAMIC_SAMPLE_MASK = 1ull << 35,
1336 RADV_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
1337 RADV_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
1338 RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
1339 RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
1340 RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
1341 RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
1342 RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
1343 RADV_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
1344 RADV_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
1345 RADV_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
1346 RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
1347 RADV_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
1348 RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
1349 RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
1350 RADV_DYNAMIC_ALL = (1ull << 50) - 1,
1351 };
1352
1353 enum radv_cmd_dirty_bits {
1354 /* Keep the dynamic state dirty bits in sync with
1355 * enum radv_dynamic_state_bits */
1356 RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
1357 RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
1358 RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
1359 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1360 RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1361 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1362 RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1363 RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1364 RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1365 RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1366 RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1367 RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1368 RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
1369 RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
1370 RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1371 RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1372 RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1373 RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1374 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1375 RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1376 RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
1377 RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1378 RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1379 RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1380 RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1381 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1382 RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
1383 RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1384 RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1385 RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
1386 RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE = 1ull << 30,
1387 RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
1388 RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
1389 RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
1390 RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
1391 RADV_CMD_DIRTY_DYNAMIC_SAMPLE_MASK = 1ull << 35,
1392 RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
1393 RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
1394 RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
1395 RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
1396 RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
1397 RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
1398 RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
1399 RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
1400 RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
1401 RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
1402 RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
1403 RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
1404 RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
1405 RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
1406 RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 50) - 1,
1407 RADV_CMD_DIRTY_PIPELINE = 1ull << 50,
1408 RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 51,
1409 RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 52,
1410 RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 53,
1411 RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 54,
1412 RADV_CMD_DIRTY_GUARDBAND = 1ull << 55,
1413 RADV_CMD_DIRTY_RBPLUS = 1ull << 56,
1414 RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 57,
1415 RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 58,
1416 RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 59,
1417 RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 60,
1418 RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 61,
1419 };
1420
1421 enum radv_cmd_flush_bits {
1422 /* Instruction cache. */
1423 RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
1424 /* Scalar L1 cache. */
1425 RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
1426 /* Vector L1 cache. */
1427 RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
1428 /* L2 cache + L2 metadata cache writeback & invalidate.
1429 * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
1430 RADV_CMD_FLAG_INV_L2 = 1 << 3,
1431 /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
1432 * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
1433 * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
1434 RADV_CMD_FLAG_WB_L2 = 1 << 4,
1435 /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
1436 * changed and we want to read an image from shaders. */
1437 RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
1438 /* Framebuffer caches */
1439 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
1440 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
1441 RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
1442 RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
1443 /* Engine synchronization. */
1444 RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
1445 RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
1446 RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
1447 RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
1448 /* Pipeline query controls. */
1449 RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
1450 RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
1451 RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
1452
1453 RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
1454 RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),
1455
1456 RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
1457 RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
1458 };
1459
1460 struct radv_vertex_binding {
1461 VkDeviceSize offset;
1462 VkDeviceSize size;
1463 VkDeviceSize stride;
1464 };
1465
1466 struct radv_streamout_binding {
1467 struct radv_buffer *buffer;
1468 VkDeviceSize offset;
1469 VkDeviceSize size;
1470 };
1471
1472 struct radv_streamout_state {
1473 /* Mask of bound streamout buffers. */
1474 uint8_t enabled_mask;
1475
1476 /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
1477 uint32_t hw_enabled_mask;
1478
1479 /* State of VGT_STRMOUT_(CONFIG|EN) */
1480 bool streamout_enabled;
1481 };
1482
1483 struct radv_sample_locations_state {
1484 VkSampleCountFlagBits per_pixel;
1485 VkExtent2D grid_size;
1486 uint32_t count;
1487 VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
1488 };
1489
1490 struct radv_dynamic_state {
1491 struct vk_dynamic_graphics_state vk;
1492
1493 /**
1494 * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
1495 * Defines the set of saved dynamic state.
1496 */
1497 uint64_t mask;
1498
1499 struct {
1500 struct {
1501 float scale[3];
1502 float translate[3];
1503 } xform[MAX_VIEWPORTS];
1504 } hw_vp;
1505
1506 struct radv_sample_locations_state sample_location;
1507
1508 VkImageAspectFlags feedback_loop_aspects;
1509 };
1510
1511 const char *radv_get_debug_option_name(int id);
1512
1513 const char *radv_get_perftest_option_name(int id);
1514
1515 struct radv_color_buffer_info {
1516 uint64_t cb_color_base;
1517 uint64_t cb_color_cmask;
1518 uint64_t cb_color_fmask;
1519 uint64_t cb_dcc_base;
1520 uint32_t cb_color_slice;
1521 uint32_t cb_color_view;
1522 uint32_t cb_color_info;
1523 uint32_t cb_color_attrib;
1524 uint32_t cb_color_attrib2; /* GFX9 and later */
1525 uint32_t cb_color_attrib3; /* GFX10 and later */
1526 uint32_t cb_dcc_control;
1527 uint32_t cb_color_cmask_slice;
1528 uint32_t cb_color_fmask_slice;
1529 union {
1530 uint32_t cb_color_pitch; // GFX6-GFX8
1531 uint32_t cb_mrt_epitch; // GFX9+
1532 };
1533 };
1534
1535 struct radv_ds_buffer_info {
1536 uint64_t db_z_read_base;
1537 uint64_t db_stencil_read_base;
1538 uint64_t db_z_write_base;
1539 uint64_t db_stencil_write_base;
1540 uint64_t db_htile_data_base;
1541 uint32_t db_depth_info;
1542 uint32_t db_z_info;
1543 uint32_t db_stencil_info;
1544 uint32_t db_depth_view;
1545 uint32_t db_depth_size;
1546 uint32_t db_depth_slice;
1547 uint32_t db_htile_surface;
1548 uint32_t db_z_info2; /* GFX9 only */
1549 uint32_t db_stencil_info2; /* GFX9 only */
1550 uint32_t db_render_override2;
1551 uint32_t db_render_control;
1552 };
1553
1554 void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1555 struct radv_image_view *iview);
1556 void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
1557 struct radv_image_view *iview, VkImageAspectFlags ds_aspects);
1558 void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
1559 struct radv_ds_buffer_info *ds);
1560
1561 void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
1562 unsigned *db_render_control);
1563 /**
1564 * Attachment state when recording a renderpass instance.
1565 *
1566 * The clear value is valid only if there exists a pending clear.
1567 */
1568 struct radv_attachment {
1569 VkFormat format;
1570 struct radv_image_view *iview;
1571 VkImageLayout layout;
1572 VkImageLayout stencil_layout;
1573
1574 union {
1575 struct radv_color_buffer_info cb;
1576 struct radv_ds_buffer_info ds;
1577 };
1578
1579 struct radv_image_view *resolve_iview;
1580 VkResolveModeFlagBits resolve_mode;
1581 VkResolveModeFlagBits stencil_resolve_mode;
1582 VkImageLayout resolve_layout;
1583 VkImageLayout stencil_resolve_layout;
1584 };
1585
1586 struct radv_rendering_state {
1587 bool active;
1588 bool has_image_views;
1589 VkRect2D area;
1590 uint32_t layer_count;
1591 uint32_t view_mask;
1592 uint32_t color_samples;
1593 uint32_t ds_samples;
1594 uint32_t max_samples;
1595 struct radv_sample_locations_state sample_locations;
1596 uint32_t color_att_count;
1597 struct radv_attachment color_att[MAX_RTS];
1598 struct radv_attachment ds_att;
1599 VkImageAspectFlags ds_att_aspects;
1600 struct radv_attachment vrs_att;
1601 VkExtent2D vrs_texel_size;
1602 };
1603
1604 struct radv_descriptor_state {
1605 struct radv_descriptor_set *sets[MAX_SETS];
1606 uint32_t dirty;
1607 uint32_t valid;
1608 struct radv_push_descriptor_set push_set;
1609 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1610 uint64_t descriptor_buffers[MAX_SETS];
1611 bool need_indirect_descriptor_sets;
1612 };
1613
1614 struct radv_push_constant_state {
1615 uint32_t size;
1616 uint32_t dynamic_offset_count;
1617 };
1618
1619 enum rgp_flush_bits {
1620 RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
1621 RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
1622 RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
1623 RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
1624 RGP_FLUSH_PFP_SYNC_ME = 0x10,
1625 RGP_FLUSH_SYNC_CP_DMA = 0x20,
1626 RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
1627 RGP_FLUSH_INVAL_ICACHE = 0x80,
1628 RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
1629 RGP_FLUSH_FLUSH_L2 = 0x200,
1630 RGP_FLUSH_INVAL_L2 = 0x400,
1631 RGP_FLUSH_FLUSH_CB = 0x800,
1632 RGP_FLUSH_INVAL_CB = 0x1000,
1633 RGP_FLUSH_FLUSH_DB = 0x2000,
1634 RGP_FLUSH_INVAL_DB = 0x4000,
1635 RGP_FLUSH_INVAL_L1 = 0x8000,
1636 };
1637
1638 struct radv_multisample_state {
1639 bool sample_shading_enable;
1640 float min_sample_shading;
1641 };
1642
1643 struct radv_ia_multi_vgt_param_helpers {
1644 uint32_t base;
1645 bool partial_es_wave;
1646 bool ia_switch_on_eoi;
1647 bool partial_vs_wave;
1648 };
1649
1650 struct radv_cmd_state {
1651 /* Vertex descriptors */
1652 uint64_t vb_va;
1653 unsigned vb_size;
1654
1655 bool predicating;
1656 uint64_t dirty;
1657
1658 VkShaderStageFlags active_stages;
1659 struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
1660 struct radv_shader *gs_copy_shader;
1661 struct radv_shader *last_vgt_shader;
1662 struct radv_shader *rt_prolog;
1663
1664 struct radv_shader_object *shader_objs[MESA_VULKAN_SHADER_STAGES];
1665
1666 uint32_t prefetch_L2_mask;
1667
1668 struct radv_graphics_pipeline *graphics_pipeline;
1669 struct radv_graphics_pipeline *emitted_graphics_pipeline;
1670 struct radv_compute_pipeline *compute_pipeline;
1671 struct radv_compute_pipeline *emitted_compute_pipeline;
1672 struct radv_ray_tracing_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
1673 struct radv_dynamic_state dynamic;
1674 struct radv_vs_input_state dynamic_vs_input;
1675 struct radv_streamout_state streamout;
1676
1677 struct radv_rendering_state render;
1678
1679 /* Index buffer */
1680 uint32_t index_type;
1681 uint32_t max_index_count;
1682 uint64_t index_va;
1683 int32_t last_index_type;
1684
1685 uint32_t last_primitive_reset_index; /* only relevant on GFX6-7 */
1686 enum radv_cmd_flush_bits flush_bits;
1687 unsigned active_occlusion_queries;
1688 bool perfect_occlusion_queries_enabled;
1689 unsigned active_pipeline_queries;
1690 unsigned active_pipeline_gds_queries;
1691 unsigned active_pipeline_ace_queries; /* Task shader invocations query */
1692 unsigned active_prims_gen_queries;
1693 unsigned active_prims_xfb_queries;
1694 unsigned active_prims_gen_gds_queries;
1695 unsigned active_prims_xfb_gds_queries;
1696 uint32_t trace_id;
1697 uint32_t last_ia_multi_vgt_param;
1698 uint32_t last_ge_cntl;
1699
1700 uint32_t last_num_instances;
1701 uint32_t last_first_instance;
1702 bool last_vertex_offset_valid;
1703 uint32_t last_vertex_offset;
1704 uint32_t last_drawid;
1705 uint32_t last_subpass_color_count;
1706
1707 uint32_t last_sx_ps_downconvert;
1708 uint32_t last_sx_blend_opt_epsilon;
1709 uint32_t last_sx_blend_opt_control;
1710
1711 uint32_t last_db_count_control;
1712
1713 uint32_t last_db_shader_control;
1714
1715 /* Whether CP DMA is busy/idle. */
1716 bool dma_is_busy;
1717
1718 /* Whether any images that are not L2 coherent are dirty from the CB. */
1719 bool rb_noncoherent_dirty;
1720
1721 /* Conditional rendering info. */
1722 uint8_t predication_op; /* 32-bit or 64-bit predicate value */
1723 int predication_type; /* -1: disabled, 0: normal, 1: inverted */
1724 uint64_t predication_va;
1725
1726 /* Inheritance info. */
1727 VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
1728 bool inherited_occlusion_queries;
1729 VkQueryControlFlags inherited_query_control_flags;
1730
1731 bool context_roll_without_scissor_emitted;
1732
1733 /* SQTT related state. */
1734 uint32_t current_event_type;
1735 uint32_t num_events;
1736 uint32_t num_layout_transitions;
1737 bool in_barrier;
1738 bool pending_sqtt_barrier_end;
1739 enum rgp_flush_bits sqtt_flush_bits;
1740
1741 /* NGG culling state. */
1742 bool has_nggc;
1743
1744 /* Mesh shading state. */
1745 bool mesh_shading;
1746
1747 uint8_t cb_mip[MAX_RTS];
1748 uint8_t ds_mip;
1749
1750 /* Whether DRAW_{INDEX}_INDIRECT_{MULTI} is emitted. */
1751 bool uses_draw_indirect;
1752
1753 uint32_t rt_stack_size;
1754
1755 struct radv_shader_part *emitted_vs_prolog;
1756 uint32_t vbo_misaligned_mask;
1757 uint32_t vbo_misaligned_mask_invalid;
1758 uint32_t vbo_bound_mask;
1759
1760 struct radv_shader_part *emitted_tcs_epilog;
1761 struct radv_shader_part *emitted_ps_epilog;
1762
1763 /* Per-vertex VRS state. */
1764 uint32_t last_vrs_rates;
1765 int8_t last_vrs_rates_sgpr_idx;
1766
1767 /* Whether to suspend streamout for internal driver operations. */
1768 bool suspend_streamout;
1769
1770 /* Whether this command buffer uses performance counters. */
1771 bool uses_perf_counters;
1772
1773 struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
1774
1775 /* Tessellation info when patch control points is dynamic. */
1776 unsigned tess_num_patches;
1777 unsigned tess_lds_size;
1778
1779 unsigned col_format_non_compacted;
1780
1781 /* Binning state */
1782 unsigned last_pa_sc_binner_cntl_0;
1783
1784 struct radv_multisample_state ms;
1785
1786 /* Custom blend mode for internal operations. */
1787 unsigned custom_blend_mode;
1788 unsigned db_render_control;
1789
1790 unsigned rast_prim;
1791
1792 uint32_t vtx_base_sgpr;
1793 uint8_t vtx_emit_num;
1794 bool uses_drawid;
1795 bool uses_baseinstance;
1796
1797 bool uses_out_of_order_rast;
1798 bool uses_vrs_attachment;
1799 bool uses_dynamic_patch_control_points;
1800 bool uses_dynamic_vertex_binding_stride;
1801 };
1802
1803 struct radv_cmd_buffer_upload {
1804 uint8_t *map;
1805 unsigned offset;
1806 uint64_t size;
1807 struct radeon_winsys_bo *upload_bo;
1808 struct list_head list;
1809 };
1810
1811 struct radv_cmd_buffer {
1812 struct vk_command_buffer vk;
1813
1814 struct radv_device *device;
1815
1816 VkCommandBufferUsageFlags usage_flags;
1817 struct radeon_cmdbuf *cs;
1818 struct radv_cmd_state state;
1819 struct radv_buffer *vertex_binding_buffers[MAX_VBS];
1820 struct radv_vertex_binding vertex_bindings[MAX_VBS];
1821 uint32_t used_vertex_bindings;
1822 struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
1823 enum radv_queue_family qf;
1824
1825 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
1826 VkShaderStageFlags push_constant_stages;
1827 struct radv_descriptor_set_header meta_push_descriptors;
1828
1829 struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
1830
1831 struct radv_push_constant_state push_constant_state[MAX_BIND_POINTS];
1832
1833 uint64_t descriptor_buffers[MAX_SETS];
1834
1835 struct radv_cmd_buffer_upload upload;
1836
1837 uint32_t scratch_size_per_wave_needed;
1838 uint32_t scratch_waves_wanted;
1839 uint32_t compute_scratch_size_per_wave_needed;
1840 uint32_t compute_scratch_waves_wanted;
1841 uint32_t esgs_ring_size_needed;
1842 uint32_t gsvs_ring_size_needed;
1843 bool tess_rings_needed;
1844 bool task_rings_needed;
1845 bool mesh_scratch_ring_needed;
1846 bool gds_needed; /* for GFX10 streamout and NGG GS queries */
1847 bool gds_oa_needed; /* for GFX10 streamout */
1848 bool sample_positions_needed;
1849
1850 uint64_t gfx9_fence_va;
1851 uint32_t gfx9_fence_idx;
1852 uint64_t gfx9_eop_bug_va;
1853
1854 uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
1855 bool mec_inv_pred_emitted; /* Set once the inverted predicate value has been written, so it isn't redone. */
1856
1857 struct set vs_prologs;
1858 struct set ps_epilogs;
1859 struct set tcs_epilogs;
1860
1861 /**
1862 * Gang state.
1863 * Used when the command buffer needs work done on a different queue
1864 * (e.g. when a graphics command buffer needs compute work).
1865 * Currently only one follower is possible per command buffer.
1866 */
1867 struct {
1868 /** Follower command stream. */
1869 struct radeon_cmdbuf *cs;
1870
1871 /** Flush bits for the follower cmdbuf. */
1872 enum radv_cmd_flush_bits flush_bits;
1873
1874 /**
1875 * For synchronization between the follower and leader.
1876 * The value of these semaphores are incremented whenever we
1877 * encounter a barrier that affects the follower.
1878 *
1879 * DWORD 0: Leader to follower semaphore.
1880 * The leader writes the value and the follower waits.
1881 * DWORD 1: Follower to leader semaphore.
1882 * The follower writes the value, and the leader waits.
1883 */
1884 struct {
1885 uint64_t va; /* Virtual address of the semaphore. */
1886 uint32_t leader_value; /* Current value of the leader. */
1887 uint32_t emitted_leader_value; /* Last value emitted by the leader. */
1888 uint32_t follower_value; /* Current value of the follower. */
1889 uint32_t emitted_follower_value; /* Last value emitted by the follower. */
1890 } sem;
1891 } gang;
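/* Illustrative sketch of the handshake described above (not driver code;
 * "write"/"wait" stand for whatever packets the winsys emits, e.g.
 * WRITE_DATA / WAIT_REG_MEM):
 *
 *   leader:   sem.leader_value++;
 *             write *(sem.va + 0) = sem.leader_value;
 *   follower: wait until *(sem.va + 0) >= sem.leader_value;
 *
 *   follower: sem.follower_value++;
 *             write *(sem.va + 4) = sem.follower_value;
 *   leader:   wait until *(sem.va + 4) >= sem.follower_value;
 *
 * The emitted_* counters track the last values actually written into the
 * command streams, presumably so redundant writes/waits can be skipped.
 */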
1892
1893 /**
1894 * Whether a query pool has been reset and we have to flush caches.
1895 */
1896 bool pending_reset_query;
1897
1898 /**
1899 * Bitmask of pending active query flushes.
1900 */
1901 enum radv_cmd_flush_bits active_query_flush_bits;
1902
1903 struct {
1904 struct radv_video_session *vid;
1905 struct radv_video_session_params *params;
1906 struct rvcn_sq_var sq;
1907 struct rvcn_decode_buffer_s *decode_buffer;
1908 } video;
1909
1910 struct {
1911 /* Temporary space for some transfer queue copy command workarounds. */
1912 struct radeon_winsys_bo *copy_temp;
1913 } transfer;
1914
1915 uint64_t shader_upload_seq;
1916
1917 uint32_t sqtt_cb_id;
1918
1919 struct util_dynarray ray_history;
1920 };
1921
1922 static inline bool
1923 radv_cmdbuf_has_stage(const struct radv_cmd_buffer *cmd_buffer, gl_shader_stage stage)
1924 {
1925 return !!(cmd_buffer->state.active_stages & mesa_to_vk_shader_stage(stage));
1926 }
1927
1928 static inline uint32_t
1929 radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer *cmd_buffer)
1930 {
1931 /* SAMPLE_STREAMOUTSTATS also requires PIPELINESTAT_START to be enabled. */
1932 return cmd_buffer->state.active_pipeline_queries + cmd_buffer->state.active_prims_gen_queries +
1933 cmd_buffer->state.active_prims_xfb_queries;
1934 }
1935
1936 extern const struct vk_command_buffer_ops radv_cmd_buffer_ops;
1937
1938 struct radv_dispatch_info {
1939 /**
1940 * Determine the layout of the grid (in block units) to be used.
1941 */
1942 uint32_t blocks[3];
1943
1944 /**
1945 * A starting offset for the grid. If unaligned is set, the offset
1946 * must still be aligned.
1947 */
1948 uint32_t offsets[3];
1949
1950 /**
1951 * Whether it's an unaligned compute dispatch.
1952 */
1953 bool unaligned;
1954
1955 /**
1956 * Whether waves must be launched in order.
1957 */
1958 bool ordered;
1959
1960 /**
1961 * Indirect compute parameters resource.
1962 */
1963 struct radeon_winsys_bo *indirect;
1964 uint64_t va;
1965 };
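/* Usage sketch (assumed, not taken from the driver): a direct
 * vkCmdDispatch(x, y, z) would be expressed roughly as
 *
 *   struct radv_dispatch_info info = {
 *      .blocks = {x, y, z},
 *   };
 *   radv_compute_dispatch(cmd_buffer, &info);
 *
 * while an indirect dispatch would instead set info.indirect / info.va and
 * leave blocks unused.
 */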
1966
1967 void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
1968
1969 struct radv_image;
1970 struct radv_image_view;
1971
1972 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
1973
1974 void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
1975 void radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
1976
1977 void radv_create_gfx_config(struct radv_device *device);
1978
1979 void radv_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports);
1980
1981 void radv_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim,
1982 unsigned polygon_mode, float line_width);
1983
1984 VkResult radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state);
1985 void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws);
1986 void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
1987 struct radv_queue_state *queue_state);
1988 VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue);
1989
1990 uint32_t radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
1991 bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
1992 bool prim_restart_enable, unsigned patch_control_points,
1993 unsigned num_tess_patches);
1994 void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
1995 unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel,
1996 uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va);
1997
1998 struct radv_vgt_shader_key {
1999 uint8_t tess : 1;
2000 uint8_t gs : 1;
2001 uint8_t mesh_scratch_ring : 1;
2002 uint8_t mesh : 1;
2003 uint8_t ngg_passthrough : 1;
2004 uint8_t ngg : 1; /* gfx10+ */
2005 uint8_t ngg_streamout : 1;
2006 uint8_t hs_wave32 : 1;
2007 uint8_t gs_wave32 : 1;
2008 uint8_t vs_wave32 : 1;
2009 };
2010
2011 void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
2012 uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
2013 enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
2014 uint64_t gfx9_eop_bug_va);
2015 void radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
2016 void radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
2017 uint64_t va);
2018 void radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size);
2019 void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
2020 bool predicating);
2021 void radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
2022 void radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value);
2023 void radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
2024
2025 uint32_t radv_get_vgt_index_size(uint32_t type);
2026
2027 void radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2028 const struct radv_vgt_shader_key *key);
2029
2030 unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
2031
2032 struct radv_ps_epilog_state {
2033 uint8_t color_attachment_count;
2034 VkFormat color_attachment_formats[MAX_RTS];
2035
2036 uint32_t color_write_mask;
2037 uint32_t color_blend_enable;
2038
2039 uint32_t colors_written;
2040 bool mrt0_is_dual_src;
2041 bool export_depth;
2042 bool export_stencil;
2043 bool export_sample_mask;
2044 bool alpha_to_coverage_via_mrtz;
2045 uint8_t need_src_alpha;
2046 };
2047
2048 struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device,
2049 const struct radv_ps_epilog_state *state);
2050
2051 bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
2052 unsigned custom_blend_mode);
2053
2054 void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
2055 bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
2056 unsigned *out_offset, void **ptr);
2057 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr);
2058 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
2059 unsigned *out_offset);
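/* Usage sketch for the upload helpers above (assumed typical pattern): copy
 * transient data into the command buffer's upload stream, then address it via
 * the upload BO's VA plus the returned offset:
 *
 *   unsigned offset;
 *   if (radv_cmd_buffer_upload_data(cmd_buffer, size, data, &offset)) {
 *      uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
 *      ... emit va into a descriptor or packet ...
 *   }
 */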
2060 void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
2061 const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors,
2062 void *vb_ptr);
2063
2064 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
2065 unsigned radv_get_default_max_sample_dist(int log_samples);
2066 void radv_device_init_msaa(struct radv_device *device);
2067 VkResult radv_device_init_vrs_state(struct radv_device *device);
2068
2069 void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
2070
2071 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
2072 VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);
2073
2074 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
2075 int cb_idx, uint32_t color_values[2]);
2076
2077 void radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
2078 const struct legacy_surf_level *base_level_info, unsigned plane_id,
2079 unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil,
2080 bool is_storage_image, bool disable_compression, bool enable_write_compression,
2081 uint32_t *state, const struct ac_surf_nbc_view *nbc_view);
2082
2083 void radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image,
2084 VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping,
2085 unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer,
2086 unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
2087 uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
2088 const struct ac_surf_nbc_view *nbc_view,
2089 const VkImageViewSlicedCreateInfoEXT *sliced_3d);
2090
2091 bool radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image);
2092 bool radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image);
2093
2094 bool radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image);
2095
2096 unsigned radv_plane_from_aspect(VkImageAspectFlags mask);
2097
2098 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2099 const VkImageSubresourceRange *range, bool value);
2100
2101 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2102 const VkImageSubresourceRange *range, bool value);
2103 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags,
2104 const struct radv_image *image);
2105 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags,
2106 const struct radv_image *image);
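/* Descriptive note (assumed usage): these helpers translate Vulkan access
 * masks into radv_cmd_flush_bits. The source side decides which caches must be
 * flushed/written back, the destination side which must be invalidated; a
 * barrier would typically OR both results into cmd_buffer->state.flush_bits. */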
2107
2108 void radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage);
2109
2110 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
2111 bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
2112 void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
2113 struct radv_device_memory *mem);
2114
2115 static inline void
2116 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
2117 bool use_32bit_pointers)
2118 {
2119 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
2120 radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
2121 }
2122
2123 static inline void
2124 radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
2125 bool use_32bit_pointers)
2126 {
2127 radeon_emit(cs, va);
2128
2129 if (use_32bit_pointers) {
2130 assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
2131 } else {
2132 radeon_emit(cs, va >> 32);
2133 }
2134 }
2135
2136 static inline void
2137 radv_emit_shader_pointer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va,
2138 bool global)
2139 {
2140 bool use_32bit_pointers = !global;
2141
2142 radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
2143 radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
2144 }
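/* Example (illustrative): emitting a 32-bit descriptor pointer into a user
 * SGPR at sh_offset:
 *
 *   radv_emit_shader_pointer(device, cs, sh_offset, va, false);
 *
 * emits one SET_SH_REG packet with a single payload dword; global = true
 * instead emits the full 64-bit address as two dwords. */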
2145
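/* VK_PIPELINE_BIND_POINT_GRAPHICS (0) and _COMPUTE (1) map directly to array
 * slots; ray tracing is remapped to slot 2 so that all three fit in the
 * MAX_BIND_POINTS-sized arrays of radv_cmd_buffer. */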
2146 static inline unsigned
2147 vk_to_bind_point(VkPipelineBindPoint bind_point)
2148 {
2149 return bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? 2 : bind_point;
2150 }
2151
2152 static inline struct radv_descriptor_state *
2153 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
2154 {
2155 return &cmd_buffer->descriptors[vk_to_bind_point(bind_point)];
2156 }
2157
2158 static inline const struct radv_push_constant_state *
2159 radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
2160 {
2161 return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)];
2162 }
2163
2164 void radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
2165
2166 /*
2167 * Takes x,y,z as exact numbers of invocations, instead of blocks.
2168 *
2169 * Limitation: afterwards, the normal dispatch functions cannot be used
2170 * without binding or rebinding the compute pipeline.
2171 */
2172 void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z);
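/* E.g. (usage sketch) a meta operation covering a width x height region could
 * issue radv_unaligned_dispatch(cmd_buffer, width, height, 1) instead of
 * rounding the region up to whole workgroups itself. */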
2173
2174 void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va);
2175
2176 struct radv_event {
2177 struct vk_object_base base;
2178 struct radeon_winsys_bo *bo;
2179 uint64_t *map;
2180 };
2181
2182 struct radv_ray_tracing_group;
2183
2184 void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_layout *layout,
2185 const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage);
2186
2187 void radv_hash_graphics_spirv_to_nir(blake3_hash hash, const struct radv_shader_stage *stage,
2188 const struct radv_spirv_to_nir_options *options);
2189
2190 void radv_hash_shaders(const struct radv_device *device, unsigned char *hash, const struct radv_shader_stage *stages,
2191 uint32_t stage_count, const struct radv_pipeline_layout *layout,
2192 const struct radv_graphics_state_key *gfx_state);
2193
2194 struct radv_ray_tracing_stage;
2195 void radv_hash_rt_shaders(const struct radv_device *device, unsigned char *hash,
2196 const struct radv_ray_tracing_stage *stages,
2197 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
2198 const struct radv_ray_tracing_group *groups);
2199
2200 bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines);
2201
2202 bool radv_emulate_rt(const struct radv_physical_device *pdevice);
2203
2204 struct radv_prim_vertex_count {
2205 uint8_t min;
2206 uint8_t incr;
2207 };
2208
2209 enum radv_pipeline_type {
2210 RADV_PIPELINE_GRAPHICS,
2211 RADV_PIPELINE_GRAPHICS_LIB,
2212 /* Compute pipeline */
2213 RADV_PIPELINE_COMPUTE,
2214 /* Raytracing pipeline */
2215 RADV_PIPELINE_RAY_TRACING,
2216 };
2217
2218 struct radv_pipeline_group_handle {
2219 uint64_t recursive_shader_ptr;
2220
2221 union {
2222 uint32_t general_index;
2223 uint32_t closest_hit_index;
2224 };
2225 union {
2226 uint32_t intersection_index;
2227 uint32_t any_hit_index;
2228 };
2229 };
2230
2231 struct radv_rt_capture_replay_handle {
2232 struct radv_serialized_shader_arena_block recursive_shader_alloc;
2233 uint32_t non_recursive_idx;
2234 };
2235
2236 struct radv_pipeline {
2237 struct vk_object_base base;
2238 enum radv_pipeline_type type;
2239
2240 VkPipelineCreateFlags2KHR create_flags;
2241
2242 struct vk_pipeline_cache_object *cache_object;
2243
2244 bool is_internal;
2245 bool need_indirect_descriptor_sets;
2246 struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
2247 struct radv_shader *gs_copy_shader;
2248
2249 uint64_t shader_upload_seq;
2250
2251 struct radeon_cmdbuf cs;
2252 uint32_t ctx_cs_hash;
2253 struct radeon_cmdbuf ctx_cs;
2254
2255 uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
2256
2257 /* Unique pipeline hash identifier. */
2258 uint64_t pipeline_hash;
2259
2260 /* Pipeline layout info. */
2261 uint32_t push_constant_size;
2262 uint32_t dynamic_offset_count;
2263 };
2264
2265 struct radv_sqtt_shaders_reloc {
2266 struct radeon_winsys_bo *bo;
2267 union radv_shader_arena_block *alloc;
2268 uint64_t va[MESA_VULKAN_SHADER_STAGES];
2269 };
2270
2271 struct radv_graphics_pipeline {
2272 struct radv_pipeline base;
2273
2274 bool uses_drawid;
2275 bool uses_baseinstance;
2276
2277 /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
2278 bool force_vrs_per_vertex;
2279
2280 /* Whether the pipeline uses NGG (GFX10+). */
2281 bool is_ngg;
2282 bool has_ngg_culling;
2283
2284 uint8_t vtx_emit_num;
2285
2286 uint32_t vtx_base_sgpr;
2287 uint64_t dynamic_states;
2288 uint64_t needed_dynamic_state;
2289
2290 VkShaderStageFlags active_stages;
2291
2292 /* Used for rbplus */
2293 uint32_t col_format_non_compacted;
2294
2295 struct radv_dynamic_state dynamic_state;
2296
2297 struct radv_vs_input_state vs_input_state;
2298
2299 struct radv_multisample_state ms;
2300 struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
2301 uint32_t binding_stride[MAX_VBS];
2302 uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
2303 uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
2304 uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
2305 uint32_t db_render_control;
2306
2307 /* Last pre-PS API stage */
2308 gl_shader_stage last_vgt_api_stage;
2309
2310 /* Not NULL if graphics pipeline uses streamout. */
2311 struct radv_shader *streamout_shader;
2312
2313 unsigned rast_prim;
2314
2315 /* For vk_graphics_pipeline_state */
2316 void *state_data;
2317
2318 /* Custom blend mode for internal operations. */
2319 unsigned custom_blend_mode;
2320
2321 /* Whether the pipeline uses out-of-order rasterization. */
2322 bool uses_out_of_order_rast;
2323
2324 /* Whether the pipeline uses a VRS attachment. */
2325 bool uses_vrs_attachment;
2326
2327 /* For graphics pipeline library */
2328 bool retain_shaders;
2329
2330 /* For relocation of shaders with RGP. */
2331 struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc;
2332 };
2333
2334 struct radv_compute_pipeline {
2335 struct radv_pipeline base;
2336 };
2337
2338 struct radv_ray_tracing_group {
2339 VkRayTracingShaderGroupTypeKHR type;
2340 uint32_t recursive_shader; /* generalShader or closestHitShader */
2341 uint32_t any_hit_shader;
2342 uint32_t intersection_shader;
2343 struct radv_pipeline_group_handle handle;
2344 };
2345
2346 struct radv_ray_tracing_stage {
2347 struct vk_pipeline_cache_object *nir;
2348 struct radv_shader *shader;
2349 gl_shader_stage stage;
2350 uint32_t stack_size;
2351
2352 bool can_inline;
2353
2354 uint8_t sha1[SHA1_DIGEST_LENGTH];
2355 };
2356
2357 struct radv_ray_tracing_pipeline {
2358 struct radv_compute_pipeline base;
2359
2360 struct radv_shader *prolog;
2361
2362 struct radv_ray_tracing_stage *stages;
2363 struct radv_ray_tracing_group *groups;
2364 unsigned stage_count;
2365 unsigned non_imported_stage_count;
2366 unsigned group_count;
2367
2368 uint8_t sha1[SHA1_DIGEST_LENGTH];
2369 uint32_t stack_size;
2370
2371 /* set if any shaders from this pipeline require robustness2 in the merged traversal shader */
2372 bool traversal_storage_robustness2 : 1;
2373 bool traversal_uniform_robustness2 : 1;
2374 };
2375
2376 struct radv_retained_shaders {
2377 struct {
2378 void *serialized_nir;
2379 size_t serialized_nir_size;
2380 unsigned char shader_sha1[SHA1_DIGEST_LENGTH];
2381 struct radv_shader_stage_key key;
2382 } stages[MESA_VULKAN_SHADER_STAGES];
2383 };
2384
2385 struct radv_graphics_lib_pipeline {
2386 struct radv_graphics_pipeline base;
2387
2388 struct radv_pipeline_layout layout;
2389
2390 struct vk_graphics_pipeline_state graphics_state;
2391
2392 VkGraphicsPipelineLibraryFlagsEXT lib_flags;
2393
2394 struct radv_retained_shaders retained_shaders;
2395
2396 void *mem_ctx;
2397
2398 unsigned stage_count;
2399 VkPipelineShaderStageCreateInfo *stages;
2400 struct radv_shader_stage_key stage_keys[MESA_VULKAN_SHADER_STAGES];
2401 };
2402
2403 #define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
2404 static inline struct radv_##pipe_type##_pipeline *radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \
2405 { \
2406 assert(pipeline->type == pipe_enum); \
2407 return (struct radv_##pipe_type##_pipeline *)pipeline; \
2408 }
2409
2410 RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
2411 RADV_DECL_PIPELINE_DOWNCAST(graphics_lib, RADV_PIPELINE_GRAPHICS_LIB)
2412 RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
2413 RADV_DECL_PIPELINE_DOWNCAST(ray_tracing, RADV_PIPELINE_RAY_TRACING)
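/* For example, the compute instantiation above expands to a
 * radv_pipeline_to_compute() helper that asserts pipeline->type ==
 * RADV_PIPELINE_COMPUTE and performs the cast:
 *
 *   struct radv_compute_pipeline *cp = radv_pipeline_to_compute(pipeline);
 */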
2414
2415 struct radv_shader_layout {
2416 uint32_t num_sets;
2417
2418 struct {
2419 struct radv_descriptor_set_layout *layout;
2420 uint32_t dynamic_offset_start;
2421 } set[MAX_SETS];
2422
2423 uint32_t push_constant_size;
2424 uint32_t dynamic_offset_count;
2425 bool use_dynamic_descriptors;
2426 };
2427
2428 struct radv_shader_stage {
2429 gl_shader_stage stage;
2430 gl_shader_stage next_stage;
2431
2432 struct {
2433 const struct vk_object_base *object;
2434 const char *data;
2435 uint32_t size;
2436 } spirv;
2437
2438 const char *entrypoint;
2439 const VkSpecializationInfo *spec_info;
2440
2441 unsigned char shader_sha1[20];
2442
2443 nir_shader *nir;
2444 nir_shader *internal_nir; /* meta shaders */
2445
2446 struct radv_shader_info info;
2447 struct radv_shader_args args;
2448 struct radv_shader_stage_key key;
2449
2450 VkPipelineCreationFeedback feedback;
2451
2452 struct radv_shader_layout layout;
2453 };
2454
2455 void radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, gl_shader_stage stage,
2456 struct radv_shader_layout *layout);
2457
2458 static inline bool
2459 radv_is_last_vgt_stage(const struct radv_shader_stage *stage)
2460 {
2461 return (stage->info.stage == MESA_SHADER_VERTEX || stage->info.stage == MESA_SHADER_TESS_EVAL ||
2462 stage->info.stage == MESA_SHADER_GEOMETRY || stage->info.stage == MESA_SHADER_MESH) &&
2463 (stage->info.next_stage == MESA_SHADER_FRAGMENT || stage->info.next_stage == MESA_SHADER_NONE);
2464 }
2465
2466 static inline bool
2467 radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage)
2468 {
2469 return pipeline->base.shaders[stage];
2470 }
2471
2472 bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline);
2473
2474 bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
2475
2476 const struct radv_userdata_info *radv_get_user_sgpr(const struct radv_shader *shader, int idx);
2477
2478 struct radv_shader *radv_get_shader(struct radv_shader *const *shaders, gl_shader_stage stage);
2479
2480 void radv_emit_compute_shader(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
2481 const struct radv_shader *shader);
2482
2483 bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components,
2484 nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
2485
2486 void radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2487 const struct radv_shader *vs, const struct radv_shader *next_stage);
2488
2489 void radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs,
2490 const struct radv_shader *tcs);
2491
2492 void radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2493 struct radeon_cmdbuf *cs, const struct radv_shader *tes, const struct radv_shader *gs);
2494
2495 void radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2496 const struct radv_shader *ps);
2497
2498 void radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *cs,
2499 const struct radv_shader *last_vgt_shader, const struct radv_shader *ps);
2500
2501 struct radv_ia_multi_vgt_param_helpers radv_compute_ia_multi_vgt_param(const struct radv_device *device,
2502 struct radv_shader *const *shaders);
2503
2504 void radv_emit_vgt_vertex_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2505 const struct radv_shader *tes);
2506
2507 void radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2508 uint32_t vgt_gs_out_prim_type);
2509
2510 void radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2511 const struct radv_shader *last_vgt_api_shader);
2512
2513 void gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct radv_shader *mesh_shader,
2514 bool enable_vrs);
2515
2516 void gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps,
2517 bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex);
2518
2519 void radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2520 const struct radv_shader *gs, const struct radv_shader *es,
2521 const struct radv_shader *gs_copy_shader);
2522
2523 void radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2524 const struct radv_shader *ms);
2525
2526 void radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
2527 struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2528 bool keep_executable_info, bool keep_statistic_info, bool is_internal,
2529 struct radv_retained_shaders *retained_shaders, bool noop_fs,
2530 struct radv_shader **shaders, struct radv_shader_binary **binaries,
2531 struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary);
2532
2533 void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
2534 const struct radv_pipeline_layout *layout, struct radv_shader *shader);
2535
2536 struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache,
2537 struct radv_shader_stage *cs_stage, bool keep_executable_info,
2538 bool keep_statistic_info, bool is_internal, struct radv_shader_binary **cs_binary);
2539
2540 struct radv_graphics_pipeline_create_info {
2541 bool use_rectlist;
2542 bool db_depth_clear;
2543 bool db_stencil_clear;
2544 bool depth_compress_disable;
2545 bool stencil_compress_disable;
2546 bool resummarize_enable;
2547 uint32_t custom_blend_mode;
2548 };
2549
2550 struct radv_shader_stage_key radv_pipeline_get_shader_key(const struct radv_device *device,
2551 const VkPipelineShaderStageCreateInfo *stage,
2552 VkPipelineCreateFlags2KHR flags, const void *pNext);
2553
2554 void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type);
2555
2556 VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
2557 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2558 const struct radv_graphics_pipeline_create_info *extra,
2559 const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
2560
2561 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
2562 const VkComputePipelineCreateInfo *pCreateInfo,
2563 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline);
2564
2565 bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2KHR flags);
2566 bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2KHR flags);
2567
2568 VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
2569 const VkPipelineShaderStageCreateInfo *pStages,
2570 void *mem_ctx);
2571
2572 bool radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader);
2573
2574 bool radv_pipeline_has_ngg(const struct radv_graphics_pipeline *pipeline);
2575
2576 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
2577 const VkAllocationCallbacks *allocator);
2578
2579 struct vk_format_description;
2580 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void);
2581 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void);
2582 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2583 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2584 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2585 uint32_t radv_translate_dbformat(VkFormat format);
2586 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
2587 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
2588 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value);
2589 bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format);
2590 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable);
2591 bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
2592 bool *sign_reinterpret);
2593 bool radv_is_atomic_format_supported(VkFormat format);
2594 bool radv_device_supports_etc(const struct radv_physical_device *physical_device);
2595 bool radv_is_format_emulated(const struct radv_physical_device *physical_device, VkFormat format);
2596
2597 static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
2598 VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
2599 VK_IMAGE_USAGE_STORAGE_BIT;
2600
2601 struct radv_image_plane {
2602 VkFormat format;
2603 struct radeon_surf surface;
2604 };
2605
2606 struct radv_image_binding {
2607 /* Set when bound */
2608 struct radeon_winsys_bo *bo;
2609 VkDeviceSize offset;
2610 };
2611
2612 struct radv_image {
2613 struct vk_image vk;
2614
2615 VkDeviceSize size;
2616 uint32_t alignment;
2617
2618 unsigned queue_family_mask;
2619 bool exclusive;
2620 bool shareable;
2621 bool l2_coherent;
2622 bool dcc_sign_reinterpret;
2623 bool support_comp_to_single;
2624
2625 struct radv_image_binding bindings[3];
2626 bool tc_compatible_cmask;
2627
2628 uint64_t clear_value_offset;
2629 uint64_t fce_pred_offset;
2630 uint64_t dcc_pred_offset;
2631
2632 /*
2633 * Metadata for the TC-compat zrange workaround. If the 32-bit value
2634 * stored at this offset is UINT_MAX, the driver will emit
2635 * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
2636 * SET_CONTEXT_REG packet.
2637 */
2638 uint64_t tc_compat_zrange_offset;
2639
2640 /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
2641 VkDeviceMemory owned_memory;
2642
2643 unsigned plane_count;
2644 bool disjoint;
2645 struct radv_image_plane planes[0];
2646 };
2647
2648 struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image);
2649
2650 /* Whether the image has an HTILE buffer that is known to be consistent with
2651 * the contents of the image and is allowed to be in compressed form.
2652 *
2653 * If this is false, reads that don't use the HTILE should still be able to
2654 * return correct results.
2655 */
2656 bool radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
2657 VkImageLayout layout, unsigned queue_mask);
2658
2659 bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
2660 VkImageLayout layout, unsigned queue_mask);
2661
2662 bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
2663 VkImageLayout layout, unsigned queue_mask);
2664
2665 enum radv_fmask_compression {
2666 RADV_FMASK_COMPRESSION_NONE,
2667 RADV_FMASK_COMPRESSION_PARTIAL,
2668 RADV_FMASK_COMPRESSION_FULL,
2669 };
2670
2671 enum radv_fmask_compression radv_layout_fmask_compression(const struct radv_device *device,
2672 const struct radv_image *image, VkImageLayout layout,
2673 unsigned queue_mask);
2674
2675 /**
2676 * Return whether the image has CMASK metadata for color surfaces.
2677 */
2678 static inline bool
2679 radv_image_has_cmask(const struct radv_image *image)
2680 {
2681 return image->planes[0].surface.cmask_offset;
2682 }
2683
2684 /**
2685 * Return whether the image has FMASK metadata for color surfaces.
2686 */
2687 static inline bool
2688 radv_image_has_fmask(const struct radv_image *image)
2689 {
2690 return image->planes[0].surface.fmask_offset;
2691 }
2692
2693 /**
2694 * Return whether the image has DCC metadata for color surfaces.
2695 */
2696 static inline bool
2697 radv_image_has_dcc(const struct radv_image *image)
2698 {
2699 return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset;
2700 }
2701
2702 /**
2703 * Return whether the image is TC-compatible CMASK.
2704 */
2705 static inline bool
2706 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2707 {
2708 return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2709 }
2710
2711 /**
2712 * Return whether DCC metadata is enabled for a level.
2713 */
2714 static inline bool
2715 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2716 {
2717 return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2718 }
2719
2720 /**
2721 * Return whether the image has CB metadata.
2722 */
2723 static inline bool
2724 radv_image_has_CB_metadata(const struct radv_image *image)
2725 {
2726 return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
2727 }
2728
2729 /**
2730 * Return whether the image has HTILE metadata for depth surfaces.
2731 */
2732 static inline bool
2733 radv_image_has_htile(const struct radv_image *image)
2734 {
2735 return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && image->planes[0].surface.meta_size;
2736 }
2737
2738 /**
2739 * Return whether the image has VRS HTILE metadata for depth surfaces
2740 */
2741 static inline bool
2742 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2743 {
2744 const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
2745
2746 /* Any depth buffer can potentially use VRS on GFX10.3. */
2747 return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate &&
2748 radv_image_has_htile(image) && (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2749 }
2750
2751 /**
2752 * Return whether HTILE metadata is enabled for a level.
2753 */
2754 static inline bool
2755 radv_htile_enabled(const struct radv_image *image, unsigned level)
2756 {
2757 return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2758 }
2759
2760 /**
2761 * Return whether the image is TC-compatible HTILE.
2762 */
2763 static inline bool
2764 radv_image_is_tc_compat_htile(const struct radv_image *image)
2765 {
2766 return radv_image_has_htile(image) && (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2767 }
2768
2769 /**
2770 * Return whether the entire HTILE buffer can be used for depth in order to
2771 * improve HiZ Z-Range precision.
2772 */
2773 static inline bool
2774 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2775 {
2776 if (device->physical_device->rad_info.gfx_level >= GFX9) {
2777 return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
2778 } else {
2779 /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2780 * the TC-compat ZRANGE issue even if no stencil is used.
2781 */
2782 return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image);
2783 }
2784 }
2785
2786 static inline bool
2787 radv_image_has_clear_value(const struct radv_image *image)
2788 {
2789 return image->clear_value_offset != 0;
2790 }
2791
2792 static inline uint64_t
2793 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2794 {
2795 assert(radv_image_has_clear_value(image));
2796
2797 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2798 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2799 return va;
2800 }
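/* Each mip level stores an 8-byte clear value (two dwords, cf. the
 * color_values[2] parameter of radv_update_color_clear_metadata), hence the
 * base_level * 8 stride here and in the helpers below. */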
2801
2802 static inline uint64_t
2803 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2804 {
2805 assert(image->fce_pred_offset != 0);
2806
2807 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2808 va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8;
2809 return va;
2810 }
2811
2812 static inline uint64_t
2813 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2814 {
2815 assert(image->dcc_pred_offset != 0);
2816
2817 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2818 va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8;
2819 return va;
2820 }
2821
2822 static inline uint64_t
2823 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2824 {
2825 assert(image->tc_compat_zrange_offset != 0);
2826
2827 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2828 va += image->bindings[0].offset + image->tc_compat_zrange_offset + base_level * 4;
2829 return va;
2830 }
2831
2832 static inline uint64_t
2833 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2834 {
2835 assert(radv_image_has_clear_value(image));
2836
2837 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2838 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2839 return va;
2840 }
2841
2842 static inline uint32_t
2843 radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
2844 {
2845 uint32_t initial_value;
2846
2847 if (radv_image_tile_stencil_disabled(device, image)) {
2848 /* Z only (no stencil):
2849 *
2850 * |31 18|17 4|3 0|
2851 * +---------+---------+-------+
2852 * | Max Z | Min Z | ZMask |
2853 */
2854 initial_value = 0xfffc000f;
2855 } else {
2856 /* Z and stencil:
2857 *
2858 * |31 12|11 10|9 8|7 6|5 4|3 0|
2859 * +-----------+-----+------+-----+-----+-------+
2860 * | Z Range | | SMem | SR1 | SR0 | ZMask |
2861 *
2862 * SR0/SR1 contains the stencil test results. Initializing
2863 * SR0/SR1 to 0x3 means the stencil test result is unknown.
2864 *
2865 * Z, stencil and 4 bit VRS encoding:
2866 * |31 12|11 10|9 8|7 6|5 4|3 0|
2867 * +-----------+------------+------+------------+-----+-------+
2868 * | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
2869 */
2870 if (radv_image_has_vrs_htile(device, image)) {
2871 /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
2872 initial_value = 0xfffff33f;
2873 } else {
2874 initial_value = 0xfffff3ff;
2875 }
2876 }
2877
2878 return initial_value;
2879 }
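/* Worked decode of the Z + stencil + VRS case above: 0xfffff33f has
 * bits [31:12] = 0xfffff (Z Range), [11:10] = 0 (VRS y-rate),
 * [9:8] = 0x3 (SMem), [7:6] = 0 (VRS x-rate), [5:4] = 0x3 (SR0) and
 * [3:0] = 0xf (ZMask), matching the field layout documented above. */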
2880
2881 static inline bool
2882 radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image)
2883 {
2884 /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2885 return device->physical_device->rad_info.gfx_level >= GFX10 &&
2886 (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2887 radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
2888 }
2889
2890 unsigned radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
2891 enum radv_queue_family queue_family);
2892
2893 bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image);
2894
2895 unsigned radv_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil);
2896
2897 struct radeon_bo_metadata;
2898 void radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata);
2899
2900 void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
2901 uint32_t stride);
2902
2903 union radv_descriptor {
2904 struct {
2905 uint32_t plane0_descriptor[8];
2906 uint32_t fmask_descriptor[8];
2907 };
2908 struct {
2909 uint32_t plane_descriptors[3][8];
2910 };
2911 };
2912
2913 struct radv_image_view {
2914 struct vk_image_view vk;
2915 struct radv_image *image; /**< VkImageViewCreateInfo::image */
2916
2917 unsigned plane_id;
2918 VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
2919
2920 /* Whether the image view supports fast clears. */
2921 bool support_fast_clear;
2922
2923 bool disable_dcc_mrt;
2924
2925 union radv_descriptor descriptor;
2926
2927 /* Descriptor for use as a storage image as opposed to a sampled image.
2928 * This has a few differences for cube maps (e.g. type).
2929 */
2930 union radv_descriptor storage_descriptor;
2931
2932 /* Block-compressed image views on GFX10+. */
2933 struct ac_surf_nbc_view nbc_view;
2934 };
2935
2936 struct radv_image_create_info {
2937 const VkImageCreateInfo *vk_info;
2938 bool scanout;
2939 bool no_metadata_planes;
2940 bool prime_blit_src;
2941 const struct radeon_bo_metadata *bo_metadata;
2942 };
2943
2944 VkResult radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2945 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2946 const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image);
2947
2948 VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2949 const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal);
2950
2951 bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
2952 VkImageCreateFlags flags, bool *sign_reinterpret);
2953
2954 bool vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format);
2955
2956 unsigned radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image);
2957
2958 VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2959 const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc,
2960 VkImage *out_image_h);
2961 VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
2962 const VkImportAndroidHardwareBufferInfoANDROID *info);
2963 VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
2964 const VkMemoryAllocateInfo *pAllocateInfo);
2965
2966 unsigned radv_ahb_format_for_vk_format(VkFormat vk_format);
2967
2968 VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2969
2970 bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2971
2972 struct radv_image_view_extra_create_info {
2973 bool disable_compression;
2974 bool enable_compression;
2975 bool disable_dcc_mrt;
2976 bool from_client; /**< Set only if this came from vkCreateImage */
2977 };
2978
2979 void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2980 const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags,
2981 const struct radv_image_view_extra_create_info *extra_create_info);
2982 void radv_image_view_finish(struct radv_image_view *iview);
2983
2984 VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2985
2986 void radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
2987 enum pipe_swizzle swizzle[4]);
2988
2989 unsigned radv_map_swizzle(unsigned swizzle);
2990
2991 struct radv_buffer_view {
2992 struct vk_buffer_view vk;
2993 struct radeon_winsys_bo *bo;
2994 uint32_t state[4];
2995 };
2996 void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2997 const VkBufferViewCreateInfo *pCreateInfo);
2998 void radv_buffer_view_finish(struct radv_buffer_view *view);
2999
3000 static inline bool
3001 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
3002 {
3003 if (extent->width != image->vk.extent.width || extent->height != image->vk.extent.height ||
3004 extent->depth != image->vk.extent.depth)
3005 return false;
3006 return true;
3007 }
3008
3009 struct radv_sampler {
3010 struct vk_sampler vk;
3011 uint32_t state[4];
3012 uint32_t border_color_slot;
3013 };
3014
3015 struct radv_resolve_barrier {
3016 VkPipelineStageFlags2 src_stage_mask;
3017 VkPipelineStageFlags2 dst_stage_mask;
3018 VkAccessFlags2 src_access_mask;
3019 VkAccessFlags2 dst_access_mask;
3020 };
3021
3022 void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier);
3023
3024 struct radv_query_pool {
3025 struct vk_query_pool vk;
3026 struct radeon_winsys_bo *bo;
3027 uint32_t stride;
3028 uint32_t availability_offset;
3029 uint64_t size;
3030 char *ptr;
3031 bool uses_gds; /* For NGG GS on GFX10+ */
3032 bool uses_ace; /* For task shader invocations on GFX10.3+ */
3033 };
3034
3035 struct radv_perfcounter_impl;
3036
3037 struct radv_pc_query_pool {
3038 struct radv_query_pool b;
3039
3040 uint32_t *pc_regs;
3041 unsigned num_pc_regs;
3042
3043 unsigned num_passes;
3044
3045 unsigned num_counters;
3046 struct radv_perfcounter_impl *counters;
3047 };
3048
3049 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
3050 VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo,
3051 struct radv_pc_query_pool *pool);
3052 void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
3053 void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
3054 void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
3055
3056 #define VL_MACROBLOCK_WIDTH 16
3057 #define VL_MACROBLOCK_HEIGHT 16
3058
3059 struct radv_vid_mem {
3060 struct radv_device_memory *mem;
3061 VkDeviceSize offset;
3062 VkDeviceSize size;
3063 };
3064
3065 struct radv_video_session {
3066 struct vk_video_session vk;
3067
3068 uint32_t stream_handle;
3069 unsigned stream_type;
3070 bool interlaced;
3071 enum { DPB_MAX_RES = 0, DPB_DYNAMIC_TIER_1, DPB_DYNAMIC_TIER_2 } dpb_type;
3072 unsigned db_alignment;
3073
3074 struct radv_vid_mem sessionctx;
3075 struct radv_vid_mem ctx;
3076
3077 unsigned dbg_frame_cnt;
3078 };
3079
3080 struct radv_video_session_params {
3081 struct vk_video_session_parameters vk;
3082 };
3083
3084 bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
3085
3086 int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
3087 const VkDeviceQueueCreateInfo *create_info,
3088 const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
3089
3090 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
3091 struct radv_descriptor_set *set, unsigned idx);
3092
3093 void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
3094 VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
3095 const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
3096 const VkCopyDescriptorSet *pDescriptorCopies);
3097
3098 void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
3099 struct radv_descriptor_set *set,
3100 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
3101 const void *pData);
3102
3103 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
3104 VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
3105 const VkWriteDescriptorSet *pDescriptorWrites);
3106
3107 void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
3108 unsigned range, uint32_t *state);
3109
3110 uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
3111 const VkImageSubresourceRange *range, uint32_t value);
3112
3113 uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
3114 const VkImageSubresourceRange *range);
3115
3116 /* radv_nir_to_llvm.c */
3117 struct radv_shader_args;
3118 struct radv_nir_compiler_options;
3119 struct radv_shader_info;
3120
3121 void llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
3122 unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
3123 const struct radv_shader_args *args);
3124
3125 bool radv_sqtt_init(struct radv_device *device);
3126 void radv_sqtt_finish(struct radv_device *device);
3127 bool radv_begin_sqtt(struct radv_queue *queue);
3128 bool radv_end_sqtt(struct radv_queue *queue);
3129 bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace);
3130 void radv_reset_sqtt_trace(struct radv_device *device);
3131 void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);
3132 bool radv_is_instruction_timing_enabled(void);
3133 bool radv_sqtt_queue_events_enabled(void);
3134 bool radv_sqtt_sample_clocks(struct radv_device *device);

void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit);
void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);

VkResult radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_winsys_bo *timestamp_bo,
                                    uint32_t timestamp_offset, VkPipelineStageFlags2 timestamp_stage,
                                    VkCommandBuffer *pcmdbuf);

VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
                                         uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);

VkResult radv_rra_trace_init(struct radv_device *device);

VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename);
void radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data);
void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);

void radv_memory_trace_init(struct radv_device *device);
void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal);
void radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
                              VkMemoryAllocateFlags alloc_flags);
void radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer);
void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
                               VkImage _image);
void radv_rmv_log_image_bind(struct radv_device *device, VkImage _image);
void radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool pool, bool is_internal);
void radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo,
                                           uint32_t executable_size, uint32_t data_size, uint32_t scratch_size);
void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
void radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
void radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
                                         VkDescriptorPool pool, bool is_internal);
void radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline,
                                           bool is_internal);
void radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal);
void radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline);
void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags, bool is_internal);
void radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle);
void radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type);
void radv_rmv_fill_device_info(const struct radv_physical_device *device, struct vk_rmv_device_info *info);
void radv_rmv_collect_trace_events(struct radv_device *device);
void radv_memory_trace_finish(struct radv_device *device);

VkResult radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, bool is_internal);
VkResult radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
                           const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem, bool is_internal);
VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
                                const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal);
VkResult radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator, VkEvent *pEvent, bool is_internal);

/* radv_sqtt_layer.c */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      uint16_t all;
   } layout_transitions;
};

/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};

void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier);
void radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count);
void radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer);
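
/* Hedged example of annotating a barrier for RGP (hypothetical call site;
 * the real driver emits these from its barrier-handling paths):
 *
 *    radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER);
 *
 *    struct radv_barrier_data barrier = {0};
 *    barrier.layout_transitions.dcc_decompress = 1;
 *    radv_describe_layout_transition(cmd_buffer, &barrier);
 *
 *    radv_describe_barrier_end(cmd_buffer);
 */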

void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline);

void radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                                  const char *str);

struct radv_indirect_command_layout {
   struct vk_object_base base;

   VkIndirectCommandsLayoutUsageFlagsNV flags;
   VkPipelineBindPoint pipeline_bind_point;

   uint32_t input_stride;
   uint32_t token_count;

   bool indexed;
   bool binds_index_buffer;
   bool draw_mesh_tasks;
   uint16_t draw_params_offset;
   uint16_t index_buffer_offset;

   uint16_t dispatch_params_offset;

   uint32_t bind_vbo_mask;
   uint32_t vbo_offsets[MAX_VBS];

   uint64_t push_constant_mask;
   uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];

   uint32_t ibo_type_32;
   uint32_t ibo_type_8;

   VkIndirectCommandsLayoutTokenNV tokens[0];
};
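
/* The token array is stored inline after the struct, so an allocation for a
 * layout with token_count tokens would be sized along these lines
 * (illustrative sketch of the standard trailing-array pattern):
 *
 *    size_t size = sizeof(struct radv_indirect_command_layout) +
 *                  token_count * sizeof(VkIndirectCommandsLayoutTokenNV);
 */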

uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);

bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
                              const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo,
                      bool cond_render_enabled);

bool radv_dgc_can_preprocess(const struct radv_indirect_command_layout *layout, struct radv_pipeline *pipeline);

static inline uint32_t
radv_conv_prim_to_gs_out(uint32_t topology, bool is_ngg)
{
   switch (topology) {
   case V_008958_DI_PT_POINTLIST:
   case V_008958_DI_PT_PATCH:
      return V_028A6C_POINTLIST;
   case V_008958_DI_PT_LINELIST:
   case V_008958_DI_PT_LINESTRIP:
   case V_008958_DI_PT_LINELIST_ADJ:
   case V_008958_DI_PT_LINESTRIP_ADJ:
      return V_028A6C_LINESTRIP;
   case V_008958_DI_PT_TRILIST:
   case V_008958_DI_PT_TRISTRIP:
   case V_008958_DI_PT_TRIFAN:
   case V_008958_DI_PT_TRILIST_ADJ:
   case V_008958_DI_PT_TRISTRIP_ADJ:
      return V_028A6C_TRISTRIP;
   case V_008958_DI_PT_RECTLIST:
      return is_ngg ? V_028A6C_RECTLIST : V_028A6C_TRISTRIP;
   default:
      assert(0);
      return 0;
   }
}

static inline uint32_t
radv_translate_prim(unsigned topology)
{
   switch (topology) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return V_008958_DI_PT_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return V_008958_DI_PT_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return V_008958_DI_PT_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return V_008958_DI_PT_TRILIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return V_008958_DI_PT_TRISTRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      return V_008958_DI_PT_TRIFAN;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      return V_008958_DI_PT_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return V_008958_DI_PT_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      return V_008958_DI_PT_TRILIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return V_008958_DI_PT_TRISTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return V_008958_DI_PT_PATCH;
   default:
      unreachable("unhandled primitive type");
   }
}

static inline bool
radv_prim_is_points_or_lines(unsigned topology)
{
   switch (topology) {
   case V_008958_DI_PT_POINTLIST:
   case V_008958_DI_PT_LINELIST:
   case V_008958_DI_PT_LINESTRIP:
   case V_008958_DI_PT_LINELIST_ADJ:
   case V_008958_DI_PT_LINESTRIP_ADJ:
      return true;
   default:
      return false;
   }
}

static inline bool
radv_rast_prim_is_point(unsigned rast_prim)
{
   return rast_prim == V_028A6C_POINTLIST;
}

static inline bool
radv_rast_prim_is_line(unsigned rast_prim)
{
   return rast_prim == V_028A6C_LINESTRIP;
}

static inline bool
radv_rast_prim_is_points_or_lines(unsigned rast_prim)
{
   return radv_rast_prim_is_point(rast_prim) || radv_rast_prim_is_line(rast_prim);
}

static inline bool
radv_polygon_mode_is_point(unsigned polygon_mode)
{
   return polygon_mode == V_028814_X_DRAW_POINTS;
}

static inline bool
radv_polygon_mode_is_line(unsigned polygon_mode)
{
   return polygon_mode == V_028814_X_DRAW_LINES;
}

static inline bool
radv_polygon_mode_is_points_or_lines(unsigned polygon_mode)
{
   return radv_polygon_mode_is_point(polygon_mode) || radv_polygon_mode_is_line(polygon_mode);
}

static inline bool
radv_primitive_topology_is_line_list(unsigned primitive_topology)
{
   return primitive_topology == V_008958_DI_PT_LINELIST || primitive_topology == V_008958_DI_PT_LINELIST_ADJ;
}

static inline unsigned
radv_get_num_vertices_per_prim(const struct radv_graphics_state_key *gfx_state)
{
   if (gfx_state->ia.topology == V_008958_DI_PT_NONE) {
      /* When the topology is unknown (with graphics pipeline library), return the maximum number of
       * vertices per primitive for VS. This is used to lower NGG (the HW will ignore the extra
       * bits for points/lines) and also to enable NGG culling unconditionally (it will be disabled
       * dynamically for points/lines).
       */
      return 3;
   } else {
      /* Need to add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
      return radv_conv_prim_to_gs_out(gfx_state->ia.topology, false) + 1;
   }
}
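
/* Worked example of the "+1" above: for a triangle list,
 * radv_conv_prim_to_gs_out(V_008958_DI_PT_TRILIST, false) returns
 * V_028A6C_TRISTRIP (2), so this helper yields 2 + 1 = 3 vertices per
 * primitive; a line list maps to V_028A6C_LINESTRIP (1) and yields 2.
 */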

uint32_t radv_get_vgt_gs_out(struct radv_shader **shaders, uint32_t primitive_topology);

static inline uint32_t
radv_translate_fill(VkPolygonMode func)
{
   switch (func) {
   case VK_POLYGON_MODE_FILL:
      return V_028814_X_DRAW_TRIANGLES;
   case VK_POLYGON_MODE_LINE:
      return V_028814_X_DRAW_LINES;
   case VK_POLYGON_MODE_POINT:
      return V_028814_X_DRAW_POINTS;
   default:
      assert(0);
      return V_028814_X_DRAW_POINTS;
   }
}

static inline uint32_t
radv_translate_stencil_op(enum VkStencilOp op)
{
   switch (op) {
   case VK_STENCIL_OP_KEEP:
      return V_02842C_STENCIL_KEEP;
   case VK_STENCIL_OP_ZERO:
      return V_02842C_STENCIL_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return V_02842C_STENCIL_REPLACE_TEST;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return V_02842C_STENCIL_ADD_CLAMP;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return V_02842C_STENCIL_SUB_CLAMP;
   case VK_STENCIL_OP_INVERT:
      return V_02842C_STENCIL_INVERT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return V_02842C_STENCIL_ADD_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return V_02842C_STENCIL_SUB_WRAP;
   default:
      return 0;
   }
}

static inline uint32_t
radv_translate_blend_logic_op(VkLogicOp op)
{
   switch (op) {
   case VK_LOGIC_OP_CLEAR:
      return V_028808_ROP3_CLEAR;
   case VK_LOGIC_OP_AND:
      return V_028808_ROP3_AND;
   case VK_LOGIC_OP_AND_REVERSE:
      return V_028808_ROP3_AND_REVERSE;
   case VK_LOGIC_OP_COPY:
      return V_028808_ROP3_COPY;
   case VK_LOGIC_OP_AND_INVERTED:
      return V_028808_ROP3_AND_INVERTED;
   case VK_LOGIC_OP_NO_OP:
      return V_028808_ROP3_NO_OP;
   case VK_LOGIC_OP_XOR:
      return V_028808_ROP3_XOR;
   case VK_LOGIC_OP_OR:
      return V_028808_ROP3_OR;
   case VK_LOGIC_OP_NOR:
      return V_028808_ROP3_NOR;
   case VK_LOGIC_OP_EQUIVALENT:
      return V_028808_ROP3_EQUIVALENT;
   case VK_LOGIC_OP_INVERT:
      return V_028808_ROP3_INVERT;
   case VK_LOGIC_OP_OR_REVERSE:
      return V_028808_ROP3_OR_REVERSE;
   case VK_LOGIC_OP_COPY_INVERTED:
      return V_028808_ROP3_COPY_INVERTED;
   case VK_LOGIC_OP_OR_INVERTED:
      return V_028808_ROP3_OR_INVERTED;
   case VK_LOGIC_OP_NAND:
      return V_028808_ROP3_NAND;
   case VK_LOGIC_OP_SET:
      return V_028808_ROP3_SET;
   default:
      unreachable("Unhandled logic op");
   }
}

static inline uint32_t
radv_translate_blend_function(VkBlendOp op)
{
   switch (op) {
   case VK_BLEND_OP_ADD:
      return V_028780_COMB_DST_PLUS_SRC;
   case VK_BLEND_OP_SUBTRACT:
      return V_028780_COMB_SRC_MINUS_DST;
   case VK_BLEND_OP_REVERSE_SUBTRACT:
      return V_028780_COMB_DST_MINUS_SRC;
   case VK_BLEND_OP_MIN:
      return V_028780_COMB_MIN_DST_SRC;
   case VK_BLEND_OP_MAX:
      return V_028780_COMB_MAX_DST_SRC;
   default:
      return 0;
   }
}

static inline uint32_t
radv_translate_blend_factor(enum amd_gfx_level gfx_level, VkBlendFactor factor)
{
   switch (factor) {
   case VK_BLEND_FACTOR_ZERO:
      return V_028780_BLEND_ZERO;
   case VK_BLEND_FACTOR_ONE:
      return V_028780_BLEND_ONE;
   case VK_BLEND_FACTOR_SRC_COLOR:
      return V_028780_BLEND_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
   case VK_BLEND_FACTOR_DST_COLOR:
      return V_028780_BLEND_DST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
      return V_028780_BLEND_ONE_MINUS_DST_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA:
      return V_028780_BLEND_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
      return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
   case VK_BLEND_FACTOR_DST_ALPHA:
      return V_028780_BLEND_DST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
      return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 : V_028780_BLEND_CONSTANT_COLOR_GFX6;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
      return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11
                                : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 : V_028780_BLEND_CONSTANT_ALPHA_GFX6;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11
                                : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return V_028780_BLEND_SRC_ALPHA_SATURATE;
   case VK_BLEND_FACTOR_SRC1_COLOR:
      return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
      return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 : V_028780_BLEND_INV_SRC1_COLOR_GFX6;
   case VK_BLEND_FACTOR_SRC1_ALPHA:
      return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11 : V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
   default:
      return 0;
   }
}

static inline uint32_t
radv_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
{
   switch (factor) {
   case VK_BLEND_FACTOR_ZERO:
      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
   case VK_BLEND_FACTOR_ONE:
      return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
   case VK_BLEND_FACTOR_SRC_COLOR:
      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
   case VK_BLEND_FACTOR_SRC_ALPHA:
      return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
      return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
   default:
      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
   }
}

static inline uint32_t
radv_translate_blend_opt_function(VkBlendOp op)
{
   switch (op) {
   case VK_BLEND_OP_ADD:
      return V_028760_OPT_COMB_ADD;
   case VK_BLEND_OP_SUBTRACT:
      return V_028760_OPT_COMB_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT:
      return V_028760_OPT_COMB_REVSUBTRACT;
   case VK_BLEND_OP_MIN:
      return V_028760_OPT_COMB_MIN;
   case VK_BLEND_OP_MAX:
      return V_028760_OPT_COMB_MAX;
   default:
      return V_028760_OPT_COMB_BLEND_DISABLED;
   }
}

static inline bool
radv_blend_factor_uses_dst(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA ||
          factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
}

static inline bool
radv_is_dual_src(VkBlendFactor factor)
{
   switch (factor) {
   case VK_BLEND_FACTOR_SRC1_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
   case VK_BLEND_FACTOR_SRC1_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      return true;
   default:
      return false;
   }
}

static ALWAYS_INLINE bool
radv_can_enable_dual_src(const struct vk_color_blend_attachment_state *att)
{
   VkBlendOp eqRGB = att->color_blend_op;
   VkBlendFactor srcRGB = att->src_color_blend_factor;
   VkBlendFactor dstRGB = att->dst_color_blend_factor;
   VkBlendOp eqA = att->alpha_blend_op;
   VkBlendFactor srcA = att->src_alpha_blend_factor;
   VkBlendFactor dstA = att->dst_alpha_blend_factor;
   bool eqRGB_minmax = eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX;
   bool eqA_minmax = eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX;

   if (!eqRGB_minmax && (radv_is_dual_src(srcRGB) || radv_is_dual_src(dstRGB)))
      return true;
   if (!eqA_minmax && (radv_is_dual_src(srcA) || radv_is_dual_src(dstA)))
      return true;
   return false;
}
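
/* Hedged usage sketch (hypothetical call site): dual-source blending only
 * needs to be enabled when a non-MIN/MAX equation actually reads an SRC1
 * factor, since MIN/MAX equations ignore the blend factors entirely.
 *
 *    if (radv_can_enable_dual_src(&att))
 *       ... program the dual-source (SRC1) export path ...
 */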

static inline void
radv_normalize_blend_factor(VkBlendOp op, VkBlendFactor *src_factor, VkBlendFactor *dst_factor)
{
   if (op == VK_BLEND_OP_MIN || op == VK_BLEND_OP_MAX) {
      *src_factor = VK_BLEND_FACTOR_ONE;
      *dst_factor = VK_BLEND_FACTOR_ONE;
   }
}
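
/* Example (illustrative): with op == VK_BLEND_OP_MIN, calling
 *
 *    radv_normalize_blend_factor(op, &src_factor, &dst_factor);
 *
 * rewrites both factors to VK_BLEND_FACTOR_ONE, which gives a canonical
 * representation because MIN/MAX ignore the blend factors.
 */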

void radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor,
                           VkBlendFactor expected_dst, VkBlendFactor replacement_src);

ALWAYS_INLINE static bool
radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_streamout_state *so = &cmd_buffer->state.streamout;

   /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
   return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout;
}

/*
 * Queue helper to get the ring.
 * Placed here as it needs both the queue and device structs.
 */
static inline enum amd_ip_type
radv_queue_ring(const struct radv_queue *queue)
{
   return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
}

/* radv_video */
void radv_init_physical_device_decoder(struct radv_physical_device *pdevice);
void radv_video_get_profile_alignments(struct radv_physical_device *pdevice,
                                       const VkVideoProfileListInfoKHR *profile_list, uint32_t *width_align_out,
                                       uint32_t *height_align_out);

/**
 * Helper used for debugging compiler issues by enabling/disabling LLVM for a
 * specific shader stage (developers only).
 */
static inline bool
radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage)
{
   return device->physical_device->use_llvm;
}

static inline bool
radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice, unsigned bitsize)
{
   return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || pdevice->rad_info.gfx_level == GFX10 ||
          pdevice->rad_info.gfx_level == GFX10_3 || (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32);
}

static inline bool
radv_has_pops(const struct radv_physical_device *pdevice)
{
   return pdevice->rad_info.gfx_level >= GFX9 && !pdevice->use_llvm;
}

unsigned radv_compact_spi_shader_col_format(const struct radv_shader *ps, uint32_t spi_shader_col_format);

/* radv_perfcounter.c */
void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);

/* radv_spm.c */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf);

void radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline);
void radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline);
void radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline);
void radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline);

void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va, bool draw_visible);
void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);

bool radv_gang_init(struct radv_cmd_buffer *cmd_buffer);
void radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer);

static inline bool
radv_uses_device_generated_commands(const struct radv_device *device)
{
   return device->vk.enabled_features.deviceGeneratedCommands || device->vk.enabled_features.deviceGeneratedCompute;
}

static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{
   return device->vk.enabled_features.primitivesGeneratedQuery ||
          device->vk.enabled_features.primitivesGeneratedQueryWithRasterizerDiscard ||
          device->vk.enabled_features.primitivesGeneratedQueryWithNonZeroStreams;
}

static inline bool
radv_uses_image_float32_atomics(const struct radv_device *device)
{
   return device->vk.enabled_features.shaderImageFloat32Atomics ||
          device->vk.enabled_features.sparseImageFloat32Atomics ||
          device->vk.enabled_features.shaderImageFloat32AtomicMinMax ||
          device->vk.enabled_features.sparseImageFloat32AtomicMinMax;
}

bool radv_device_fault_detection_enabled(const struct radv_device *device);

#define RADV_FROM_HANDLE(__radv_type, __name, __handle) VK_FROM_HANDLE(__radv_type, __name, __handle)
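
/* Typical usage (illustrative): unwrap an API handle into the corresponding
 * driver struct via the casts defined below, e.g.
 *
 *    RADV_FROM_HANDLE(radv_device, device, _device);
 *
 * which expands to "struct radv_device *device = radv_device_from_handle(_device);".
 */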

VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, vk.base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
                               VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, vk.base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, vk.base, VkSampler, VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_object, base, VkShaderEXT, VK_OBJECT_TYPE_SHADER_EXT)

VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR,
                               VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)

static inline uint64_t
radv_get_tdr_timeout_for_ip(enum amd_ip_type ip_type)
{
   const uint64_t compute_tdr_duration_ns = 60000000000ull; /* 1 minute (default in kernel) */
   const uint64_t other_tdr_duration_ns = 10000000000ull;   /* 10 seconds (default in kernel) */

   return ip_type == AMD_IP_COMPUTE ? compute_tdr_duration_ns : other_tdr_duration_ns;
}
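
/* Example (illustrative): deriving an absolute wait deadline from the per-IP
 * TDR budget; "now_ns" is a hypothetical caller-supplied current time in
 * nanoseconds.
 *
 *    uint64_t deadline_ns = now_ns + radv_get_tdr_timeout_for_ip(AMD_IP_COMPUTE);
 */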

#ifdef __cplusplus
}
#endif

#endif /* RADV_PRIVATE_H */