• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30 
31 #include <assert.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #ifdef HAVE_VALGRIND
38 #include <memcheck.h>
39 #include <valgrind.h>
40 #define VG(x) x
41 #else
42 #define VG(x) ((void)0)
43 #endif
44 
45 #include "c11/threads.h"
46 #ifndef _WIN32
47 #include <amdgpu.h>
48 #include <xf86drm.h>
49 #endif
50 #include "compiler/shader_enums.h"
51 #include "util/bitscan.h"
52 #include "util/detect_os.h"
53 #include "util/list.h"
54 #include "util/macros.h"
55 #include "util/rwlock.h"
56 #include "util/xmlconfig.h"
57 #include "vk_alloc.h"
58 #include "vk_buffer.h"
59 #include "vk_buffer_view.h"
60 #include "vk_command_buffer.h"
61 #include "vk_command_pool.h"
62 #include "vk_debug_report.h"
63 #include "vk_device.h"
64 #include "vk_format.h"
65 #include "vk_image.h"
66 #include "vk_instance.h"
67 #include "vk_log.h"
68 #include "vk_physical_device.h"
69 #include "vk_query_pool.h"
70 #include "vk_queue.h"
71 #include "vk_sampler.h"
72 #include "vk_shader_module.h"
73 #include "vk_texcompress_astc.h"
74 #include "vk_texcompress_etc2.h"
75 #include "vk_util.h"
76 #include "vk_video.h"
77 #include "vk_ycbcr_conversion.h"
78 
79 #include "rmv/vk_rmv_common.h"
80 #include "rmv/vk_rmv_tokens.h"
81 
82 #include "ac_binary.h"
83 #include "ac_gpu_info.h"
84 #include "ac_shader_util.h"
85 #include "ac_spm.h"
86 #include "ac_sqtt.h"
87 #include "ac_surface.h"
88 #include "ac_vcn.h"
89 #include "radv_constants.h"
90 #include "radv_descriptor_set.h"
91 #include "radv_radeon_winsys.h"
92 #include "radv_shader.h"
93 #include "radv_shader_args.h"
94 #include "sid.h"
95 
96 #include "radix_sort/radix_sort_vk_devaddr.h"
97 
98 /* Pre-declarations needed for WSI entrypoints */
99 struct wl_surface;
100 struct wl_display;
101 typedef struct xcb_connection_t xcb_connection_t;
102 typedef uint32_t xcb_visualid_t;
103 typedef uint32_t xcb_window_t;
104 
105 #include <vulkan/vk_android_native_buffer.h>
106 #include <vulkan/vk_icd.h>
107 #include <vulkan/vulkan.h>
108 #include <vulkan/vulkan_android.h>
109 
110 #include "radv_entrypoints.h"
111 
112 #include "wsi_common.h"
113 
114 #ifdef __cplusplus
115 extern "C" {
116 #endif
117 
118 /* Helper to determine if we should compile
119  * any of the Android AHB support.
120  *
121  * To actually enable the ext we also need
122  * the necessary kernel support.
123  */
124 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
125 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
126 #include <vndk/hardware_buffer.h>
127 #else
128 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
129 #endif
130 
/* RADV_USE_WSI_PLATFORM is defined when at least one of the Wayland, XCB,
 * Xlib or display WSI platform macros is defined at build time.
 */
131 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) ||   \
132    defined(VK_USE_PLATFORM_DISPLAY_KHR)
133 #define RADV_USE_WSI_PLATFORM
134 #endif
135 
/* Vulkan API version built from VK_HEADER_VERSION: 1.1 when ANDROID_STRICT
 * is defined, 1.3 otherwise.
 */
136 #ifdef ANDROID_STRICT
137 #define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
138 #else
139 #define RADV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
140 #endif
141 
/* Calibrated timestamp support is compiled out (0) on Windows builds. */
142 #ifdef _WIN32
143 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
144 #else
145 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
146 #endif
147 
/* printf-style format-checking attribute for functions whose format string
 * is argument `a` and whose first variadic argument is `b`; expands to
 * nothing on Windows builds.
 */
148 #ifdef _WIN32
149 #define radv_printflike(a, b)
150 #else
151 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
152 #endif
153 
154 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
155 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
156 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
157 #endif
158 
/**
 * Round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (asserted). The arithmetic is unsigned,
 * so a result that does not fit in 32 bits wraps around.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   const uint32_t mask = a - 1;

   assert(a != 0 && a == (a & -a));
   return (v + mask) & ~mask;
}
165 
/**
 * Round v up to the next multiple of a, where a may be any non-zero value
 * (it does not have to be a power of two).
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   /* Dividing by zero is undefined; catch it in debug builds the same way
    * align_u32() rejects a zero alignment.
    */
   assert(a != 0);
   return (v + a - 1) / a * a;
}
171 
/**
 * 64-bit variant of the power-of-two round-up: returns the smallest multiple
 * of a that is greater than or equal to v.
 *
 * a must be a non-zero power of two (asserted).
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   const uint64_t mask = a - 1;

   assert(a != 0 && a == (a & -a));
   return (v + mask) & ~mask;
}
178 
/** Return true when n is a multiple of a. Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   const uintmax_t low_bits = n & (a - 1);

   assert((a & -a) == a);
   return low_bits == 0;
}
186 
/**
 * Shift n right by levels and clamp the result to a minimum of 1.
 *
 * A size of 0 is returned unchanged. Any levels value is accepted: once the
 * shift count reaches the width of the type, every non-zero n has already
 * been reduced to the minimum of 1.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;

   /* Shifting a 32-bit value by 32 or more bits is undefined behavior. */
   if (levels >= 32)
      return 1;

   const uint32_t shifted = n >> levels;
   return shifted != 0 ? shifted : 1;
}
195 
/**
 * Convert value to a signed fixed-point number with frac_bits fractional
 * bits, i.e. value * 2^frac_bits converted to int (the conversion discards
 * the fractional part).
 *
 * frac_bits must be smaller than 32 (asserted).
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);

   /* Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior. */
   const float scale = (float)(1u << frac_bits);
   return (int)(value * scale);
}
201 
/**
 * Convert value to an unsigned fixed-point number with frac_bits fractional
 * bits, i.e. value * 2^frac_bits converted to unsigned int (the conversion
 * discards the fractional part).
 *
 * frac_bits must be smaller than 32 (asserted).
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);

   /* Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior. */
   const float scale = (float)(1u << frac_bits);
   return (unsigned int)(value * scale);
}
207 
208 /* Whenever we generate an error, pass it through this function. Useful for
209  * debugging, where we can break on it. Only call at error site, not when
210  * propagating errors. Might be useful to plug in a stack trace here.
211  */
212 
213 struct radv_image_view;
214 struct radv_instance;
215 struct rvcn_decode_buffer_s;
216 
217 /* queue types */
/* The entries before RADV_MAX_QUEUE_FAMILIES are the real queue families;
 * RADV_MAX_QUEUE_FAMILIES is their count and is used to size per-family
 * arrays. The two values after it are pseudo families returned by
 * vk_queue_to_radv() for the special Vulkan queue family indices.
 */
218 enum radv_queue_family {
219    RADV_QUEUE_GENERAL,
220    RADV_QUEUE_COMPUTE,
221    RADV_QUEUE_TRANSFER,
222    RADV_QUEUE_SPARSE,
223    RADV_QUEUE_VIDEO_DEC,
224    RADV_QUEUE_VIDEO_ENC,
225    RADV_MAX_QUEUE_FAMILIES,
   /* Shares its value with RADV_MAX_QUEUE_FAMILIES; used for
    * VK_QUEUE_FAMILY_EXTERNAL and VK_QUEUE_FAMILY_FOREIGN_EXT. */
226    RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
   /* Used for VK_QUEUE_FAMILY_IGNORED. */
227    RADV_QUEUE_IGNORED,
228 };
229 
230 struct radv_perfcounter_desc;
231 
/* Binning limits, stored in radv_physical_device::binning_settings. Each
 * field documents its own allowed range.
 * NOTE(review): presumably primitive-binning hardware tuning - confirm.
 */
232 struct radv_binning_settings {
233    unsigned context_states_per_bin;    /* allowed range: [1, 6] */
234    unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
235    unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
236 };
237 
/* Compact description of a physical device: the chip family, a pointer size
 * and a set of one-bit option flags. An instance is stored in
 * radv_physical_device::cache_key.
 * NOTE(review): presumably hashed to key the shader/pipeline cache, so every
 * bit (padding included) may need deterministic initialization - confirm.
 */
238 struct radv_physical_device_cache_key {
239    enum radeon_family family;
240    uint32_t ptr_size;
241 
   /* One-bit flags; 22 of them are packed into a single uint32_t. */
242    uint32_t conformant_trunc_coord : 1;
243    uint32_t clear_lds : 1;
244    uint32_t cs_wave32 : 1;
245    uint32_t disable_aniso_single_level : 1;
246    uint32_t disable_shrink_image_store : 1;
247    uint32_t disable_sinking_load_input_fs : 1;
248    uint32_t dual_color_blend_by_location : 1;
249    uint32_t emulate_rt : 1;
250    uint32_t ge_wave32 : 1;
251    uint32_t invariant_geom : 1;
252    uint32_t lower_discard_to_demote : 1;
253    uint32_t mesh_fast_launch_2 : 1;
254    uint32_t no_fmask : 1;
255    uint32_t no_rt : 1;
256    uint32_t ps_wave32 : 1;
257    uint32_t rt_wave64 : 1;
258    uint32_t split_fma : 1;
259    uint32_t ssbo_non_uniform : 1;
260    uint32_t tex_non_uniform : 1;
261    uint32_t use_llvm : 1;
262    uint32_t use_ngg : 1;
263    uint32_t use_ngg_culling : 1;
264 };
265 
/* Driver state for one physical device. The common vk_physical_device is
 * embedded as the first member (`vk`); `instance` points back to the owning
 * radv_instance.
 */
266 struct radv_physical_device {
267    struct vk_physical_device vk;
268 
269    struct radv_instance *instance;
270 
   /* Winsys handle and the hardware information block (`rad_info`). */
271    struct radeon_winsys *ws;
272    struct radeon_info rad_info;
273    char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
274    char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
275    uint8_t driver_uuid[VK_UUID_SIZE];
276    uint8_t device_uuid[VK_UUID_SIZE];
277    uint8_t cache_uuid[VK_UUID_SIZE];
278 
   /* NOTE(review): presumably the render-node and primary-node DRM file
    * descriptors - confirm against the device creation code. */
279    int local_fd;
280    int master_fd;
281    struct wsi_device wsi_device;
282 
283    /* Whether DCC should be enabled for MSAA textures. */
284    bool dcc_msaa_allowed;
285 
286    /* Whether to enable FMASK compression for MSAA textures (GFX6-GFX10.3) */
287    bool use_fmask;
288 
289    /* Whether to enable NGG. */
290    bool use_ngg;
291 
292    /* Whether to enable NGG culling. */
293    bool use_ngg_culling;
294 
295    /* Whether to enable NGG streamout. */
296    bool use_ngg_streamout;
297 
298    /* Whether to emulate the number of primitives generated by GS. */
299    bool emulate_ngg_gs_query_pipeline_stat;
300 
301    /* Whether to use GS_FAST_LAUNCH(2) for mesh shaders. */
302    bool mesh_fast_launch_2;
303 
304    /* Whether to emulate mesh/task shader queries. */
305    bool emulate_mesh_shader_queries;
306 
307    /* Number of threads per wave. */
308    uint8_t ps_wave_size;
309    uint8_t cs_wave_size;
310    uint8_t ge_wave_size;
311    uint8_t rt_wave_size;
312 
313    /* Maximum compute shared memory size. */
314    uint32_t max_shared_size;
315 
316    /* Whether to use the LLVM compiler backend */
317    bool use_llvm;
318 
319    /* Whether to emulate ETC2 image support on HW without support. */
320    bool emulate_etc2;
321 
322    /* Whether to emulate ASTC image support on HW without support. */
323    bool emulate_astc;
324 
   /* Vulkan memory properties plus, per memory type index, the winsys
    * buffer domain and flags (both arrays hold VK_MAX_MEMORY_TYPES entries). */
325    VkPhysicalDeviceMemoryProperties memory_properties;
326    enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
327    enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
328    unsigned heaps;
329 
330    /* Bitmask of memory types that use the 32-bit address space. */
331    uint32_t memory_types_32bit;
332 
   /* DRM/PCI identification; only present on non-Windows builds. */
333 #ifndef _WIN32
334    int available_nodes;
335    drmPciBusInfo bus_info;
336 
337    dev_t primary_devid;
338    dev_t render_devid;
339 #endif
340 
   /* One set of NIR compiler options per shader stage. */
341    nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];
342 
   /* Table indexed by Vulkan queue family index, read by vk_queue_to_radv(). */
343    enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
344    uint32_t num_queues;
345 
346    uint32_t gs_table_depth;
347 
348    struct ac_hs_info hs;
349    struct ac_task_info task_info;
350 
351    struct radv_binning_settings binning_settings;
352 
353    /* Performance counters. */
354    struct ac_perfcounters ac_perfcounters;
355 
356    uint32_t num_perfcounters;
357    struct radv_perfcounter_desc *perfcounters;
358 
   /* NOTE(review): presumably register offsets used by the video decode
    * path - confirm. */
359    struct {
360       unsigned data0;
361       unsigned data1;
362       unsigned cmd;
363       unsigned cntl;
364    } vid_dec_reg;
365    enum amd_ip_type vid_decode_ip;
366    uint32_t vid_addr_gfx_mode;
367    uint32_t stream_handle_base;
368    uint32_t stream_handle_counter;
369    uint32_t av1_version;
370 
371    struct radv_physical_device_cache_key cache_key;
372 };
373 
374 uint32_t radv_find_memory_index(const struct radv_physical_device *pdevice, VkMemoryPropertyFlags flags);
375 
376 VkResult create_null_physical_device(struct vk_instance *vk_instance);
377 
378 VkResult create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device,
379                                     struct vk_physical_device **out);
380 
381 void radv_physical_device_destroy(struct vk_physical_device *vk_device);
382 
/* Driver-specific trace mode bits. They start at bit VK_TRACE_MODE_COUNT, so
 * they occupy bit positions at or above that value.
 */
383 enum radv_trace_mode {
384    /** Radeon GPU Profiler */
385    RADV_TRACE_MODE_RGP = 1 << VK_TRACE_MODE_COUNT,
386 
387    /** Radeon Raytracing Analyzer */
388    RADV_TRACE_MODE_RRA = 1 << (VK_TRACE_MODE_COUNT + 1),
389 
390    /** Gather context rolls of submitted command buffers */
391    RADV_TRACE_MODE_CTX_ROLLS = 1 << (VK_TRACE_MODE_COUNT + 2),
392 };
393 
/* Driver instance object. The common vk_instance is embedded as the first
 * member (`vk`).
 */
394 struct radv_instance {
395    struct vk_instance vk;
396 
397    VkAllocationCallbacks alloc;
398 
   /* 64-bit flag masks.
    * NOTE(review): presumably filled from the RADV_DEBUG / RADV_PERFTEST
    * environment variables - confirm. */
399    uint64_t debug_flags;
400    uint64_t perftest_flags;
401 
   /* Option caches and the individual option values kept next to them.
    * NOTE(review): presumably parsed from driconf (drirc) files - confirm. */
402    struct {
403       struct driOptionCache options;
404       struct driOptionCache available_options;
405 
406       bool enable_mrt_output_nan_fixup;
407       bool disable_tc_compat_htile_in_general;
408       bool disable_shrink_image_store;
409       bool disable_aniso_single_level;
410       bool disable_trunc_coord;
411       bool zero_vram;
412       bool disable_sinking_load_input_fs;
413       bool flush_before_query_copy;
414       bool enable_unified_heap_on_apu;
415       bool tex_non_uniform;
416       bool ssbo_non_uniform;
417       bool flush_before_timestamp_write;
418       bool force_rt_wave64;
419       bool dual_color_blend_by_location;
420       bool legacy_sparse_binding;
421       bool clear_lds;
422       bool enable_dgc;
423       bool enable_khr_present_wait;
424       bool report_llvm9_version_string;
425       bool vk_require_etc2;
426       bool vk_require_astc;
427       bool force_active_accel_struct_leaves;
428       char *app_layer;
429       uint8_t override_graphics_shader_version;
430       uint8_t override_compute_shader_version;
431       uint8_t override_ray_tracing_shader_version;
432       int override_vram_size;
433       int override_uniform_offset_alignment;
434    } drirc;
435 };
436 
437 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
438 void radv_finish_wsi(struct radv_physical_device *physical_device);
439 
440 struct radv_shader_binary_part;
441 
442 bool radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
443                                 struct radv_pipeline *pipeline, const unsigned char *sha1,
444                                 bool *found_in_application_cache);
445 
446 void radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
447                                 struct radv_pipeline *pipeline, const unsigned char *sha1);
448 
449 struct radv_ray_tracing_pipeline;
450 bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
451                                             struct radv_ray_tracing_pipeline *pipeline,
452                                             const VkRayTracingPipelineCreateInfoKHR *create_info);
453 
454 void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
455                                             struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
456                                             const unsigned char *sha1);
457 
458 nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
459                                            gl_shader_stage stage, const blake3_hash key);
460 
461 void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
462                                     const nir_shader *nir);
463 
464 struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device,
465                                                                        struct vk_pipeline_cache *cache,
466                                                                        const unsigned char *sha1);
467 
468 struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device,
469                                                                    struct vk_pipeline_cache *cache,
470                                                                    struct nir_shader *nir, const unsigned char *sha1,
471                                                                    bool cached);
472 
473 struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device,
474                                                      struct vk_pipeline_cache_object *object);
475 
/* Collection of Vulkan pipelines, pipeline layouts and descriptor set
 * layouts, grouped into sub-structs named after the operation they belong to
 * (clears, blits, copies, resolves, queries, ...). Arrays dimensioned by
 * MAX_SAMPLES_LOG2 are indexed by log2 of the sample count, as the comment
 * on color_clear states.
 */
476 struct radv_meta_state {
477    VkAllocationCallbacks alloc;
478 
479    VkPipelineCache cache;
480    uint32_t initial_cache_entries;
481 
482    /*
483     * For on-demand pipeline creation, makes sure that
484     * only one thread tries to build a pipeline at the same time.
485     */
486    mtx_t mtx;
487 
488    /**
489     * Use array element `i` for images with `2^i` samples.
490     */
491    struct {
492       VkPipeline color_pipelines[NUM_META_FS_KEYS];
493    } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];
494 
   /* Depth/stencil clear pipelines, in regular and "unrestricted" variants. */
495    struct {
496       VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
497       VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
498       VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
499 
500       VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
501       VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
502       VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
503    } ds_clear[MAX_SAMPLES_LOG2];
504 
505    VkPipelineLayout clear_color_p_layout;
506    VkPipelineLayout clear_depth_p_layout;
507    VkPipelineLayout clear_depth_unrestricted_p_layout;
508 
509    /* Optimized compute fast HTILE clear for stencil or depth only. */
510    VkPipeline clear_htile_mask_pipeline;
511    VkPipelineLayout clear_htile_mask_p_layout;
512    VkDescriptorSetLayout clear_htile_mask_ds_layout;
513 
514    /* Copy VRS into HTILE. */
515    VkPipeline copy_vrs_htile_pipeline;
516    VkPipelineLayout copy_vrs_htile_p_layout;
517    VkDescriptorSetLayout copy_vrs_htile_ds_layout;
518 
519    /* Clear DCC with comp-to-single. */
520    VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
521    VkPipelineLayout clear_dcc_comp_to_single_p_layout;
522    VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;
523 
524    struct {
525       /** Pipeline that blits from a 1D image. */
526       VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
527 
528       /** Pipeline that blits from a 2D image. */
529       VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
530 
531       /** Pipeline that blits from a 3D image. */
532       VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
533 
534       VkPipeline depth_only_1d_pipeline;
535       VkPipeline depth_only_2d_pipeline;
536       VkPipeline depth_only_3d_pipeline;
537 
538       VkPipeline stencil_only_1d_pipeline;
539       VkPipeline stencil_only_2d_pipeline;
540       VkPipeline stencil_only_3d_pipeline;
541       VkPipelineLayout pipeline_layout;
542       VkDescriptorSetLayout ds_layout;
543    } blit;
544 
   /* NOTE(review): the meaning of the 5-entry dimension is not visible
    * here; presumably one entry per blit2d source type - confirm. */
545    struct {
546       VkPipelineLayout p_layouts[5];
547       VkDescriptorSetLayout ds_layouts[5];
548       VkPipeline pipelines[5][NUM_META_FS_KEYS];
549 
550       VkPipeline depth_only_pipeline[5];
551 
552       VkPipeline stencil_only_pipeline[5];
553    } blit2d[MAX_SAMPLES_LOG2];
554 
   /* NOTE(review): presumably image-to-buffer copy - confirm. */
555    struct {
556       VkPipelineLayout img_p_layout;
557       VkDescriptorSetLayout img_ds_layout;
558       VkPipeline pipeline;
559       VkPipeline pipeline_3d;
560    } itob;
   /* NOTE(review): presumably buffer-to-image copy - confirm. */
561    struct {
562       VkPipelineLayout img_p_layout;
563       VkDescriptorSetLayout img_ds_layout;
564       VkPipeline pipeline;
565       VkPipeline pipeline_3d;
566    } btoi;
567    struct {
568       VkPipelineLayout img_p_layout;
569       VkDescriptorSetLayout img_ds_layout;
570       VkPipeline pipeline;
571    } btoi_r32g32b32;
   /* NOTE(review): presumably image-to-image copy - confirm. */
572    struct {
573       VkPipelineLayout img_p_layout;
574       VkDescriptorSetLayout img_ds_layout;
575       VkPipeline pipeline[MAX_SAMPLES_LOG2];
576       VkPipeline pipeline_3d;
577    } itoi;
578    struct {
579       VkPipelineLayout img_p_layout;
580       VkDescriptorSetLayout img_ds_layout;
581       VkPipeline pipeline;
582    } itoi_r32g32b32;
   /* NOTE(review): presumably compute image clear - confirm. */
583    struct {
584       VkPipelineLayout img_p_layout;
585       VkDescriptorSetLayout img_ds_layout;
586       VkPipeline pipeline[MAX_SAMPLES_LOG2];
587       VkPipeline pipeline_3d;
588    } cleari;
589    struct {
590       VkPipelineLayout img_p_layout;
591       VkDescriptorSetLayout img_ds_layout;
592       VkPipeline pipeline;
593    } cleari_r32g32b32;
594    struct {
595       VkPipelineLayout p_layout;
596       VkDescriptorSetLayout ds_layout;
597       VkPipeline pipeline[MAX_SAMPLES_LOG2];
598    } fmask_copy;
599 
600    struct {
601       VkPipelineLayout p_layout;
602       VkPipeline pipeline[NUM_META_FS_KEYS];
603    } resolve;
604 
   /* Resolve pipelines for color (rc), depth and stencil, each with one
    * entry per sample count.
    * NOTE(review): presumably the compute-shader resolve path - confirm. */
605    struct {
606       VkDescriptorSetLayout ds_layout;
607       VkPipelineLayout p_layout;
608       struct {
609          VkPipeline pipeline;
610          VkPipeline i_pipeline;
611          VkPipeline srgb_pipeline;
612       } rc[MAX_SAMPLES_LOG2];
613 
614       VkPipeline depth_zero_pipeline;
615       struct {
616          VkPipeline average_pipeline;
617          VkPipeline max_pipeline;
618          VkPipeline min_pipeline;
619       } depth[MAX_SAMPLES_LOG2];
620 
621       VkPipeline stencil_zero_pipeline;
622       struct {
623          VkPipeline max_pipeline;
624          VkPipeline min_pipeline;
625       } stencil[MAX_SAMPLES_LOG2];
626    } resolve_compute;
627 
   /* Same layout as resolve_compute, except that the color entries hold
    * one pipeline per NUM_META_FS_KEYS key.
    * NOTE(review): presumably the fragment-shader resolve path - confirm. */
628    struct {
629       VkDescriptorSetLayout ds_layout;
630       VkPipelineLayout p_layout;
631 
632       struct {
633          VkPipeline pipeline[NUM_META_FS_KEYS];
634       } rc[MAX_SAMPLES_LOG2];
635 
636       VkPipeline depth_zero_pipeline;
637       struct {
638          VkPipeline average_pipeline;
639          VkPipeline max_pipeline;
640          VkPipeline min_pipeline;
641       } depth[MAX_SAMPLES_LOG2];
642 
643       VkPipeline stencil_zero_pipeline;
644       struct {
645          VkPipeline max_pipeline;
646          VkPipeline min_pipeline;
647       } stencil[MAX_SAMPLES_LOG2];
648    } resolve_fragment;
649 
650    struct {
651       VkPipelineLayout p_layout;
652       VkPipeline decompress_pipeline;
653       VkPipeline resummarize_pipeline;
654    } depth_decomp[MAX_SAMPLES_LOG2];
655 
656    VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
657    VkPipelineLayout expand_depth_stencil_compute_p_layout;
658    VkPipeline expand_depth_stencil_compute_pipeline;
659 
660    struct {
661       VkPipelineLayout p_layout;
662       VkPipeline cmask_eliminate_pipeline;
663       VkPipeline fmask_decompress_pipeline;
664       VkPipeline dcc_decompress_pipeline;
665 
666       VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
667       VkPipelineLayout dcc_decompress_compute_p_layout;
668       VkPipeline dcc_decompress_compute_pipeline;
669    } fast_clear_flush;
670 
   /* Buffer fill and buffer copy pipelines. */
671    struct {
672       VkPipelineLayout fill_p_layout;
673       VkPipelineLayout copy_p_layout;
674       VkPipeline fill_pipeline;
675       VkPipeline copy_pipeline;
676    } buffer;
677 
   /* One pipeline per query type, sharing a single layout. */
678    struct {
679       VkDescriptorSetLayout ds_layout;
680       VkPipelineLayout p_layout;
681       VkPipeline occlusion_query_pipeline;
682       VkPipeline pipeline_statistics_query_pipeline;
683       VkPipeline tfb_query_pipeline;
684       VkPipeline timestamp_query_pipeline;
685       VkPipeline pg_query_pipeline;
686       VkPipeline ms_prim_gen_query_pipeline;
687    } query;
688 
689    struct {
690       VkDescriptorSetLayout ds_layout;
691       VkPipelineLayout p_layout;
692       VkPipeline pipeline[MAX_SAMPLES_LOG2];
693    } fmask_expand;
694 
   /* NOTE(review): the meaning of the 32-entry index is not visible here -
    * confirm against the DCC retile code. */
695    struct {
696       VkDescriptorSetLayout ds_layout;
697       VkPipelineLayout p_layout;
698       VkPipeline pipeline[32];
699    } dcc_retile;
700 
   /* Pipelines for the individual acceleration structure build stages,
    * the radix sort instance and a "null" acceleration structure with its
    * backing buffer and memory. */
701    struct {
702       VkPipelineLayout leaf_p_layout;
703       VkPipeline leaf_pipeline;
704       VkPipelineLayout morton_p_layout;
705       VkPipeline morton_pipeline;
706       VkPipelineLayout lbvh_main_p_layout;
707       VkPipeline lbvh_main_pipeline;
708       VkPipelineLayout lbvh_generate_ir_p_layout;
709       VkPipeline lbvh_generate_ir_pipeline;
710       VkPipelineLayout ploc_p_layout;
711       VkPipeline ploc_pipeline;
712       VkPipelineLayout encode_p_layout;
713       VkPipeline encode_pipeline;
714       VkPipeline encode_compact_pipeline;
715       VkPipelineLayout header_p_layout;
716       VkPipeline header_pipeline;
717       VkPipelineLayout update_p_layout;
718       VkPipeline update_pipeline;
719       VkPipelineLayout copy_p_layout;
720       VkPipeline copy_pipeline;
721 
722       struct radix_sort_vk *radix_sort;
723 
724       struct {
725          VkBuffer buffer;
726          VkDeviceMemory memory;
727          VkAccelerationStructureKHR accel_struct;
728       } null;
729    } accel_struct_build;
730 
   /* ETC2 decode state is embedded; ASTC decode state is held by pointer. */
731    struct vk_texcompress_etc2_state etc_decode;
732 
733    struct vk_texcompress_astc_state *astc_decode;
734 
735    struct {
736       VkDescriptorSetLayout ds_layout;
737       VkPipelineLayout p_layout;
738       VkPipeline pipeline;
739    } dgc_prepare;
740 };
741 
742 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
743 
744 static inline enum radv_queue_family
vk_queue_to_radv(const struct radv_physical_device * phys_dev,int queue_family_index)745 vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
746 {
747    if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
748       return RADV_QUEUE_FOREIGN;
749    if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
750       return RADV_QUEUE_IGNORED;
751 
752    assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
753    return phys_dev->vk_queue_to_radv[queue_family_index];
754 }
755 
756 enum amd_ip_type radv_queue_family_to_ring(const struct radv_physical_device *physical_device,
757                                            enum radv_queue_family f);
758 
759 static inline bool
radv_has_uvd(struct radv_physical_device * phys_dev)760 radv_has_uvd(struct radv_physical_device *phys_dev)
761 {
762    enum radeon_family family = phys_dev->rad_info.family;
763    /* Only support UVD on TONGA+ */
764    if (family < CHIP_TONGA)
765       return false;
766    return phys_dev->rad_info.ip[AMD_IP_UVD].num_queues > 0;
767 }
768 
/* Sizes and enable flags describing the scratch buffers and rings of a
 * queue. Embedded in radv_queue_state as `ring_info`.
 * NOTE(review): units of the size fields (bytes vs. dwords) are not visible
 * here - confirm before relying on them.
 */
769 struct radv_queue_ring_info {
770    uint32_t scratch_size_per_wave;
771    uint32_t scratch_waves;
772    uint32_t compute_scratch_size_per_wave;
773    uint32_t compute_scratch_waves;
774    uint32_t esgs_ring_size;
775    uint32_t gsvs_ring_size;
776    uint32_t attr_ring_size;
777    bool tess_rings;
778    bool task_rings;
779    bool mesh_scratch_ring;
780    bool gds;
781    bool gds_oa;
782    bool sample_positions;
783 };
784 
/* State attached to a queue for one queue family: the ring configuration,
 * the winsys buffer objects backing the rings, and the preamble/postamble
 * command streams. radv_queue embeds one instance (`state`) and can point to
 * a second one (`follower_state`).
 */
785 struct radv_queue_state {
786    enum radv_queue_family qf;
787    struct radv_queue_ring_info ring_info;
788 
   /* Buffer objects; most names mirror a field of ring_info. */
789    struct radeon_winsys_bo *scratch_bo;
790    struct radeon_winsys_bo *descriptor_bo;
791    struct radeon_winsys_bo *compute_scratch_bo;
792    struct radeon_winsys_bo *esgs_ring_bo;
793    struct radeon_winsys_bo *gsvs_ring_bo;
794    struct radeon_winsys_bo *tess_rings_bo;
795    struct radeon_winsys_bo *task_rings_bo;
796    struct radeon_winsys_bo *mesh_scratch_ring_bo;
797    struct radeon_winsys_bo *attr_ring_bo;
798    struct radeon_winsys_bo *gds_bo;
799    struct radeon_winsys_bo *gds_oa_bo;
800 
   /* Command streams.
    * NOTE(review): presumably submitted around user command buffers -
    * confirm. */
801    struct radeon_cmdbuf *initial_preamble_cs;
802    struct radeon_cmdbuf *initial_full_flush_preamble_cs;
803    struct radeon_cmdbuf *continue_preamble_cs;
804    struct radeon_cmdbuf *gang_wait_preamble_cs;
805    struct radeon_cmdbuf *gang_wait_postamble_cs;
806 
807    /* the uses_shadow_regs here will be set only for general queue */
808    bool uses_shadow_regs;
809    /* register state is saved in shadowed_regs buffer */
810    struct radeon_winsys_bo *shadowed_regs;
811    /* shadow regs preamble ib. This will be the first preamble ib.
812     * This ib has the packets to start register shadowing.
813     */
814    struct radeon_winsys_bo *shadow_regs_ib;
   /* Size of shadow_regs_ib in dwords (per the _dw suffix). */
815    uint32_t shadow_regs_ib_size_dw;
816 };
817 
/* Driver queue object. The common vk_queue is embedded as the first member
 * (`vk`); `device` points back to the owning radv_device.
 */
818 struct radv_queue {
819    struct vk_queue vk;
820    struct radv_device *device;
821    struct radeon_winsys_ctx *hw_ctx;
822    enum radeon_ctx_priority priority;
   /* Own queue state, plus an optional second state.
    * NOTE(review): follower_state and gang_sem_bo presumably serve gang
    * submission with a follower queue - confirm. */
823    struct radv_queue_state state;
824    struct radv_queue_state *follower_state;
825    struct radeon_winsys_bo *gang_sem_bo;
826 
827    uint64_t last_shader_upload_seq;
828    bool sqtt_present;
829 };
830 
831 int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
832                     const VkDeviceQueueCreateInfo *create_info,
833                     const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
834 
835 void radv_queue_finish(struct radv_queue *queue);
836 
837 enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj);
838 
/* Number of border color slots, and the byte size of a buffer holding one
 * VkClearColorValue per slot.
 */
839 #define RADV_BORDER_COLOR_COUNT       4096
840 #define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
841 
/* Bookkeeping for the border color buffer: one occupancy flag per slot, the
 * backing buffer object and a pointer through which colors are written.
 */
842 struct radv_device_border_color_data {
   /* used[i] is true while slot i is occupied. */
843    bool used[RADV_BORDER_COLOR_COUNT];
844 
845    struct radeon_winsys_bo *bo;
   /* NOTE(review): presumably the CPU mapping of `bo` - confirm. */
846    VkClearColorValue *colors_gpu_ptr;
847 
848    /* Mutex is required to guarantee vkCreateSampler thread safety
849     * given that we are writing to a buffer and checking color occupation */
850    mtx_t mutex;
851 };
852 
/* Forced VRS mode selector; RADV_FORCE_VRS_1x1 is the zero value.
 * NOTE(review): the WxH suffixes presumably denote the forced shading rate -
 * confirm.
 */
853 enum radv_force_vrs {
854    RADV_FORCE_VRS_1x1 = 0,
855    RADV_FORCE_VRS_2x2,
856    RADV_FORCE_VRS_2x1,
857    RADV_FORCE_VRS_1x2,
858 };
859 
/* State of a notifier that runs on its own thread.
 * NOTE(review): fd/watch look like an inotify descriptor and watch id, and
 * `quit` like the flag asking `thread` to stop - confirm against the
 * implementation.
 */
860 struct radv_notifier {
861    int fd;
862    int watch;
863    bool quit;
864    thrd_t thread;
865 };
866 
/* Data used for memory tracing via ftrace. */
867 struct radv_memory_trace_data {
868    /* ID of the PTE update event in ftrace data */
869    uint16_t ftrace_update_ptes_id;
870 
   /* NOTE(review): pipe_fds presumably holds num_cpus descriptors, one per
    * CPU - confirm. */
871    uint32_t num_cpus;
872    int *pipe_fds;
873 };
874 
/* Per-acceleration-structure record kept for RRA tracing; released with
 * radv_destroy_rra_accel_struct_data().
 */
875 struct radv_rra_accel_struct_data {
876    VkEvent build_event;
   /* Address and size of the acceleration structure. */
877    uint64_t va;
878    uint64_t size;
   /* NOTE(review): buffer/memory presumably hold a copy of the structure
    * made for the trace - confirm. */
879    VkBuffer buffer;
880    VkDeviceMemory memory;
881    VkAccelerationStructureTypeKHR type;
882    bool is_dead;
883 };
884 
885 void radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data);
886 
/* Header of three 32-bit words for the ray history data.
 * NOTE(review): `offset` is presumably the current write offset into the ray
 * history buffer - confirm.
 */
887 struct radv_ray_history_header {
888    uint32_t offset;
889    uint32_t dispatch_index;
890    uint32_t submit_base_index;
891 };
892 
/* Token kinds for packed ray history tokens; only the end-of-trace token
 * (value 0) is defined here.
 */
893 enum radv_packed_token_type {
894    radv_packed_token_end_trace,
895 };
896 
/* Header that starts a packed token: three bit-fields filling one 32-bit
 * word (29 + 1 + 2 bits).
 * NOTE(review): token_type presumably holds a radv_packed_token_type value -
 * confirm.
 */
897 struct radv_packed_token_header {
898    uint32_t launch_index : 29;
899    uint32_t hit : 1;
900    uint32_t token_type : 2;
901 };
902 
/* Packed end-of-trace token. The layout is fixed: the static_assert below
 * pins the total size to 76 bytes, so fields must not be added, removed or
 * resized without updating the consumer of this data.
 */
903 struct radv_packed_end_trace_token {
904    struct radv_packed_token_header header;
905 
   /* Acceleration structure value split into low and high 32-bit halves. */
906    uint32_t accel_struct_lo;
907    uint32_t accel_struct_hi;
908 
909    uint32_t flags : 16;
910    uint32_t dispatch_index : 16;
911 
   /* 4 + 4 + 16 + 8 bits share one 32-bit word. */
912    uint32_t sbt_offset : 4;
913    uint32_t sbt_stride : 4;
914    uint32_t miss_index : 16;
915    uint32_t cull_mask : 8;
916 
   /* Ray description: origin, direction and the [tmin, tmax] interval. */
917    float origin[3];
918    float tmin;
919    float direction[3];
920    float tmax;
921 
922    uint32_t iteration_count : 16;
923    uint32_t instance_count : 16;
924 
925    uint32_t ahit_count : 16;
926    uint32_t isec_count : 16;
927 
928    uint32_t primitive_id;
929    uint32_t geometry_id;
930 
931    uint32_t instance_id : 24;
932    uint32_t hit_kind : 8;
933 
934    float t;
935 };
936 static_assert(sizeof(struct radv_packed_end_trace_token) == 76, "Unexpected radv_packed_end_trace_token size");
937 
/* Identifies the kind of metadata chunk that follows a
 * radv_rra_ray_history_metadata_info header. Values start at 1.
 */
938 enum radv_rra_ray_history_metadata_type {
939    RADV_RRA_COUNTER_INFO = 1,
940    RADV_RRA_DISPATCH_SIZE = 2,
941    RADV_RRA_TRAVERSAL_FLAGS = 3,
942 };
943 
/* Header placed in front of each metadata chunk in
 * radv_rra_ray_history_metadata: a 32-bit type, 32 bits of padding and a
 * 64-bit size.
 * NOTE(review): `size` is presumably the byte size of the chunk that
 * follows - confirm.
 */
944 struct radv_rra_ray_history_metadata_info {
945    enum radv_rra_ray_history_metadata_type type : 32;
946    uint32_t padding;
947    uint64_t size;
948 };
949 
/* Pipeline kinds recorded in the ray history counter; only one is defined. */
enum radv_rra_pipeline_type {
   RADV_RRA_PIPELINE_RAY_TRACING,
};
953 
954 struct radv_rra_ray_history_counter {
955    uint32_t dispatch_size[3];
956    uint32_t hit_shader_count;
957    uint32_t miss_shader_count;
958    uint32_t shader_count;
959    uint64_t pipeline_api_hash;
960    uint32_t mode;
961    uint32_t mask;
962    uint32_t stride;
963    uint32_t data_size;
964    uint32_t lost_token_size;
965    uint32_t ray_id_begin;
966    uint32_t ray_id_end;
967    enum radv_rra_pipeline_type pipeline_type : 32;
968 };
969 
/* Dispatch-size section: three 32-bit sizes plus one padding word. */
struct radv_rra_ray_history_dispatch_size {
   uint32_t size[3];
   uint32_t padding;
};
974 
/* Traversal-flags section: two 1-bit flags and 30 reserved bits in the first
 * word, followed by one padding word. */
struct radv_rra_ray_history_traversal_flags {
   uint32_t box_sort_mode : 1;
   uint32_t node_ptr_flags : 1;
   uint32_t reserved : 30;
   uint32_t padding;
};
981 
982 struct radv_rra_ray_history_metadata {
983    struct radv_rra_ray_history_metadata_info counter_info;
984    struct radv_rra_ray_history_counter counter;
985 
986    struct radv_rra_ray_history_metadata_info dispatch_size_info;
987    struct radv_rra_ray_history_dispatch_size dispatch_size;
988 
989    struct radv_rra_ray_history_metadata_info traversal_flags_info;
990    struct radv_rra_ray_history_traversal_flags traversal_flags;
991 };
992 static_assert(sizeof(struct radv_rra_ray_history_metadata) == 136,
993               "radv_rra_ray_history_metadata does not match RRA expectations");
994 
995 struct radv_rra_ray_history_data {
996    struct radv_rra_ray_history_metadata metadata;
997 };
998 
999 struct radv_rra_trace_data {
1000    struct hash_table *accel_structs;
1001    struct hash_table_u64 *accel_struct_vas;
1002    simple_mtx_t data_mtx;
1003    bool validate_as;
1004    bool copy_after_build;
1005    bool triggered;
1006    uint32_t copy_memory_index;
1007 
1008    struct util_dynarray ray_history;
1009    VkBuffer ray_history_buffer;
1010    VkDeviceMemory ray_history_memory;
1011    void *ray_history_data;
1012    uint64_t ray_history_addr;
1013    uint32_t ray_history_buffer_size;
1014    uint32_t ray_history_resolution_scale;
1015 };
1016 
/* Indices of the device dispatch tables. RADV_DISPATCH_TABLE_COUNT must stay
 * last so that it equals the number of tables. */
enum radv_dispatch_table {
   RADV_DEVICE_DISPATCH_TABLE,
   RADV_APP_DISPATCH_TABLE,
   RADV_RGP_DISPATCH_TABLE,
   RADV_RRA_DISPATCH_TABLE,
   RADV_RMV_DISPATCH_TABLE,
   RADV_CTX_ROLL_DISPATCH_TABLE,
   RADV_DISPATCH_TABLE_COUNT,
};
1026 
1027 struct radv_layer_dispatch_tables {
1028    struct vk_device_dispatch_table app;
1029    struct vk_device_dispatch_table rgp;
1030    struct vk_device_dispatch_table rra;
1031    struct vk_device_dispatch_table rmv;
1032    struct vk_device_dispatch_table ctx_roll;
1033 };
1034 
/* Buffer robustness level, named after the Vulkan feature that enables it. */
enum radv_buffer_robustness {
   RADV_BUFFER_ROBUSTNESS_DISABLED,
   RADV_BUFFER_ROBUSTNESS_1, /* robustBufferAccess */
   RADV_BUFFER_ROBUSTNESS_2, /* robustBufferAccess2 */
};
1040 
1041 struct radv_sqtt_timestamp {
1042    uint8_t *map;
1043    unsigned offset;
1044    uint64_t size;
1045    struct radeon_winsys_bo *bo;
1046    struct list_head list;
1047 };
1048 
/* Device cache key: four 1-bit flags packed into a single 32-bit word. */
struct radv_device_cache_key {
   uint32_t disable_trunc_coord : 1;
   uint32_t image_2d_view_of_3d : 1;
   uint32_t mesh_shader_queries : 1;
   uint32_t primitives_generated_query : 1;
};
1055 
/* Description of one shader printf format: the format string, a divergence
 * bitmask, and up to 32 per-element sizes. */
struct radv_printf_format {
   char *string;
   uint32_t divergence_mask;
   uint8_t element_sizes[32];
};
1061 
1062 struct radv_printf_data {
1063    uint32_t buffer_size;
1064    VkBuffer buffer;
1065    VkDeviceMemory memory;
1066    VkDeviceAddress buffer_addr;
1067    void *data;
1068    struct util_dynarray formats;
1069 };
1070 
1071 VkResult radv_printf_data_init(struct radv_device *device);
1072 
1073 void radv_printf_data_finish(struct radv_device *device);
1074 
/* Two 32-bit words heading the printf buffer. */
struct radv_printf_buffer_header {
   uint32_t offset;
   uint32_t size;
};
1079 
1080 typedef struct nir_builder nir_builder;
1081 typedef struct nir_def nir_def;
1082 
1083 void radv_build_printf(nir_builder *b, nir_def *cond, const char *format, ...);
1084 
1085 void radv_dump_printf_data(struct radv_device *device);
1086 
1087 void radv_device_associate_nir(struct radv_device *device, nir_shader *nir);
1088 
1089 struct radv_device {
1090    struct vk_device vk;
1091 
1092    struct radv_instance *instance;
1093    struct radeon_winsys *ws;
1094 
1095    struct radv_layer_dispatch_tables layer_dispatch;
1096 
1097    struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
1098    struct radv_meta_state meta_state;
1099 
1100    struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
1101    int queue_count[RADV_MAX_QUEUE_FAMILIES];
1102 
1103    bool pbb_allowed;
1104    uint32_t scratch_waves;
1105    uint32_t dispatch_initiator;
1106    uint32_t dispatch_initiator_task;
1107 
1108    /* MSAA sample locations.
1109     * The first index is the sample index.
1110     * The second index is the coordinate: X, Y. */
1111    float sample_locations_1x[1][2];
1112    float sample_locations_2x[2][2];
1113    float sample_locations_4x[4][2];
1114    float sample_locations_8x[8][2];
1115 
1116    /* GFX7 and later */
1117    uint32_t gfx_init_size_dw;
1118    struct radeon_winsys_bo *gfx_init;
1119 
1120    struct radeon_winsys_bo *trace_bo;
1121    uint32_t *trace_id_ptr;
1122 
1123    /* Whether to keep shader debug info, for debugging. */
1124    bool keep_shader_info;
1125 
1126    struct radv_physical_device *physical_device;
1127 
1128    /* Backup in-memory cache to be used if the app doesn't provide one */
1129    struct vk_pipeline_cache *mem_cache;
1130 
1131    /*
1132     * use different counters so MSAA MRTs get consecutive surface indices,
1133     * even if MASK is allocated in between.
1134     */
1135    uint32_t image_mrt_offset_counter;
1136    uint32_t fmask_mrt_offset_counter;
1137 
1138    struct list_head shader_arenas;
1139    struct hash_table_u64 *capture_replay_arena_vas;
1140    unsigned shader_arena_shift;
1141    uint8_t shader_free_list_mask;
1142    struct radv_shader_free_list shader_free_list;
1143    struct radv_shader_free_list capture_replay_free_list;
1144    struct list_head shader_block_obj_pool;
1145    mtx_t shader_arena_mutex;
1146 
1147    mtx_t shader_upload_hw_ctx_mutex;
1148    struct radeon_winsys_ctx *shader_upload_hw_ctx;
1149    VkSemaphore shader_upload_sem;
1150    uint64_t shader_upload_seq;
1151    struct list_head shader_dma_submissions;
1152    mtx_t shader_dma_submission_list_mutex;
1153    cnd_t shader_dma_submission_list_cond;
1154 
1155    /* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
1156    bool shader_use_invisible_vram;
1157 
1158    /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
1159    enum radv_buffer_robustness buffer_robustness;
1160 
1161    /* Whether to inline the compute dispatch size in user sgprs. */
1162    bool load_grid_size_from_user_sgpr;
1163 
1164    /* Whether the driver uses a global BO list. */
1165    bool use_global_bo_list;
1166 
1167    /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
1168    int force_aniso;
1169 
1170    /* Always disable TRUNC_COORD. */
1171    bool disable_trunc_coord;
1172 
1173    struct radv_device_border_color_data border_color_data;
1174 
1175    /* Thread trace. */
1176    struct ac_sqtt sqtt;
1177    bool sqtt_enabled;
1178    bool sqtt_triggered;
1179 
1180    /* SQTT timestamps for queue events. */
1181    simple_mtx_t sqtt_timestamp_mtx;
1182    struct radv_sqtt_timestamp sqtt_timestamp;
1183 
1184    /* SQTT timed cmd buffers. */
1185    simple_mtx_t sqtt_command_pool_mtx;
1186    struct vk_command_pool *sqtt_command_pool[2];
1187 
1188    /* Memory trace. */
1189    struct radv_memory_trace_data memory_trace;
1190 
1191    /* SPM. */
1192    struct ac_spm spm;
1193 
1194    /* Radeon Raytracing Analyzer trace. */
1195    struct radv_rra_trace_data rra_trace;
1196 
1197    FILE *ctx_roll_file;
1198    simple_mtx_t ctx_roll_mtx;
1199 
1200    /* Trap handler. */
1201    struct radv_shader *trap_handler_shader;
1202    struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
1203    uint32_t *tma_ptr;
1204 
1205    /* Overallocation. */
1206    bool overallocation_disallowed;
1207    uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
1208    mtx_t overallocation_mutex;
1209 
1210    /* RADV_FORCE_VRS. */
1211    struct radv_notifier notifier;
1212    enum radv_force_vrs force_vrs;
1213 
1214    /* Depth image for VRS when not bound by the app. */
1215    struct {
1216       struct radv_image *image;
1217       struct radv_buffer *buffer; /* HTILE */
1218       struct radv_device_memory *mem;
1219    } vrs;
1220 
1221    /* Prime blit sdma queue */
1222    struct radv_queue *private_sdma_queue;
1223 
1224    struct radv_shader_part_cache vs_prologs;
1225    struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
1226    struct radv_shader_part *instance_rate_vs_prologs[816];
1227 
1228    struct radv_shader_part_cache ps_epilogs;
1229 
1230    struct radv_shader_part_cache tcs_epilogs;
1231 
1232    simple_mtx_t trace_mtx;
1233 
1234    /* Whether per-vertex VRS is forced. */
1235    bool force_vrs_enabled;
1236 
1237    simple_mtx_t pstate_mtx;
1238    unsigned pstate_cnt;
1239 
1240    /* BO to contain some performance counter helpers:
1241     * - A lock for profiling cmdbuffers.
1242     * - a temporary fence for the end query synchronization.
1243     * - the pass to use for profiling. (as an array of bools)
1244     */
1245    struct radeon_winsys_bo *perf_counter_bo;
1246 
1247    /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
1248    struct radeon_cmdbuf **perf_counter_lock_cs;
1249 
1250    bool uses_shadow_regs;
1251 
1252    struct hash_table *rt_handles;
1253    simple_mtx_t rt_handles_mtx;
1254 
1255    struct radv_printf_data printf;
1256 
1257    struct radv_device_cache_key cache_key;
1258    blake3_hash cache_hash;
1259 
1260    /* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
1261    char *gpu_hang_report;
1262 };
1263 
/* Device pstate and performance-counter entry points; the definitions live
 * outside this header. */
bool radv_device_set_pstate(struct radv_device *device, bool enable);
bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device);
1267 
1268 struct radv_device_memory {
1269    struct vk_object_base base;
1270    struct radeon_winsys_bo *bo;
1271    /* for dedicated allocations */
1272    struct radv_image *image;
1273    struct radv_buffer *buffer;
1274    uint32_t heap_index;
1275    uint64_t alloc_size;
1276    void *map;
1277    void *user_ptr;
1278 
1279 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
1280    struct AHardwareBuffer *android_hardware_buffer;
1281 #endif
1282 };
1283 
/* Init/finish pair for struct radv_device_memory; the definitions live
 * outside this header. */
void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_device_memory_finish(struct radv_device_memory *mem);
1286 
1287 struct radv_buffer {
1288    struct vk_buffer vk;
1289 
1290    /* Set when bound */
1291    struct radeon_winsys_bo *bo;
1292    VkDeviceSize offset;
1293 };
1294 
/* Init/finish pair for struct radv_buffer; the definitions live outside this
 * header. */
void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo,
                      uint64_t size, uint64_t offset);
void radv_buffer_finish(struct radv_buffer *buffer);
1298 
/* One bit per dynamic state, using bits 0..49 of a 64-bit mask.
 * RADV_DYNAMIC_ALL is the union of all of them.
 *
 * Enumerators from bit 31 upwards do not fit in `int`; this relies on the
 * compiler accepting wider enumerator values. */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_DYNAMIC_POLYGON_MODE = 1ull << 30,
   RADV_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
   RADV_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
   RADV_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
   RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
   RADV_DYNAMIC_SAMPLE_MASK = 1ull << 35,
   RADV_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
   RADV_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
   RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
   RADV_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
   RADV_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
   RADV_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
   RADV_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
   RADV_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
   RADV_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
   RADV_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
   RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
   RADV_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
   RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
   RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
   RADV_DYNAMIC_ALL = (1ull << 50) - 1,
};
1352 
/* Command buffer dirty flags in a 64-bit mask. Bits 0..49 are the dynamic
 * state bits; bits 50..61 are the remaining dirty flags. */
enum radv_cmd_dirty_bits {
   /* Keep the dynamic state dirty bits in sync with
    * enum radv_dynamic_state_bits */
   RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE = 1ull << 30,
   RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN = 1ull << 31,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE = 1ull << 32,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE = 1ull << 33,
   RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE = 1ull << 34,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_MASK = 1ull << 35,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE = 1ull << 36,
   RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE = 1ull << 37,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE = 1ull << 38,
   RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE = 1ull << 39,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE = 1ull << 40,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK = 1ull << 41,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE = 1ull << 42,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES = 1ull << 43,
   RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE = 1ull << 44,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
   RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE = 1ull << 49,
   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 50) - 1,

   /* Non-dynamic-state dirty flags start right above the dynamic range. */
   RADV_CMD_DIRTY_PIPELINE = 1ull << 50,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 51,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 52,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 53,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 54,
   RADV_CMD_DIRTY_GUARDBAND = 1ull << 55,
   RADV_CMD_DIRTY_RBPLUS = 1ull << 56,
   RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 57,
   RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 58,
   RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 59,
   RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 60,
   RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 61,
};
1420 
/* Cache flush/invalidate and synchronization requests, one bit each, plus
 * two combined masks at the end. */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   /* All four framebuffer cache flush+invalidate bits. */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
                                         RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),

   /* Instruction, scalar, vector and L2 caches plus a CS partial flush. */
   RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
                                 RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
1459 
1460 struct radv_vertex_binding {
1461    VkDeviceSize offset;
1462    VkDeviceSize size;
1463    VkDeviceSize stride;
1464 };
1465 
1466 struct radv_streamout_binding {
1467    struct radv_buffer *buffer;
1468    VkDeviceSize offset;
1469    VkDeviceSize size;
1470 };
1471 
/* Streamout state stored in the command buffer state. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};
1482 
1483 struct radv_sample_locations_state {
1484    VkSampleCountFlagBits per_pixel;
1485    VkExtent2D grid_size;
1486    uint32_t count;
1487    VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
1488 };
1489 
1490 struct radv_dynamic_state {
1491    struct vk_dynamic_graphics_state vk;
1492 
1493    /**
1494     * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
1495     * Defines the set of saved dynamic state.
1496     */
1497    uint64_t mask;
1498 
1499    struct {
1500       struct {
1501          float scale[3];
1502          float translate[3];
1503       } xform[MAX_VIEWPORTS];
1504    } hw_vp;
1505 
1506    struct radv_sample_locations_state sample_location;
1507 
1508    VkImageAspectFlags feedback_loop_aspects;
1509 };
1510 
/* Lookups from an integer id to an option name string, for debug and
 * perftest options respectively (going by the names; the definitions live
 * outside this header). */
const char *radv_get_debug_option_name(int id);

const char *radv_get_perftest_option_name(int id);
1514 
/* Color buffer values; the fields are named after the CB_* registers they
 * hold. */
struct radv_color_buffer_info {
   uint64_t cb_color_base;
   uint64_t cb_color_cmask;
   uint64_t cb_color_fmask;
   uint64_t cb_dcc_base;
   uint32_t cb_color_slice;
   uint32_t cb_color_view;
   uint32_t cb_color_info;
   uint32_t cb_color_attrib;
   uint32_t cb_color_attrib2; /* GFX9 and later */
   uint32_t cb_color_attrib3; /* GFX10 and later */
   uint32_t cb_dcc_control;
   uint32_t cb_color_cmask_slice;
   uint32_t cb_color_fmask_slice;
   /* The two members share storage; which one is meaningful depends on the
    * GFX generation. */
   union {
      uint32_t cb_color_pitch; // GFX6-GFX8
      uint32_t cb_mrt_epitch;  // GFX9+
   };
};
1534 
/* Depth/stencil buffer values; the fields are named after the DB_* registers
 * they hold. The five 64-bit base addresses come first. */
struct radv_ds_buffer_info {
   uint64_t db_z_read_base;
   uint64_t db_stencil_read_base;
   uint64_t db_z_write_base;
   uint64_t db_stencil_write_base;
   uint64_t db_htile_data_base;
   uint32_t db_depth_info;
   uint32_t db_z_info;
   uint32_t db_stencil_info;
   uint32_t db_depth_view;
   uint32_t db_depth_size;
   uint32_t db_depth_slice;
   uint32_t db_htile_surface;
   uint32_t db_z_info2;       /* GFX9 only */
   uint32_t db_stencil_info2; /* GFX9 only */
   uint32_t db_render_override2;
   uint32_t db_render_control;
};
1553 
1554 void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1555                                    struct radv_image_view *iview);
1556 void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
1557                                 struct radv_image_view *iview, VkImageAspectFlags ds_aspects);
1558 void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
1559                                  struct radv_ds_buffer_info *ds);
1560 
1561 void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
1562                                       unsigned *db_render_control);
1563 /**
1564  * Attachment state when recording a renderpass instance.
1565  *
1566  * The clear value is valid only if there exists a pending clear.
1567  */
1568 struct radv_attachment {
1569    VkFormat format;
1570    struct radv_image_view *iview;
1571    VkImageLayout layout;
1572    VkImageLayout stencil_layout;
1573 
1574    union {
1575       struct radv_color_buffer_info cb;
1576       struct radv_ds_buffer_info ds;
1577    };
1578 
1579    struct radv_image_view *resolve_iview;
1580    VkResolveModeFlagBits resolve_mode;
1581    VkResolveModeFlagBits stencil_resolve_mode;
1582    VkImageLayout resolve_layout;
1583    VkImageLayout stencil_resolve_layout;
1584 };
1585 
1586 struct radv_rendering_state {
1587    bool active;
1588    bool has_image_views;
1589    VkRect2D area;
1590    uint32_t layer_count;
1591    uint32_t view_mask;
1592    uint32_t color_samples;
1593    uint32_t ds_samples;
1594    uint32_t max_samples;
1595    struct radv_sample_locations_state sample_locations;
1596    uint32_t color_att_count;
1597    struct radv_attachment color_att[MAX_RTS];
1598    struct radv_attachment ds_att;
1599    VkImageAspectFlags ds_att_aspects;
1600    struct radv_attachment vrs_att;
1601    VkExtent2D vrs_texel_size;
1602 };
1603 
1604 struct radv_descriptor_state {
1605    struct radv_descriptor_set *sets[MAX_SETS];
1606    uint32_t dirty;
1607    uint32_t valid;
1608    struct radv_push_descriptor_set push_set;
1609    uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1610    uint64_t descriptor_buffers[MAX_SETS];
1611    bool need_indirect_descriptor_sets;
1612 };
1613 
/* Push constant size and dynamic offset count. */
struct radv_push_constant_state {
   uint32_t size;
   uint32_t dynamic_offset_count;
};
1618 
/* RGP flush flags: sixteen single-bit values covering bits 0..15. */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};
1637 
/* Sample shading enable flag and minimum sample shading value. */
struct radv_multisample_state {
   bool sample_shading_enable;
   float min_sample_shading;
};
1642 
/* Base value plus three flags for IA_MULTI_VGT_PARAM (going by the struct
 * name). */
struct radv_ia_multi_vgt_param_helpers {
   uint32_t base;
   bool partial_es_wave;
   bool ia_switch_on_eoi;
   bool partial_vs_wave;
};
1649 
1650 struct radv_cmd_state {
1651    /* Vertex descriptors */
1652    uint64_t vb_va;
1653    unsigned vb_size;
1654 
1655    bool predicating;
1656    uint64_t dirty;
1657 
1658    VkShaderStageFlags active_stages;
1659    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
1660    struct radv_shader *gs_copy_shader;
1661    struct radv_shader *last_vgt_shader;
1662    struct radv_shader *rt_prolog;
1663 
1664    struct radv_shader_object *shader_objs[MESA_VULKAN_SHADER_STAGES];
1665 
1666    uint32_t prefetch_L2_mask;
1667 
1668    struct radv_graphics_pipeline *graphics_pipeline;
1669    struct radv_graphics_pipeline *emitted_graphics_pipeline;
1670    struct radv_compute_pipeline *compute_pipeline;
1671    struct radv_compute_pipeline *emitted_compute_pipeline;
1672    struct radv_ray_tracing_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
1673    struct radv_dynamic_state dynamic;
1674    struct radv_vs_input_state dynamic_vs_input;
1675    struct radv_streamout_state streamout;
1676 
1677    struct radv_rendering_state render;
1678 
1679    /* Index buffer */
1680    uint32_t index_type;
1681    uint32_t max_index_count;
1682    uint64_t index_va;
1683    int32_t last_index_type;
1684 
1685    uint32_t last_primitive_reset_index; /* only relevant on GFX6-7 */
1686    enum radv_cmd_flush_bits flush_bits;
1687    unsigned active_occlusion_queries;
1688    bool perfect_occlusion_queries_enabled;
1689    unsigned active_pipeline_queries;
1690    unsigned active_pipeline_gds_queries;
1691    unsigned active_pipeline_ace_queries; /* Task shader invocations query */
1692    unsigned active_prims_gen_queries;
1693    unsigned active_prims_xfb_queries;
1694    unsigned active_prims_gen_gds_queries;
1695    unsigned active_prims_xfb_gds_queries;
1696    uint32_t trace_id;
1697    uint32_t last_ia_multi_vgt_param;
1698    uint32_t last_ge_cntl;
1699 
1700    uint32_t last_num_instances;
1701    uint32_t last_first_instance;
1702    bool last_vertex_offset_valid;
1703    uint32_t last_vertex_offset;
1704    uint32_t last_drawid;
1705    uint32_t last_subpass_color_count;
1706 
1707    uint32_t last_sx_ps_downconvert;
1708    uint32_t last_sx_blend_opt_epsilon;
1709    uint32_t last_sx_blend_opt_control;
1710 
1711    uint32_t last_db_count_control;
1712 
1713    uint32_t last_db_shader_control;
1714 
1715    /* Whether CP DMA is busy/idle. */
1716    bool dma_is_busy;
1717 
1718    /* Whether any images that are not L2 coherent are dirty from the CB. */
1719    bool rb_noncoherent_dirty;
1720 
1721    /* Conditional rendering info. */
1722    uint8_t predication_op; /* 32-bit or 64-bit predicate value */
1723    int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
1724    uint64_t predication_va;
1725 
1726    /* Inheritance info. */
1727    VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
1728    bool inherited_occlusion_queries;
1729    VkQueryControlFlags inherited_query_control_flags;
1730 
1731    bool context_roll_without_scissor_emitted;
1732 
1733    /* SQTT related state. */
1734    uint32_t current_event_type;
1735    uint32_t num_events;
1736    uint32_t num_layout_transitions;
1737    bool in_barrier;
1738    bool pending_sqtt_barrier_end;
1739    enum rgp_flush_bits sqtt_flush_bits;
1740 
1741    /* NGG culling state. */
1742    bool has_nggc;
1743 
1744    /* Mesh shading state. */
1745    bool mesh_shading;
1746 
1747    uint8_t cb_mip[MAX_RTS];
1748    uint8_t ds_mip;
1749 
1750    /* Whether DRAW_{INDEX}_INDIRECT_{MULTI} is emitted. */
1751    bool uses_draw_indirect;
1752 
1753    uint32_t rt_stack_size;
1754 
1755    struct radv_shader_part *emitted_vs_prolog;
1756    uint32_t vbo_misaligned_mask;
1757    uint32_t vbo_misaligned_mask_invalid;
1758    uint32_t vbo_bound_mask;
1759 
1760    struct radv_shader_part *emitted_tcs_epilog;
1761    struct radv_shader_part *emitted_ps_epilog;
1762 
1763    /* Per-vertex VRS state. */
1764    uint32_t last_vrs_rates;
1765    int8_t last_vrs_rates_sgpr_idx;
1766 
1767    /* Whether to suspend streamout for internal driver operations. */
1768    bool suspend_streamout;
1769 
1770    /* Whether this commandbuffer uses performance counters. */
1771    bool uses_perf_counters;
1772 
1773    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
1774 
1775    /* Tessellation info when patch control points is dynamic. */
1776    unsigned tess_num_patches;
1777    unsigned tess_lds_size;
1778 
1779    unsigned col_format_non_compacted;
1780 
1781    /* Binning state */
1782    unsigned last_pa_sc_binner_cntl_0;
1783 
1784    struct radv_multisample_state ms;
1785 
1786    /* Custom blend mode for internal operations. */
1787    unsigned custom_blend_mode;
1788    unsigned db_render_control;
1789 
1790    unsigned rast_prim;
1791 
1792    uint32_t vtx_base_sgpr;
1793    uint8_t vtx_emit_num;
1794    bool uses_drawid;
1795    bool uses_baseinstance;
1796 
1797    bool uses_out_of_order_rast;
1798    bool uses_vrs_attachment;
1799    bool uses_dynamic_patch_control_points;
1800    bool uses_dynamic_vertex_binding_stride;
1801 };
1802 
1803 struct radv_cmd_buffer_upload {
1804    uint8_t *map;
1805    unsigned offset;
1806    uint64_t size;
1807    struct radeon_winsys_bo *upload_bo;
1808    struct list_head list;
1809 };
1810 
1811 struct radv_cmd_buffer {
1812    struct vk_command_buffer vk;
1813 
1814    struct radv_device *device;
1815 
1816    VkCommandBufferUsageFlags usage_flags;
1817    struct radeon_cmdbuf *cs;
1818    struct radv_cmd_state state;
1819    struct radv_buffer *vertex_binding_buffers[MAX_VBS];
1820    struct radv_vertex_binding vertex_bindings[MAX_VBS];
1821    uint32_t used_vertex_bindings;
1822    struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
1823    enum radv_queue_family qf;
1824 
1825    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
1826    VkShaderStageFlags push_constant_stages;
1827    struct radv_descriptor_set_header meta_push_descriptors;
1828 
1829    struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
1830 
1831    struct radv_push_constant_state push_constant_state[MAX_BIND_POINTS];
1832 
1833    uint64_t descriptor_buffers[MAX_SETS];
1834 
1835    struct radv_cmd_buffer_upload upload;
1836 
1837    uint32_t scratch_size_per_wave_needed;
1838    uint32_t scratch_waves_wanted;
1839    uint32_t compute_scratch_size_per_wave_needed;
1840    uint32_t compute_scratch_waves_wanted;
1841    uint32_t esgs_ring_size_needed;
1842    uint32_t gsvs_ring_size_needed;
1843    bool tess_rings_needed;
1844    bool task_rings_needed;
1845    bool mesh_scratch_ring_needed;
1846    bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
1847    bool gds_oa_needed; /* for GFX10 streamout */
1848    bool sample_positions_needed;
1849 
1850    uint64_t gfx9_fence_va;
1851    uint32_t gfx9_fence_idx;
1852    uint64_t gfx9_eop_bug_va;
1853 
1854    uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
1855    bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
1856 
1857    struct set vs_prologs;
1858    struct set ps_epilogs;
1859    struct set tcs_epilogs;
1860 
1861    /**
1862     * Gang state.
1863     * Used when the command buffer needs work done on a different queue
1864     * (eg. when a graphics command buffer needs compute work).
1865     * Currently only one follower is possible per command buffer.
1866     */
1867    struct {
1868       /** Follower command stream. */
1869       struct radeon_cmdbuf *cs;
1870 
1871       /** Flush bits for the follower cmdbuf. */
1872       enum radv_cmd_flush_bits flush_bits;
1873 
1874       /**
1875        * For synchronization between the follower and leader.
1876        * The value of these semaphores are incremented whenever we
1877        * encounter a barrier that affects the follower.
1878        *
1879        * DWORD 0: Leader to follower semaphore.
1880        *          The leader writes the value and the follower waits.
1881        * DWORD 1: Follower to leader semaphore.
1882        *          The follower writes the value, and the leader waits.
1883        */
1884       struct {
1885          uint64_t va;                     /* Virtual address of the semaphore. */
1886          uint32_t leader_value;           /* Current value of the leader. */
1887          uint32_t emitted_leader_value;   /* Last value emitted by the leader. */
1888          uint32_t follower_value;         /* Current value of the follower. */
1889          uint32_t emitted_follower_value; /* Last value emitted by the follower. */
1890       } sem;
1891    } gang;
1892 
1893    /**
1894     * Whether a query pool has been reset and we have to flush caches.
1895     */
1896    bool pending_reset_query;
1897 
1898    /**
1899     * Bitmask of pending active query flushes.
1900     */
1901    enum radv_cmd_flush_bits active_query_flush_bits;
1902 
1903    struct {
1904       struct radv_video_session *vid;
1905       struct radv_video_session_params *params;
1906       struct rvcn_sq_var sq;
1907       struct rvcn_decode_buffer_s *decode_buffer;
1908    } video;
1909 
1910    struct {
1911       /* Temporary space for some transfer queue copy command workarounds. */
1912       struct radeon_winsys_bo *copy_temp;
1913    } transfer;
1914 
1915    uint64_t shader_upload_seq;
1916 
1917    uint32_t sqtt_cb_id;
1918 
1919    struct util_dynarray ray_history;
1920 };
1921 
1922 static inline bool
radv_cmdbuf_has_stage(const struct radv_cmd_buffer * cmd_buffer,gl_shader_stage stage)1923 radv_cmdbuf_has_stage(const struct radv_cmd_buffer *cmd_buffer, gl_shader_stage stage)
1924 {
1925    return !!(cmd_buffer->state.active_stages & mesa_to_vk_shader_stage(stage));
1926 }
1927 
1928 static inline uint32_t
radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer * cmd_buffer)1929 radv_get_num_pipeline_stat_queries(struct radv_cmd_buffer *cmd_buffer)
1930 {
1931    /* SAMPLE_STREAMOUTSTATS also requires PIPELINESTAT_START to be enabled. */
1932    return cmd_buffer->state.active_pipeline_queries + cmd_buffer->state.active_prims_gen_queries +
1933           cmd_buffer->state.active_prims_xfb_queries;
1934 }
1935 
/* Command buffer ops table; declared here, defined in another translation unit. */
extern const struct vk_command_buffer_ops radv_cmd_buffer_ops;
1937 
/**
 * Description of one compute dispatch, passed to radv_compute_dispatch().
 */
struct radv_dispatch_info {
   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint32_t blocks[3];

   /**
    * A starting offset for the grid. If unaligned is set, the offset
    * must still be aligned.
    */
   uint32_t offsets[3];

   /**
    * Whether it's an unaligned compute dispatch.
    */
   bool unaligned;

   /**
    * Whether waves must be launched in order.
    */
   bool ordered;

   /**
    * Indirect compute parameters resource.
    * NOTE(review): va is presumably the GPU address of the parameters
    * inside this buffer; confirm against radv_indirect_dispatch().
    */
   struct radeon_winsys_bo *indirect;
   uint64_t va;
};
1966 
1967 void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
1968 
1969 struct radv_image;
1970 struct radv_image_view;
1971 
1972 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
1973 
1974 void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
1975 void radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
1976 
1977 void radv_create_gfx_config(struct radv_device *device);
1978 
1979 void radv_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports);
1980 
1981 void radv_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim,
1982                           unsigned polygon_mode, float line_width);
1983 
1984 VkResult radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state);
1985 void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws);
1986 void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
1987                                     struct radv_queue_state *queue_state);
1988 VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue);
1989 
1990 uint32_t radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
1991                                      bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
1992                                      bool prim_restart_enable, unsigned patch_control_points,
1993                                      unsigned num_tess_patches);
1994 void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
1995                                   unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel,
1996                                   uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va);
1997 
/**
 * Key consumed by radv_emit_vgt_shader_config().
 *
 * Every member is a single-bit flag packed into uint8_t bit-fields (ten
 * flags in total).
 */
struct radv_vgt_shader_key {
   uint8_t tess : 1;
   uint8_t gs : 1;
   uint8_t mesh_scratch_ring : 1;
   uint8_t mesh : 1;
   uint8_t ngg_passthrough : 1;
   uint8_t ngg : 1; /* gfx10+ */
   uint8_t ngg_streamout : 1;
   uint8_t hs_wave32 : 1;
   uint8_t gs_wave32 : 1;
   uint8_t vs_wave32 : 1;
};
2010 
/* Declarations only: the functions below are implemented elsewhere in the driver. */

/* Cache flush and predication. */
void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
                              uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
                              enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
                              uint64_t gfx9_eop_bug_va);
void radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
                                     uint64_t va);

/* CP DMA helpers. */
void radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size);
void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
                             bool predicating);
void radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
void radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value);
void radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);

uint32_t radv_get_vgt_index_size(uint32_t type);

void radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
                                 const struct radv_vgt_shader_key *key);

unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
2031 
2032 struct radv_ps_epilog_state {
2033    uint8_t color_attachment_count;
2034    VkFormat color_attachment_formats[MAX_RTS];
2035 
2036    uint32_t color_write_mask;
2037    uint32_t color_blend_enable;
2038 
2039    uint32_t colors_written;
2040    bool mrt0_is_dual_src;
2041    bool export_depth;
2042    bool export_stencil;
2043    bool export_sample_mask;
2044    bool alpha_to_coverage_via_mrtz;
2045    uint8_t need_src_alpha;
2046 };
2047 
2048 struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device,
2049                                                       const struct radv_ps_epilog_state *state);
2050 
2051 bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
2052                                        unsigned custom_blend_mode);
2053 
2054 void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
2055 bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
2056                                           unsigned *out_offset, void **ptr);
2057 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr);
2058 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
2059                                  unsigned *out_offset);
2060 void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
2061                                    const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors,
2062                                    void *vb_ptr);
2063 
2064 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
2065 unsigned radv_get_default_max_sample_dist(int log_samples);
2066 void radv_device_init_msaa(struct radv_device *device);
2067 VkResult radv_device_init_vrs_state(struct radv_device *device);
2068 
2069 void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
2070 
2071 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
2072                                    VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);
2073 
2074 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
2075                                       int cb_idx, uint32_t color_values[2]);
2076 
2077 void radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
2078                                       const struct legacy_surf_level *base_level_info, unsigned plane_id,
2079                                       unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil,
2080                                       bool is_storage_image, bool disable_compression, bool enable_write_compression,
2081                                       uint32_t *state, const struct ac_surf_nbc_view *nbc_view);
2082 
2083 void radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image,
2084                                   VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping,
2085                                   unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer,
2086                                   unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
2087                                   uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
2088                                   const struct ac_surf_nbc_view *nbc_view,
2089                                   const VkImageViewSlicedCreateInfoEXT *sliced_3d);
2090 
2091 bool radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image);
2092 bool radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image);
2093 
2094 bool radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image);
2095 
2096 unsigned radv_plane_from_aspect(VkImageAspectFlags mask);
2097 
2098 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2099                               const VkImageSubresourceRange *range, bool value);
2100 
2101 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2102                               const VkImageSubresourceRange *range, bool value);
2103 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags,
2104                                                const struct radv_image *image);
2105 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags,
2106                                                const struct radv_image *image);
2107 
2108 void radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage);
2109 
2110 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
2111 bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
2112 void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
2113                       struct radv_device_memory *mem);
2114 
2115 static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf * cs,unsigned sh_offset,unsigned pointer_count,bool use_32bit_pointers)2116 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
2117                               bool use_32bit_pointers)
2118 {
2119    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
2120    radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
2121 }
2122 
2123 static inline void
radv_emit_shader_pointer_body(const struct radv_device * device,struct radeon_cmdbuf * cs,uint64_t va,bool use_32bit_pointers)2124 radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
2125                               bool use_32bit_pointers)
2126 {
2127    radeon_emit(cs, va);
2128 
2129    if (use_32bit_pointers) {
2130       assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
2131    } else {
2132       radeon_emit(cs, va >> 32);
2133    }
2134 }
2135 
/**
 * Emit a single shader pointer: header for one pointer, then its payload.
 *
 * @param device     Device, forwarded to radv_emit_shader_pointer_body().
 * @param cs         Command stream to emit into.
 * @param sh_offset  Offset of the destination register.
 * @param va         Address to emit.
 * @param global     When true the pointer is emitted in 64-bit form,
 *                   otherwise in 32-bit form.
 */
static inline void
radv_emit_shader_pointer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va,
                         bool global)
{
   /* Only global pointers use the two-value (64-bit) encoding. */
   const bool use_32bit_pointers = !global;

   radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
   radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
}
2145 
2146 static inline unsigned
vk_to_bind_point(VkPipelineBindPoint bind_point)2147 vk_to_bind_point(VkPipelineBindPoint bind_point)
2148 {
2149    return bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? 2 : bind_point;
2150 }
2151 
2152 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)2153 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
2154 {
2155    return &cmd_buffer->descriptors[vk_to_bind_point(bind_point)];
2156 }
2157 
2158 static inline const struct radv_push_constant_state *
radv_get_push_constants_state(const struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)2159 radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
2160 {
2161    return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)];
2162 }
2163 
2164 void radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
2165 
2166 /*
2167  * Takes x,y,z as exact numbers of invocations, instead of blocks.
2168  *
2169  * Limitations: Can't call normal dispatch functions without binding or rebinding
2170  *              the compute pipeline.
2171  */
2172 void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z);
2173 
2174 void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va);
2175 
2176 struct radv_event {
2177    struct vk_object_base base;
2178    struct radeon_winsys_bo *bo;
2179    uint64_t *map;
2180 };
2181 
2182 struct radv_ray_tracing_group;
2183 
2184 void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_layout *layout,
2185                               const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage);
2186 
2187 void radv_hash_graphics_spirv_to_nir(blake3_hash hash, const struct radv_shader_stage *stage,
2188                                      const struct radv_spirv_to_nir_options *options);
2189 
2190 void radv_hash_shaders(const struct radv_device *device, unsigned char *hash, const struct radv_shader_stage *stages,
2191                        uint32_t stage_count, const struct radv_pipeline_layout *layout,
2192                        const struct radv_graphics_state_key *gfx_state);
2193 
2194 struct radv_ray_tracing_stage;
2195 void radv_hash_rt_shaders(const struct radv_device *device, unsigned char *hash,
2196                           const struct radv_ray_tracing_stage *stages,
2197                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
2198                           const struct radv_ray_tracing_group *groups);
2199 
2200 bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines);
2201 
2202 bool radv_emulate_rt(const struct radv_physical_device *pdevice);
2203 
/**
 * Pair of 8-bit vertex counts.
 *
 * NOTE(review): presumably the minimum vertex count of a primitive and the
 * per-primitive increment; confirm against the users of this struct.
 */
struct radv_prim_vertex_count {
   uint8_t min;
   uint8_t incr;
};
2208 
/**
 * Concrete kind of a struct radv_pipeline, stored in its "type" member and
 * checked by the radv_pipeline_to_*() downcast helpers.
 */
enum radv_pipeline_type {
   /* Graphics pipeline */
   RADV_PIPELINE_GRAPHICS,
   /* Graphics pipeline library */
   RADV_PIPELINE_GRAPHICS_LIB,
   /* Compute pipeline */
   RADV_PIPELINE_COMPUTE,
   /* Raytracing pipeline */
   RADV_PIPELINE_RAY_TRACING,
};
2217 
/**
 * Handle for one ray tracing shader group.
 *
 * The two anonymous unions each overlay two 32-bit indices, so
 * general_index aliases closest_hit_index and intersection_index aliases
 * any_hit_index.
 */
struct radv_pipeline_group_handle {
   uint64_t recursive_shader_ptr;

   union {
      uint32_t general_index;
      uint32_t closest_hit_index;
   };
   union {
      uint32_t intersection_index;
      uint32_t any_hit_index;
   };
};
2230 
2231 struct radv_rt_capture_replay_handle {
2232    struct radv_serialized_shader_arena_block recursive_shader_alloc;
2233    uint32_t non_recursive_idx;
2234 };
2235 
2236 struct radv_pipeline {
2237    struct vk_object_base base;
2238    enum radv_pipeline_type type;
2239 
2240    VkPipelineCreateFlags2KHR create_flags;
2241 
2242    struct vk_pipeline_cache_object *cache_object;
2243 
2244    bool is_internal;
2245    bool need_indirect_descriptor_sets;
2246    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
2247    struct radv_shader *gs_copy_shader;
2248 
2249    uint64_t shader_upload_seq;
2250 
2251    struct radeon_cmdbuf cs;
2252    uint32_t ctx_cs_hash;
2253    struct radeon_cmdbuf ctx_cs;
2254 
2255    uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
2256 
2257    /* Unique pipeline hash identifier. */
2258    uint64_t pipeline_hash;
2259 
2260    /* Pipeline layout info. */
2261    uint32_t push_constant_size;
2262    uint32_t dynamic_offset_count;
2263 };
2264 
2265 struct radv_sqtt_shaders_reloc {
2266    struct radeon_winsys_bo *bo;
2267    union radv_shader_arena_block *alloc;
2268    uint64_t va[MESA_VULKAN_SHADER_STAGES];
2269 };
2270 
2271 struct radv_graphics_pipeline {
2272    struct radv_pipeline base;
2273 
2274    bool uses_drawid;
2275    bool uses_baseinstance;
2276 
2277    /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
2278    bool force_vrs_per_vertex;
2279 
2280    /* Whether the pipeline uses NGG (GFX10+). */
2281    bool is_ngg;
2282    bool has_ngg_culling;
2283 
2284    uint8_t vtx_emit_num;
2285 
2286    uint32_t vtx_base_sgpr;
2287    uint64_t dynamic_states;
2288    uint64_t needed_dynamic_state;
2289 
2290    VkShaderStageFlags active_stages;
2291 
2292    /* Used for rbplus */
2293    uint32_t col_format_non_compacted;
2294 
2295    struct radv_dynamic_state dynamic_state;
2296 
2297    struct radv_vs_input_state vs_input_state;
2298 
2299    struct radv_multisample_state ms;
2300    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
2301    uint32_t binding_stride[MAX_VBS];
2302    uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
2303    uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
2304    uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
2305    uint32_t db_render_control;
2306 
2307    /* Last pre-PS API stage */
2308    gl_shader_stage last_vgt_api_stage;
2309 
2310    /* Not NULL if graphics pipeline uses streamout. */
2311    struct radv_shader *streamout_shader;
2312 
2313    unsigned rast_prim;
2314 
2315    /* For vk_graphics_pipeline_state */
2316    void *state_data;
2317 
2318    /* Custom blend mode for internal operations. */
2319    unsigned custom_blend_mode;
2320 
2321    /* Whether the pipeline uses out-of-order rasterization. */
2322    bool uses_out_of_order_rast;
2323 
2324    /* Whether the pipeline uses a VRS attachment. */
2325    bool uses_vrs_attachment;
2326 
2327    /* For graphics pipeline library */
2328    bool retain_shaders;
2329 
2330    /* For relocation of shaders with RGP. */
2331    struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc;
2332 };
2333 
2334 struct radv_compute_pipeline {
2335    struct radv_pipeline base;
2336 };
2337 
2338 struct radv_ray_tracing_group {
2339    VkRayTracingShaderGroupTypeKHR type;
2340    uint32_t recursive_shader; /* generalShader or closestHitShader */
2341    uint32_t any_hit_shader;
2342    uint32_t intersection_shader;
2343    struct radv_pipeline_group_handle handle;
2344 };
2345 
2346 struct radv_ray_tracing_stage {
2347    struct vk_pipeline_cache_object *nir;
2348    struct radv_shader *shader;
2349    gl_shader_stage stage;
2350    uint32_t stack_size;
2351 
2352    bool can_inline;
2353 
2354    uint8_t sha1[SHA1_DIGEST_LENGTH];
2355 };
2356 
2357 struct radv_ray_tracing_pipeline {
2358    struct radv_compute_pipeline base;
2359 
2360    struct radv_shader *prolog;
2361 
2362    struct radv_ray_tracing_stage *stages;
2363    struct radv_ray_tracing_group *groups;
2364    unsigned stage_count;
2365    unsigned non_imported_stage_count;
2366    unsigned group_count;
2367 
2368    uint8_t sha1[SHA1_DIGEST_LENGTH];
2369    uint32_t stack_size;
2370 
2371    /* set if any shaders from this pipeline require robustness2 in the merged traversal shader */
2372    bool traversal_storage_robustness2 : 1;
2373    bool traversal_uniform_robustness2 : 1;
2374 };
2375 
2376 struct radv_retained_shaders {
2377    struct {
2378       void *serialized_nir;
2379       size_t serialized_nir_size;
2380       unsigned char shader_sha1[SHA1_DIGEST_LENGTH];
2381       struct radv_shader_stage_key key;
2382    } stages[MESA_VULKAN_SHADER_STAGES];
2383 };
2384 
2385 struct radv_graphics_lib_pipeline {
2386    struct radv_graphics_pipeline base;
2387 
2388    struct radv_pipeline_layout layout;
2389 
2390    struct vk_graphics_pipeline_state graphics_state;
2391 
2392    VkGraphicsPipelineLibraryFlagsEXT lib_flags;
2393 
2394    struct radv_retained_shaders retained_shaders;
2395 
2396    void *mem_ctx;
2397 
2398    unsigned stage_count;
2399    VkPipelineShaderStageCreateInfo *stages;
2400    struct radv_shader_stage_key stage_keys[MESA_VULKAN_SHADER_STAGES];
2401 };
2402 
2403 #define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)                                                              \
2404    static inline struct radv_##pipe_type##_pipeline *radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline)      \
2405    {                                                                                                                   \
2406       assert(pipeline->type == pipe_enum);                                                                             \
2407       return (struct radv_##pipe_type##_pipeline *)pipeline;                                                           \
2408    }
2409 
2410 RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
2411 RADV_DECL_PIPELINE_DOWNCAST(graphics_lib, RADV_PIPELINE_GRAPHICS_LIB)
2412 RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
2413 RADV_DECL_PIPELINE_DOWNCAST(ray_tracing, RADV_PIPELINE_RAY_TRACING)
2414 
2415 struct radv_shader_layout {
2416    uint32_t num_sets;
2417 
2418    struct {
2419       struct radv_descriptor_set_layout *layout;
2420       uint32_t dynamic_offset_start;
2421    } set[MAX_SETS];
2422 
2423    uint32_t push_constant_size;
2424    uint32_t dynamic_offset_count;
2425    bool use_dynamic_descriptors;
2426 };
2427 
2428 struct radv_shader_stage {
2429    gl_shader_stage stage;
2430    gl_shader_stage next_stage;
2431 
2432    struct {
2433       const struct vk_object_base *object;
2434       const char *data;
2435       uint32_t size;
2436    } spirv;
2437 
2438    const char *entrypoint;
2439    const VkSpecializationInfo *spec_info;
2440 
2441    unsigned char shader_sha1[20];
2442 
2443    nir_shader *nir;
2444    nir_shader *internal_nir; /* meta shaders */
2445 
2446    struct radv_shader_info info;
2447    struct radv_shader_args args;
2448    struct radv_shader_stage_key key;
2449 
2450    VkPipelineCreationFeedback feedback;
2451 
2452    struct radv_shader_layout layout;
2453 };
2454 
2455 void radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, gl_shader_stage stage,
2456                              struct radv_shader_layout *layout);
2457 
2458 static inline bool
radv_is_last_vgt_stage(const struct radv_shader_stage * stage)2459 radv_is_last_vgt_stage(const struct radv_shader_stage *stage)
2460 {
2461    return (stage->info.stage == MESA_SHADER_VERTEX || stage->info.stage == MESA_SHADER_TESS_EVAL ||
2462            stage->info.stage == MESA_SHADER_GEOMETRY || stage->info.stage == MESA_SHADER_MESH) &&
2463           (stage->info.next_stage == MESA_SHADER_FRAGMENT || stage->info.next_stage == MESA_SHADER_NONE);
2464 }
2465 
2466 static inline bool
radv_pipeline_has_stage(const struct radv_graphics_pipeline * pipeline,gl_shader_stage stage)2467 radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage)
2468 {
2469    return pipeline->base.shaders[stage];
2470 }
2471 
2472 bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline);
2473 
2474 bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
2475 
2476 const struct radv_userdata_info *radv_get_user_sgpr(const struct radv_shader *shader, int idx);
2477 
2478 struct radv_shader *radv_get_shader(struct radv_shader *const *shaders, gl_shader_stage stage);
2479 
2480 void radv_emit_compute_shader(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
2481                               const struct radv_shader *shader);
2482 
2483 bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components,
2484                                  nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
2485 
2486 void radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2487                              const struct radv_shader *vs, const struct radv_shader *next_stage);
2488 
2489 void radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs,
2490                                 const struct radv_shader *tcs);
2491 
2492 void radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2493                                 struct radeon_cmdbuf *cs, const struct radv_shader *tes, const struct radv_shader *gs);
2494 
2495 void radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2496                                const struct radv_shader *ps);
2497 
2498 void radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *cs,
2499                          const struct radv_shader *last_vgt_shader, const struct radv_shader *ps);
2500 
2501 struct radv_ia_multi_vgt_param_helpers radv_compute_ia_multi_vgt_param(const struct radv_device *device,
2502                                                                        struct radv_shader *const *shaders);
2503 
2504 void radv_emit_vgt_vertex_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2505                                 const struct radv_shader *tes);
2506 
2507 void radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2508                           uint32_t vgt_gs_out_prim_type);
2509 
2510 void radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
2511                            const struct radv_shader *last_vgt_api_shader);
2512 
2513 void gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct radv_shader *mesh_shader,
2514                                        bool enable_vrs);
2515 
2516 void gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps,
2517                            bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex);
2518 
2519 void radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2520                                const struct radv_shader *gs, const struct radv_shader *es,
2521                                const struct radv_shader *gs_copy_shader);
2522 
2523 void radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
2524                            const struct radv_shader *ms);
2525 
2526 void radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
2527                                    struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2528                                    bool keep_executable_info, bool keep_statistic_info, bool is_internal,
2529                                    struct radv_retained_shaders *retained_shaders, bool noop_fs,
2530                                    struct radv_shader **shaders, struct radv_shader_binary **binaries,
2531                                    struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary);
2532 
2533 void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
2534                                 const struct radv_pipeline_layout *layout, struct radv_shader *shader);
2535 
2536 struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache,
2537                                     struct radv_shader_stage *cs_stage, bool keep_executable_info,
2538                                     bool keep_statistic_info, bool is_internal, struct radv_shader_binary **cs_binary);
2539 
/**
 * Driver-internal options for graphics pipeline creation. A pointer to this
 * struct is what radv_graphics_pipeline_create() takes as its `extra`
 * argument.
 */
struct radv_graphics_pipeline_create_info {
   /* On/off switches. */
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;

   /* NOTE(review): the encoding of this value is defined by its users, not here. */
   uint32_t custom_blend_mode;
};
2549 
2550 struct radv_shader_stage_key radv_pipeline_get_shader_key(const struct radv_device *device,
2551                                                           const VkPipelineShaderStageCreateInfo *stage,
2552                                                           VkPipelineCreateFlags2KHR flags, const void *pNext);
2553 
2554 void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type);
2555 
2556 VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
2557                                        const VkGraphicsPipelineCreateInfo *pCreateInfo,
2558                                        const struct radv_graphics_pipeline_create_info *extra,
2559                                        const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
2560 
2561 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
2562                                       const VkComputePipelineCreateInfo *pCreateInfo,
2563                                       const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline);
2564 
2565 bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2KHR flags);
2566 bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2KHR flags);
2567 
2568 VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
2569                                                                     const VkPipelineShaderStageCreateInfo *pStages,
2570                                                                     void *mem_ctx);
2571 
2572 bool radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader);
2573 
2574 bool radv_pipeline_has_ngg(const struct radv_graphics_pipeline *pipeline);
2575 
2576 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
2577                            const VkAllocationCallbacks *allocator);
2578 
2579 struct vk_format_description;
2580 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void);
2581 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void);
2582 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2583 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2584 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2585 uint32_t radv_translate_dbformat(VkFormat format);
2586 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
2587 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
2588 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value);
2589 bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format);
2590 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable);
2591 bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
2592                                  bool *sign_reinterpret);
2593 bool radv_is_atomic_format_supported(VkFormat format);
2594 bool radv_device_supports_etc(const struct radv_physical_device *physical_device);
2595 bool radv_is_format_emulated(const struct radv_physical_device *physical_device, VkFormat format);
2596 
2597 static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
2598    VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
2599    VK_IMAGE_USAGE_STORAGE_BIT;
2600 
2601 struct radv_image_plane {
2602    VkFormat format;
2603    struct radeon_surf surface;
2604 };
2605 
2606 struct radv_image_binding {
2607    /* Set when bound */
2608    struct radeon_winsys_bo *bo;
2609    VkDeviceSize offset;
2610 };
2611 
2612 struct radv_image {
2613    struct vk_image vk;
2614 
2615    VkDeviceSize size;
2616    uint32_t alignment;
2617 
2618    unsigned queue_family_mask;
2619    bool exclusive;
2620    bool shareable;
2621    bool l2_coherent;
2622    bool dcc_sign_reinterpret;
2623    bool support_comp_to_single;
2624 
2625    struct radv_image_binding bindings[3];
2626    bool tc_compatible_cmask;
2627 
2628    uint64_t clear_value_offset;
2629    uint64_t fce_pred_offset;
2630    uint64_t dcc_pred_offset;
2631 
2632    /*
2633     * Metadata for the TC-compat zrange workaround. If the 32-bit value
2634     * stored at this offset is UINT_MAX, the driver will emit
2635     * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
2636     * SET_CONTEXT_REG packet.
2637     */
2638    uint64_t tc_compat_zrange_offset;
2639 
2640    /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
2641    VkDeviceMemory owned_memory;
2642 
2643    unsigned plane_count;
2644    bool disjoint;
2645    struct radv_image_plane planes[0];
2646 };
2647 
2648 struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image);
2649 
/* Whether the image has an HTILE that is known to be consistent with the
 * contents of the image and is allowed to be in compressed form.
 *
 * If this is false, reads that don't use the HTILE should still be able to
 * return correct results.
 */
2656 bool radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
2657                                      VkImageLayout layout, unsigned queue_mask);
2658 
2659 bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
2660                                 VkImageLayout layout, unsigned queue_mask);
2661 
2662 bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
2663                                 VkImageLayout layout, unsigned queue_mask);
2664 
/**
 * FMASK compression state, as returned by radv_layout_fmask_compression().
 */
enum radv_fmask_compression {
   RADV_FMASK_COMPRESSION_NONE = 0,
   RADV_FMASK_COMPRESSION_PARTIAL = 1,
   RADV_FMASK_COMPRESSION_FULL = 2,
};
2670 
2671 enum radv_fmask_compression radv_layout_fmask_compression(const struct radv_device *device,
2672                                                           const struct radv_image *image, VkImageLayout layout,
2673                                                           unsigned queue_mask);
2674 
2675 /**
2676  * Return whether the image has CMASK metadata for color surfaces.
2677  */
2678 static inline bool
radv_image_has_cmask(const struct radv_image * image)2679 radv_image_has_cmask(const struct radv_image *image)
2680 {
2681    return image->planes[0].surface.cmask_offset;
2682 }
2683 
2684 /**
2685  * Return whether the image has FMASK metadata for color surfaces.
2686  */
2687 static inline bool
radv_image_has_fmask(const struct radv_image * image)2688 radv_image_has_fmask(const struct radv_image *image)
2689 {
2690    return image->planes[0].surface.fmask_offset;
2691 }
2692 
2693 /**
2694  * Return whether the image has DCC metadata for color surfaces.
2695  */
2696 static inline bool
radv_image_has_dcc(const struct radv_image * image)2697 radv_image_has_dcc(const struct radv_image *image)
2698 {
2699    return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset;
2700 }
2701 
2702 /**
2703  * Return whether the image is TC-compatible CMASK.
2704  */
2705 static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image * image)2706 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2707 {
2708    return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2709 }
2710 
2711 /**
2712  * Return whether DCC metadata is enabled for a level.
2713  */
2714 static inline bool
radv_dcc_enabled(const struct radv_image * image,unsigned level)2715 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2716 {
2717    return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2718 }
2719 
/**
 * Tell whether the image has any CB metadata: CMASK, FMASK or DCC.
 */
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
   if (radv_image_has_cmask(image))
      return true;

   if (radv_image_has_fmask(image))
      return true;

   return radv_image_has_dcc(image);
}
2728 
2729 /**
2730  * Return whether the image has HTILE metadata for depth surfaces.
2731  */
2732 static inline bool
radv_image_has_htile(const struct radv_image * image)2733 radv_image_has_htile(const struct radv_image *image)
2734 {
2735    return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && image->planes[0].surface.meta_size;
2736 }
2737 
2738 /**
2739  * Return whether the image has VRS HTILE metadata for depth surfaces
2740  */
2741 static inline bool
radv_image_has_vrs_htile(const struct radv_device * device,const struct radv_image * image)2742 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2743 {
2744    const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
2745 
2746    /* Any depth buffer can potentially use VRS on GFX10.3. */
2747    return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate &&
2748           radv_image_has_htile(image) && (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2749 }
2750 
2751 /**
2752  * Return whether HTILE metadata is enabled for a level.
2753  */
2754 static inline bool
radv_htile_enabled(const struct radv_image * image,unsigned level)2755 radv_htile_enabled(const struct radv_image *image, unsigned level)
2756 {
2757    return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2758 }
2759 
2760 /**
2761  * Return whether the image is TC-compatible HTILE.
2762  */
2763 static inline bool
radv_image_is_tc_compat_htile(const struct radv_image * image)2764 radv_image_is_tc_compat_htile(const struct radv_image *image)
2765 {
2766    return radv_image_has_htile(image) && (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2767 }
2768 
2769 /**
2770  * Return whether the entire HTILE buffer can be used for depth in order to
2771  * improve HiZ Z-Range precision.
2772  */
2773 static inline bool
radv_image_tile_stencil_disabled(const struct radv_device * device,const struct radv_image * image)2774 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2775 {
2776    if (device->physical_device->rad_info.gfx_level >= GFX9) {
2777       return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
2778    } else {
2779       /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2780        * the TC-compat ZRANGE issue even if no stencil is used.
2781        */
2782       return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image);
2783    }
2784 }
2785 
2786 static inline bool
radv_image_has_clear_value(const struct radv_image * image)2787 radv_image_has_clear_value(const struct radv_image *image)
2788 {
2789    return image->clear_value_offset != 0;
2790 }
2791 
2792 static inline uint64_t
radv_image_get_fast_clear_va(const struct radv_image * image,uint32_t base_level)2793 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2794 {
2795    assert(radv_image_has_clear_value(image));
2796 
2797    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2798    va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2799    return va;
2800 }
2801 
2802 static inline uint64_t
radv_image_get_fce_pred_va(const struct radv_image * image,uint32_t base_level)2803 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2804 {
2805    assert(image->fce_pred_offset != 0);
2806 
2807    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2808    va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8;
2809    return va;
2810 }
2811 
2812 static inline uint64_t
radv_image_get_dcc_pred_va(const struct radv_image * image,uint32_t base_level)2813 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2814 {
2815    assert(image->dcc_pred_offset != 0);
2816 
2817    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2818    va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8;
2819    return va;
2820 }
2821 
2822 static inline uint64_t
radv_get_tc_compat_zrange_va(const struct radv_image * image,uint32_t base_level)2823 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2824 {
2825    assert(image->tc_compat_zrange_offset != 0);
2826 
2827    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2828    va += image->bindings[0].offset + image->tc_compat_zrange_offset + base_level * 4;
2829    return va;
2830 }
2831 
2832 static inline uint64_t
radv_get_ds_clear_value_va(const struct radv_image * image,uint32_t base_level)2833 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2834 {
2835    assert(radv_image_has_clear_value(image));
2836 
2837    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2838    va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2839    return va;
2840 }
2841 
/**
 * Return the 32-bit value used to initialize HTILE for an image.
 *
 * The value depends on which HTILE layout applies: Z only, Z plus stencil,
 * or Z plus stencil with the 4-bit VRS encoding.
 */
static inline uint32_t
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
   if (radv_image_tile_stencil_disabled(device, image)) {
      /* Z only (no stencil):
       *
       * |31     18|17      4|3     0|
       * +---------+---------+-------+
       * |  Max Z  |  Min Z  | ZMask |
       */
      return 0xfffc000f;
   }

   /* Z and stencil:
    *
    * |31       12|11 10|9    8|7   6|5   4|3     0|
    * +-----------+-----+------+-----+-----+-------+
    * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
    *
    * SR0/SR1 contains the stencil test results. Initializing
    * SR0/SR1 to 0x3 means the stencil test result is unknown.
    *
    * Z, stencil and 4 bit VRS encoding:
    * |31       12|11        10|9    8|7          6|5   4|3     0|
    * +-----------+------------+------+------------+-----+-------+
    * |  Z Range  | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
    */
   if (radv_image_has_vrs_htile(device, image)) {
      /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
      return 0xfffff33f;
   }

   return 0xfffff3ff;
}
2880 
2881 static inline bool
radv_image_get_iterate256(const struct radv_device * device,struct radv_image * image)2882 radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image)
2883 {
2884    /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2885    return device->physical_device->rad_info.gfx_level >= GFX10 &&
2886           (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2887           radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
2888 }
2889 
2890 unsigned radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
2891                                       enum radv_queue_family queue_family);
2892 
2893 bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image);
2894 
2895 unsigned radv_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil);
2896 
2897 struct radeon_bo_metadata;
2898 void radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata);
2899 
2900 void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
2901                                        uint32_t stride);
2902 
/**
 * Storage for image descriptors, viewed in one of two ways that share the
 * same memory:
 *  - a plane 0 descriptor followed by an FMASK descriptor, or
 *  - up to three per-plane descriptors.
 * Each descriptor is 8 dwords.
 */
union radv_descriptor {
   /* View 1: plane 0 + FMASK. */
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };

   /* View 2: one descriptor per plane. */
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2912 
2913 struct radv_image_view {
2914    struct vk_image_view vk;
2915    struct radv_image *image; /**< VkImageViewCreateInfo::image */
2916 
2917    unsigned plane_id;
2918    VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
2919 
2920    /* Whether the image iview supports fast clear. */
2921    bool support_fast_clear;
2922 
2923    bool disable_dcc_mrt;
2924 
2925    union radv_descriptor descriptor;
2926 
2927    /* Descriptor for use as a storage image as opposed to a sampled image.
2928     * This has a few differences for cube maps (e.g. type).
2929     */
2930    union radv_descriptor storage_descriptor;
2931 
2932    /* Block-compressed image views on GFX10+. */
2933    struct ac_surf_nbc_view nbc_view;
2934 };
2935 
2936 struct radv_image_create_info {
2937    const VkImageCreateInfo *vk_info;
2938    bool scanout;
2939    bool no_metadata_planes;
2940    bool prime_blit_src;
2941    const struct radeon_bo_metadata *bo_metadata;
2942 };
2943 
2944 VkResult radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2945                                   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2946                                   const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image);
2947 
2948 VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2949                            const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal);
2950 
2951 bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
2952                                      VkImageCreateFlags flags, bool *sign_reinterpret);
2953 
2954 bool vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format);
2955 
2956 unsigned radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image);
2957 
2958 VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2959                                  const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc,
2960                                  VkImage *out_image_h);
2961 VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
2962                                 const VkImportAndroidHardwareBufferInfoANDROID *info);
2963 VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
2964                                 const VkMemoryAllocateInfo *pAllocateInfo);
2965 
2966 unsigned radv_ahb_format_for_vk_format(VkFormat vk_format);
2967 
2968 VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2969 
2970 bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2971 
/**
 * Extra, driver-specific options for radv_image_view_init().
 */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
   bool disable_dcc_mrt;

   /** Set only if this came from vkCreateImage */
   bool from_client;
};
2978 
2979 void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2980                           const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags,
2981                           const struct radv_image_view_extra_create_info *extra_create_info);
2982 void radv_image_view_finish(struct radv_image_view *iview);
2983 
2984 VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2985 
2986 void radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
2987                           enum pipe_swizzle swizzle[4]);
2988 
2989 unsigned radv_map_swizzle(unsigned swizzle);
2990 
2991 struct radv_buffer_view {
2992    struct vk_buffer_view vk;
2993    struct radeon_winsys_bo *bo;
2994    uint32_t state[4];
2995 };
2996 void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2997                            const VkBufferViewCreateInfo *pCreateInfo);
2998 void radv_buffer_view_finish(struct radv_buffer_view *view);
2999 
3000 static inline bool
radv_image_extent_compare(const struct radv_image * image,const VkExtent3D * extent)3001 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
3002 {
3003    if (extent->width != image->vk.extent.width || extent->height != image->vk.extent.height ||
3004        extent->depth != image->vk.extent.depth)
3005       return false;
3006    return true;
3007 }
3008 
3009 struct radv_sampler {
3010    struct vk_sampler vk;
3011    uint32_t state[4];
3012    uint32_t border_color_slot;
3013 };
3014 
3015 struct radv_resolve_barrier {
3016    VkPipelineStageFlags2 src_stage_mask;
3017    VkPipelineStageFlags2 dst_stage_mask;
3018    VkAccessFlags2 src_access_mask;
3019    VkAccessFlags2 dst_access_mask;
3020 };
3021 
3022 void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier);
3023 
3024 struct radv_query_pool {
3025    struct vk_query_pool vk;
3026    struct radeon_winsys_bo *bo;
3027    uint32_t stride;
3028    uint32_t availability_offset;
3029    uint64_t size;
3030    char *ptr;
3031    bool uses_gds; /* For NGG GS on GFX10+ */
3032    bool uses_ace; /* For task shader invocations on GFX10.3+ */
3033 };
3034 
3035 struct radv_perfcounter_impl;
3036 
3037 struct radv_pc_query_pool {
3038    struct radv_query_pool b;
3039 
3040    uint32_t *pc_regs;
3041    unsigned num_pc_regs;
3042 
3043    unsigned num_passes;
3044 
3045    unsigned num_counters;
3046    struct radv_perfcounter_impl *counters;
3047 };
3048 
3049 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
3050 VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo,
3051                                  struct radv_pc_query_pool *pool);
3052 void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
3053 void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
3054 void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
3055 
/* Macroblock dimensions. NOTE(review): units are presumably pixels; confirm against the video code. */
#define VL_MACROBLOCK_WIDTH  16
#define VL_MACROBLOCK_HEIGHT 16
3058 
3059 struct radv_vid_mem {
3060    struct radv_device_memory *mem;
3061    VkDeviceSize offset;
3062    VkDeviceSize size;
3063 };
3064 
3065 struct radv_video_session {
3066    struct vk_video_session vk;
3067 
3068    uint32_t stream_handle;
3069    unsigned stream_type;
3070    bool interlaced;
3071    enum { DPB_MAX_RES = 0, DPB_DYNAMIC_TIER_1, DPB_DYNAMIC_TIER_2 } dpb_type;
3072    unsigned db_alignment;
3073 
3074    struct radv_vid_mem sessionctx;
3075    struct radv_vid_mem ctx;
3076 
3077    unsigned dbg_frame_cnt;
3078 };
3079 
3080 struct radv_video_session_params {
3081    struct vk_video_session_parameters vk;
3082 };
3083 
3084 bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
3085 
3086 int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
3087                     const VkDeviceQueueCreateInfo *create_info,
3088                     const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
3089 
3090 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
3091                              struct radv_descriptor_set *set, unsigned idx);
3092 
3093 void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
3094                                      VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
3095                                      const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
3096                                      const VkCopyDescriptorSet *pDescriptorCopies);
3097 
3098 void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
3099                                                   struct radv_descriptor_set *set,
3100                                                   VkDescriptorUpdateTemplate descriptorUpdateTemplate,
3101                                                   const void *pData);
3102 
3103 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
3104                                    VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
3105                                    const VkWriteDescriptorSet *pDescriptorWrites);
3106 
3107 void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
3108                                        unsigned range, uint32_t *state);
3109 
3110 uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
3111                        const VkImageSubresourceRange *range, uint32_t value);
3112 
3113 uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
3114                          const VkImageSubresourceRange *range);
3115 
3116 /* radv_nir_to_llvm.c */
3117 struct radv_shader_args;
3118 struct radv_nir_compiler_options;
3119 struct radv_shader_info;
3120 
3121 void llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
3122                          unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
3123                          const struct radv_shader_args *args);
3124 
3125 bool radv_sqtt_init(struct radv_device *device);
3126 void radv_sqtt_finish(struct radv_device *device);
3127 bool radv_begin_sqtt(struct radv_queue *queue);
3128 bool radv_end_sqtt(struct radv_queue *queue);
3129 bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace);
3130 void radv_reset_sqtt_trace(struct radv_device *device);
3131 void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);
3132 bool radv_is_instruction_timing_enabled(void);
3133 bool radv_sqtt_queue_events_enabled(void);
3134 bool radv_sqtt_sample_clocks(struct radv_device *device);
3135 
3136 void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit);
3137 void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
3138 
3139 VkResult radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_winsys_bo *timestamp_bo,
3140                                     uint32_t timestamp_offset, VkPipelineStageFlags2 timestamp_stage,
3141                                     VkCommandBuffer *pcmdbuf);
3142 
3143 VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
3144                                          uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);
3145 
3146 VkResult radv_rra_trace_init(struct radv_device *device);
3147 
3148 VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename);
3149 void radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data);
3150 void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);
3151 
3152 void radv_memory_trace_init(struct radv_device *device);
3153 void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal);
3154 void radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
3155 void radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
3156                               VkMemoryAllocateFlags alloc_flags);
3157 void radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer);
3158 void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
3159                                VkImage _image);
3160 void radv_rmv_log_image_bind(struct radv_device *device, VkImage _image);
3161 void radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool pool, bool is_internal);
3162 void radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo,
3163                                            uint32_t executable_size, uint32_t data_size, uint32_t scratch_size);
3164 void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
3165 void radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo);
3166 void radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
3167 void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
3168 void radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
3169 void radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
3170                                          VkDescriptorPool pool, bool is_internal);
3171 void radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline,
3172                                            bool is_internal);
3173 void radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal);
3174 void radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline);
3175 void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags, bool is_internal);
3176 void radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle);
3177 void radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type);
3178 void radv_rmv_fill_device_info(const struct radv_physical_device *device, struct vk_rmv_device_info *info);
3179 void radv_rmv_collect_trace_events(struct radv_device *device);
3180 void radv_memory_trace_finish(struct radv_device *device);
3181 
3182 VkResult radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreateInfo,
3183                             const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, bool is_internal);
3184 VkResult radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
3185                            const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem, bool is_internal);
3186 VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
3187                                 const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal);
3188 VkResult radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateInfo,
3189                            const VkAllocationCallbacks *pAllocator, VkEvent *pEvent, bool is_internal);
3190 
3191 /* radv_sqtt_layer_.c */
/* Set of layout-transition flags, one bit per kind of transition.
 * The anonymous struct and `all` overlay each other, so the whole set can be
 * read or written as a single 16-bit value.
 */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8; /* fills the remaining 8 of the 16 bits */
      };
      uint16_t all; /* all flags viewed as one value */
   } layout_transitions;
};
3208 
/**
 * Reason codes stored in the reason field of an RGP barrier start marker
 * when the barrier originates from the Vulkan client. PAL-defined values are
 * not listed here. (Table 15)
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External reasons: barriers generated by the application through API
    * synchronization commands. Valid range: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal reasons: implicit synchronization inserted by the Vulkan
    * driver. Valid range: [0xC0000000 ... 0xFFFFFFFE]. Every entry below is
    * expressed as an offset from RGP_BARRIER_INTERNAL_BASE.
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
3232 
3233 void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
3234 void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
3235 void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
3236 void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
3237 void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects);
3238 void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
3239 void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
3240 void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
3241 void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason);
3242 void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
3243 void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
3244 void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier);
3245 void radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count);
3246 void radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer);
3247 
3248 void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline);
3249 
3250 void radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
3251                                   const char *str);
3252 
3253 struct radv_indirect_command_layout {
3254    struct vk_object_base base;
3255 
3256    VkIndirectCommandsLayoutUsageFlagsNV flags;
3257    VkPipelineBindPoint pipeline_bind_point;
3258 
3259    uint32_t input_stride;
3260    uint32_t token_count;
3261 
3262    bool indexed;
3263    bool binds_index_buffer;
3264    bool draw_mesh_tasks;
3265    uint16_t draw_params_offset;
3266    uint16_t index_buffer_offset;
3267 
3268    uint16_t dispatch_params_offset;
3269 
3270    uint32_t bind_vbo_mask;
3271    uint32_t vbo_offsets[MAX_VBS];
3272 
3273    uint64_t push_constant_mask;
3274    uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
3275 
3276    uint32_t ibo_type_32;
3277    uint32_t ibo_type_8;
3278 
3279    VkIndirectCommandsLayoutTokenNV tokens[0];
3280 };
3281 
3282 uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
3283 
3284 bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
3285                               const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
3286 void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo,
3287                       bool cond_render_enabled);
3288 
3289 bool radv_dgc_can_preprocess(const struct radv_indirect_command_layout *layout, struct radv_pipeline *pipeline);
3290 
3291 static inline uint32_t
radv_conv_prim_to_gs_out(uint32_t topology,bool is_ngg)3292 radv_conv_prim_to_gs_out(uint32_t topology, bool is_ngg)
3293 {
3294    switch (topology) {
3295    case V_008958_DI_PT_POINTLIST:
3296    case V_008958_DI_PT_PATCH:
3297       return V_028A6C_POINTLIST;
3298    case V_008958_DI_PT_LINELIST:
3299    case V_008958_DI_PT_LINESTRIP:
3300    case V_008958_DI_PT_LINELIST_ADJ:
3301    case V_008958_DI_PT_LINESTRIP_ADJ:
3302       return V_028A6C_LINESTRIP;
3303    case V_008958_DI_PT_TRILIST:
3304    case V_008958_DI_PT_TRISTRIP:
3305    case V_008958_DI_PT_TRIFAN:
3306    case V_008958_DI_PT_TRILIST_ADJ:
3307    case V_008958_DI_PT_TRISTRIP_ADJ:
3308       return V_028A6C_TRISTRIP;
3309    case V_008958_DI_PT_RECTLIST:
3310       return is_ngg ? V_028A6C_RECTLIST : V_028A6C_TRISTRIP;
3311    default:
3312       assert(0);
3313       return 0;
3314    }
3315 }
3316 
3317 static inline uint32_t
radv_translate_prim(unsigned topology)3318 radv_translate_prim(unsigned topology)
3319 {
3320    switch (topology) {
3321    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
3322       return V_008958_DI_PT_POINTLIST;
3323    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
3324       return V_008958_DI_PT_LINELIST;
3325    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
3326       return V_008958_DI_PT_LINESTRIP;
3327    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
3328       return V_008958_DI_PT_TRILIST;
3329    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
3330       return V_008958_DI_PT_TRISTRIP;
3331    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
3332       return V_008958_DI_PT_TRIFAN;
3333    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
3334       return V_008958_DI_PT_LINELIST_ADJ;
3335    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
3336       return V_008958_DI_PT_LINESTRIP_ADJ;
3337    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
3338       return V_008958_DI_PT_TRILIST_ADJ;
3339    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
3340       return V_008958_DI_PT_TRISTRIP_ADJ;
3341    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
3342       return V_008958_DI_PT_PATCH;
3343    default:
3344       unreachable("unhandled primitive type");
3345    }
3346 }
3347 
3348 static inline bool
radv_prim_is_points_or_lines(unsigned topology)3349 radv_prim_is_points_or_lines(unsigned topology)
3350 {
3351    switch (topology) {
3352    case V_008958_DI_PT_POINTLIST:
3353    case V_008958_DI_PT_LINELIST:
3354    case V_008958_DI_PT_LINESTRIP:
3355    case V_008958_DI_PT_LINELIST_ADJ:
3356    case V_008958_DI_PT_LINESTRIP_ADJ:
3357       return true;
3358    default:
3359       return false;
3360    }
3361 }
3362 
3363 static inline bool
radv_rast_prim_is_point(unsigned rast_prim)3364 radv_rast_prim_is_point(unsigned rast_prim)
3365 {
3366    return rast_prim == V_028A6C_POINTLIST;
3367 }
3368 
3369 static inline bool
radv_rast_prim_is_line(unsigned rast_prim)3370 radv_rast_prim_is_line(unsigned rast_prim)
3371 {
3372    return rast_prim == V_028A6C_LINESTRIP;
3373 }
3374 
/* Return true when rast_prim is either the point or the line value, as
 * decided by radv_rast_prim_is_point() and radv_rast_prim_is_line().
 */
static inline bool
radv_rast_prim_is_points_or_lines(unsigned rast_prim)
{
   if (radv_rast_prim_is_point(rast_prim))
      return true;

   return radv_rast_prim_is_line(rast_prim);
}
3380 
3381 static inline bool
radv_polygon_mode_is_point(unsigned polygon_mode)3382 radv_polygon_mode_is_point(unsigned polygon_mode)
3383 {
3384    return polygon_mode == V_028814_X_DRAW_POINTS;
3385 }
3386 
3387 static inline bool
radv_polygon_mode_is_line(unsigned polygon_mode)3388 radv_polygon_mode_is_line(unsigned polygon_mode)
3389 {
3390    return polygon_mode == V_028814_X_DRAW_LINES;
3391 }
3392 
/* Return true when polygon_mode is either the point or the line mode, as
 * decided by radv_polygon_mode_is_point() and radv_polygon_mode_is_line().
 */
static inline bool
radv_polygon_mode_is_points_or_lines(unsigned polygon_mode)
{
   if (radv_polygon_mode_is_point(polygon_mode))
      return true;

   return radv_polygon_mode_is_line(polygon_mode);
}
3398 
3399 static inline bool
radv_primitive_topology_is_line_list(unsigned primitive_topology)3400 radv_primitive_topology_is_line_list(unsigned primitive_topology)
3401 {
3402    return primitive_topology == V_008958_DI_PT_LINELIST || primitive_topology == V_008958_DI_PT_LINELIST_ADJ;
3403 }
3404 
3405 static inline unsigned
radv_get_num_vertices_per_prim(const struct radv_graphics_state_key * gfx_state)3406 radv_get_num_vertices_per_prim(const struct radv_graphics_state_key *gfx_state)
3407 {
3408    if (gfx_state->ia.topology == V_008958_DI_PT_NONE) {
3409       /* When the topology is unknown (with graphics pipeline library), return the maximum number of
3410        * vertices per primitives for VS. This is used to lower NGG (the HW will ignore the extra
3411        * bits for points/lines) and also to enable NGG culling unconditionally (it will be disabled
3412        * dynamically for points/lines).
3413        */
3414       return 3;
3415    } else {
3416       /* Need to add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
3417       return radv_conv_prim_to_gs_out(gfx_state->ia.topology, false) + 1;
3418    }
3419 }
3420 
/* Prototype only; the definition is not part of this header. */
uint32_t radv_get_vgt_gs_out(struct radv_shader **shaders,
                             uint32_t primitive_topology);
3422 
3423 static inline uint32_t
radv_translate_fill(VkPolygonMode func)3424 radv_translate_fill(VkPolygonMode func)
3425 {
3426    switch (func) {
3427    case VK_POLYGON_MODE_FILL:
3428       return V_028814_X_DRAW_TRIANGLES;
3429    case VK_POLYGON_MODE_LINE:
3430       return V_028814_X_DRAW_LINES;
3431    case VK_POLYGON_MODE_POINT:
3432       return V_028814_X_DRAW_POINTS;
3433    default:
3434       assert(0);
3435       return V_028814_X_DRAW_POINTS;
3436    }
3437 }
3438 
3439 static inline uint32_t
radv_translate_stencil_op(enum VkStencilOp op)3440 radv_translate_stencil_op(enum VkStencilOp op)
3441 {
3442    switch (op) {
3443    case VK_STENCIL_OP_KEEP:
3444       return V_02842C_STENCIL_KEEP;
3445    case VK_STENCIL_OP_ZERO:
3446       return V_02842C_STENCIL_ZERO;
3447    case VK_STENCIL_OP_REPLACE:
3448       return V_02842C_STENCIL_REPLACE_TEST;
3449    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
3450       return V_02842C_STENCIL_ADD_CLAMP;
3451    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
3452       return V_02842C_STENCIL_SUB_CLAMP;
3453    case VK_STENCIL_OP_INVERT:
3454       return V_02842C_STENCIL_INVERT;
3455    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
3456       return V_02842C_STENCIL_ADD_WRAP;
3457    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
3458       return V_02842C_STENCIL_SUB_WRAP;
3459    default:
3460       return 0;
3461    }
3462 }
3463 
3464 static inline uint32_t
radv_translate_blend_logic_op(VkLogicOp op)3465 radv_translate_blend_logic_op(VkLogicOp op)
3466 {
3467    switch (op) {
3468    case VK_LOGIC_OP_CLEAR:
3469       return V_028808_ROP3_CLEAR;
3470    case VK_LOGIC_OP_AND:
3471       return V_028808_ROP3_AND;
3472    case VK_LOGIC_OP_AND_REVERSE:
3473       return V_028808_ROP3_AND_REVERSE;
3474    case VK_LOGIC_OP_COPY:
3475       return V_028808_ROP3_COPY;
3476    case VK_LOGIC_OP_AND_INVERTED:
3477       return V_028808_ROP3_AND_INVERTED;
3478    case VK_LOGIC_OP_NO_OP:
3479       return V_028808_ROP3_NO_OP;
3480    case VK_LOGIC_OP_XOR:
3481       return V_028808_ROP3_XOR;
3482    case VK_LOGIC_OP_OR:
3483       return V_028808_ROP3_OR;
3484    case VK_LOGIC_OP_NOR:
3485       return V_028808_ROP3_NOR;
3486    case VK_LOGIC_OP_EQUIVALENT:
3487       return V_028808_ROP3_EQUIVALENT;
3488    case VK_LOGIC_OP_INVERT:
3489       return V_028808_ROP3_INVERT;
3490    case VK_LOGIC_OP_OR_REVERSE:
3491       return V_028808_ROP3_OR_REVERSE;
3492    case VK_LOGIC_OP_COPY_INVERTED:
3493       return V_028808_ROP3_COPY_INVERTED;
3494    case VK_LOGIC_OP_OR_INVERTED:
3495       return V_028808_ROP3_OR_INVERTED;
3496    case VK_LOGIC_OP_NAND:
3497       return V_028808_ROP3_NAND;
3498    case VK_LOGIC_OP_SET:
3499       return V_028808_ROP3_SET;
3500    default:
3501       unreachable("Unhandled logic op");
3502    }
3503 }
3504 
3505 static inline uint32_t
radv_translate_blend_function(VkBlendOp op)3506 radv_translate_blend_function(VkBlendOp op)
3507 {
3508    switch (op) {
3509    case VK_BLEND_OP_ADD:
3510       return V_028780_COMB_DST_PLUS_SRC;
3511    case VK_BLEND_OP_SUBTRACT:
3512       return V_028780_COMB_SRC_MINUS_DST;
3513    case VK_BLEND_OP_REVERSE_SUBTRACT:
3514       return V_028780_COMB_DST_MINUS_SRC;
3515    case VK_BLEND_OP_MIN:
3516       return V_028780_COMB_MIN_DST_SRC;
3517    case VK_BLEND_OP_MAX:
3518       return V_028780_COMB_MAX_DST_SRC;
3519    default:
3520       return 0;
3521    }
3522 }
3523 
3524 static inline uint32_t
radv_translate_blend_factor(enum amd_gfx_level gfx_level,VkBlendFactor factor)3525 radv_translate_blend_factor(enum amd_gfx_level gfx_level, VkBlendFactor factor)
3526 {
3527    switch (factor) {
3528    case VK_BLEND_FACTOR_ZERO:
3529       return V_028780_BLEND_ZERO;
3530    case VK_BLEND_FACTOR_ONE:
3531       return V_028780_BLEND_ONE;
3532    case VK_BLEND_FACTOR_SRC_COLOR:
3533       return V_028780_BLEND_SRC_COLOR;
3534    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
3535       return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
3536    case VK_BLEND_FACTOR_DST_COLOR:
3537       return V_028780_BLEND_DST_COLOR;
3538    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
3539       return V_028780_BLEND_ONE_MINUS_DST_COLOR;
3540    case VK_BLEND_FACTOR_SRC_ALPHA:
3541       return V_028780_BLEND_SRC_ALPHA;
3542    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
3543       return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
3544    case VK_BLEND_FACTOR_DST_ALPHA:
3545       return V_028780_BLEND_DST_ALPHA;
3546    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
3547       return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
3548    case VK_BLEND_FACTOR_CONSTANT_COLOR:
3549       return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 : V_028780_BLEND_CONSTANT_COLOR_GFX6;
3550    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
3551       return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11
3552                                 : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
3553    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
3554       return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 : V_028780_BLEND_CONSTANT_ALPHA_GFX6;
3555    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
3556       return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11
3557                                 : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
3558    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
3559       return V_028780_BLEND_SRC_ALPHA_SATURATE;
3560    case VK_BLEND_FACTOR_SRC1_COLOR:
3561       return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6;
3562    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
3563       return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 : V_028780_BLEND_INV_SRC1_COLOR_GFX6;
3564    case VK_BLEND_FACTOR_SRC1_ALPHA:
3565       return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6;
3566    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
3567       return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11 : V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
3568    default:
3569       return 0;
3570    }
3571 }
3572 
3573 static inline uint32_t
radv_translate_blend_opt_factor(VkBlendFactor factor,bool is_alpha)3574 radv_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
3575 {
3576    switch (factor) {
3577    case VK_BLEND_FACTOR_ZERO:
3578       return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
3579    case VK_BLEND_FACTOR_ONE:
3580       return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
3581    case VK_BLEND_FACTOR_SRC_COLOR:
3582       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
3583    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
3584       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
3585    case VK_BLEND_FACTOR_SRC_ALPHA:
3586       return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
3587    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
3588       return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
3589    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
3590       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
3591    default:
3592       return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
3593    }
3594 }
3595 
3596 static inline uint32_t
radv_translate_blend_opt_function(VkBlendOp op)3597 radv_translate_blend_opt_function(VkBlendOp op)
3598 {
3599    switch (op) {
3600    case VK_BLEND_OP_ADD:
3601       return V_028760_OPT_COMB_ADD;
3602    case VK_BLEND_OP_SUBTRACT:
3603       return V_028760_OPT_COMB_SUBTRACT;
3604    case VK_BLEND_OP_REVERSE_SUBTRACT:
3605       return V_028760_OPT_COMB_REVSUBTRACT;
3606    case VK_BLEND_OP_MIN:
3607       return V_028760_OPT_COMB_MIN;
3608    case VK_BLEND_OP_MAX:
3609       return V_028760_OPT_COMB_MAX;
3610    default:
3611       return V_028760_OPT_COMB_BLEND_DISABLED;
3612    }
3613 }
3614 
3615 static inline bool
radv_blend_factor_uses_dst(VkBlendFactor factor)3616 radv_blend_factor_uses_dst(VkBlendFactor factor)
3617 {
3618    return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA ||
3619           factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
3620           factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
3621 }
3622 
3623 static inline bool
radv_is_dual_src(VkBlendFactor factor)3624 radv_is_dual_src(VkBlendFactor factor)
3625 {
3626    switch (factor) {
3627    case VK_BLEND_FACTOR_SRC1_COLOR:
3628    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
3629    case VK_BLEND_FACTOR_SRC1_ALPHA:
3630    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
3631       return true;
3632    default:
3633       return false;
3634    }
3635 }
3636 
3637 static ALWAYS_INLINE bool
radv_can_enable_dual_src(const struct vk_color_blend_attachment_state * att)3638 radv_can_enable_dual_src(const struct vk_color_blend_attachment_state *att)
3639 {
3640    VkBlendOp eqRGB = att->color_blend_op;
3641    VkBlendFactor srcRGB = att->src_color_blend_factor;
3642    VkBlendFactor dstRGB = att->dst_color_blend_factor;
3643    VkBlendOp eqA = att->alpha_blend_op;
3644    VkBlendFactor srcA = att->src_alpha_blend_factor;
3645    VkBlendFactor dstA = att->dst_alpha_blend_factor;
3646    bool eqRGB_minmax = eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX;
3647    bool eqA_minmax = eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX;
3648 
3649    if (!eqRGB_minmax && (radv_is_dual_src(srcRGB) || radv_is_dual_src(dstRGB)))
3650       return true;
3651    if (!eqA_minmax && (radv_is_dual_src(srcA) || radv_is_dual_src(dstA)))
3652       return true;
3653    return false;
3654 }
3655 
3656 static inline void
radv_normalize_blend_factor(VkBlendOp op,VkBlendFactor * src_factor,VkBlendFactor * dst_factor)3657 radv_normalize_blend_factor(VkBlendOp op, VkBlendFactor *src_factor, VkBlendFactor *dst_factor)
3658 {
3659    if (op == VK_BLEND_OP_MIN || op == VK_BLEND_OP_MAX) {
3660       *src_factor = VK_BLEND_FACTOR_ONE;
3661       *dst_factor = VK_BLEND_FACTOR_ONE;
3662    }
3663 }
3664 
3665 void radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor,
3666                            VkBlendFactor expected_dst, VkBlendFactor replacement_src);
3667 
3668 ALWAYS_INLINE static bool
radv_is_streamout_enabled(struct radv_cmd_buffer * cmd_buffer)3669 radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
3670 {
3671    struct radv_streamout_state *so = &cmd_buffer->state.streamout;
3672 
3673    /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
3674    return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout;
3675 }
3676 
3677 /*
3678  * Queue helper to get ring.
3679  * placed here as it needs queue + device structs.
3680  */
3681 static inline enum amd_ip_type
radv_queue_ring(const struct radv_queue * queue)3682 radv_queue_ring(const struct radv_queue *queue)
3683 {
3684    return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
3685 }
3686 
3687 /* radv_video */
3688 void radv_init_physical_device_decoder(struct radv_physical_device *pdevice);
3689 void radv_video_get_profile_alignments(struct radv_physical_device *pdevice,
3690                                        const VkVideoProfileListInfoKHR *profile_list, uint32_t *width_align_out,
3691                                        uint32_t *height_align_out);
3692 /**
3693  * Helper used for debugging compiler issues by enabling/disabling LLVM for a
3694  * specific shader stage (developers only).
3695  */
3696 static inline bool
radv_use_llvm_for_stage(const struct radv_device * device,UNUSED gl_shader_stage stage)3697 radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage)
3698 {
3699    return device->physical_device->use_llvm;
3700 }
3701 
3702 static inline bool
radv_has_shader_buffer_float_minmax(const struct radv_physical_device * pdevice,unsigned bitsize)3703 radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice, unsigned bitsize)
3704 {
3705    return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || pdevice->rad_info.gfx_level == GFX10 ||
3706           pdevice->rad_info.gfx_level == GFX10_3 || (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32);
3707 }
3708 
3709 static inline bool
radv_has_pops(const struct radv_physical_device * pdevice)3710 radv_has_pops(const struct radv_physical_device *pdevice)
3711 {
3712    return pdevice->rad_info.gfx_level >= GFX9 && !pdevice->use_llvm;
3713 }
3714 
/* Prototypes only from here to the end of this group; the definitions are
 * not part of this header.
 */
unsigned radv_compact_spi_shader_col_format(const struct radv_shader *ps,
                                            uint32_t spi_shader_col_format);

/* radv_perfcounter.c */
void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs,
                                   unsigned shaders);
void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs,
                                     int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
                                    int family);

/* radv_spm.c */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs,
                         enum radv_queue_family qf);

/* Pipeline destruction, one entry point per pipeline type. */
void radv_destroy_graphics_pipeline(struct radv_device *device,
                                    struct radv_graphics_pipeline *pipeline);
void radv_destroy_graphics_lib_pipeline(struct radv_device *device,
                                        struct radv_graphics_lib_pipeline *pipeline);
void radv_destroy_compute_pipeline(struct radv_device *device,
                                   struct radv_compute_pipeline *pipeline);
void radv_destroy_ray_tracing_pipeline(struct radv_device *device,
                                       struct radv_ray_tracing_pipeline *pipeline);

/* Conditional rendering (begin/end pair). */
void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
                                      bool draw_visible);
void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);

/* Gang helpers. */
bool radv_gang_init(struct radv_cmd_buffer *cmd_buffer);
void radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer);
3738 
3739 static inline bool
radv_uses_device_generated_commands(const struct radv_device * device)3740 radv_uses_device_generated_commands(const struct radv_device *device)
3741 {
3742    return device->vk.enabled_features.deviceGeneratedCommands || device->vk.enabled_features.deviceGeneratedCompute;
3743 }
3744 
3745 static inline bool
radv_uses_primitives_generated_query(const struct radv_device * device)3746 radv_uses_primitives_generated_query(const struct radv_device *device)
3747 {
3748    return device->vk.enabled_features.primitivesGeneratedQuery ||
3749           device->vk.enabled_features.primitivesGeneratedQueryWithRasterizerDiscard ||
3750           device->vk.enabled_features.primitivesGeneratedQueryWithNonZeroStreams;
3751 }
3752 
3753 static inline bool
radv_uses_image_float32_atomics(const struct radv_device * device)3754 radv_uses_image_float32_atomics(const struct radv_device *device)
3755 {
3756    return device->vk.enabled_features.shaderImageFloat32Atomics ||
3757           device->vk.enabled_features.sparseImageFloat32Atomics ||
3758           device->vk.enabled_features.shaderImageFloat32AtomicMinMax ||
3759           device->vk.enabled_features.sparseImageFloat32AtomicMinMax;
3760 }
3761 
/* Prototype only; the definition is not part of this header. */
bool
radv_device_fault_detection_enabled(const struct radv_device *device);
3763 
3764 #define RADV_FROM_HANDLE(__radv_type, __name, __handle) VK_FROM_HANDLE(__radv_type, __name, __handle)
3765 
3766 VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
3767 VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
3768 VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
3769 VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE)
3770 VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
3771 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
3772 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, vk.base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW)
3773 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
3774 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET)
3775 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
3776                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
3777 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, VkDescriptorUpdateTemplate,
3778                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
3779 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY)
3780 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
3781 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
3782 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW);
3783 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
3784                                VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
3785 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
3786 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT)
3787 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, vk.base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)
3788 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, vk.base, VkSampler, VK_OBJECT_TYPE_SAMPLER)
3789 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_object, base, VkShaderEXT, VK_OBJECT_TYPE_SHADER_EXT);
3790 
3791 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
3792 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR,
3793                                VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
3794 
3795 static inline uint64_t
radv_get_tdr_timeout_for_ip(enum amd_ip_type ip_type)3796 radv_get_tdr_timeout_for_ip(enum amd_ip_type ip_type)
3797 {
3798    const uint64_t compute_tdr_duration_ns = 60000000000ull; /* 1 minute (default in kernel) */
3799    const uint64_t other_tdr_duration_ns = 10000000000ull;   /* 10 seconds (default in kernel) */
3800 
3801    return ip_type == AMD_IP_COMPUTE ? compute_tdr_duration_ns : other_tdr_duration_ns;
3802 }
3803 
3804 #ifdef __cplusplus
3805 }
3806 #endif
3807 
3808 #endif /* RADV_PRIVATE_H */
3809