• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30 
31 #include <assert.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #ifdef HAVE_VALGRIND
38 #include <memcheck.h>
39 #include <valgrind.h>
40 #define VG(x) x
41 #else
42 #define VG(x) ((void)0)
43 #endif
44 
45 #include "c11/threads.h"
46 #ifndef _WIN32
47 #include <amdgpu.h>
48 #include <xf86drm.h>
49 #endif
50 #include "compiler/shader_enums.h"
51 #include "util/bitscan.h"
52 #include "util/list.h"
53 #include "util/macros.h"
54 #include "util/rwlock.h"
55 #include "util/xmlconfig.h"
56 #include "vk_alloc.h"
57 #include "vk_buffer.h"
58 #include "vk_command_buffer.h"
59 #include "vk_command_pool.h"
60 #include "vk_debug_report.h"
61 #include "vk_device.h"
62 #include "vk_format.h"
63 #include "vk_instance.h"
64 #include "vk_log.h"
65 #include "vk_physical_device.h"
66 #include "vk_shader_module.h"
67 #include "vk_queue.h"
68 #include "vk_util.h"
69 #include "vk_image.h"
70 #include "vk_framebuffer.h"
71 
72 #include "ac_binary.h"
73 #include "ac_gpu_info.h"
74 #include "ac_shader_util.h"
75 #include "ac_spm.h"
76 #include "ac_sqtt.h"
77 #include "ac_surface.h"
78 #include "radv_constants.h"
79 #include "radv_descriptor_set.h"
80 #include "radv_radeon_winsys.h"
81 #include "radv_shader.h"
82 #include "radv_shader_args.h"
83 #include "sid.h"
84 
85 #include "radix_sort/radix_sort_vk_devaddr.h"
86 
/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;
93 
94 #include <vulkan/vk_android_native_buffer.h>
95 #include <vulkan/vk_icd.h>
96 #include <vulkan/vulkan.h>
97 #include <vulkan/vulkan_android.h>
98 
99 #include "radv_entrypoints.h"
100 
101 #include "wsi_common.h"
102 
103 #ifdef __cplusplus
104 extern "C"
105 {
106 #endif
107 
/* Helper to determine if we should compile
 * any of the Android AHB support.
 *
 * To actually enable the ext we also need
 * the necessary kernel support.
 */
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
#include <vndk/hardware_buffer.h>
#else
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
#endif

/* Calibrated timestamp support is compiled out when building for Windows. */
#ifdef _WIN32
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
#else
#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
#endif

/* Annotates a printf-style function: `a` is the 1-based index of the format
 * string parameter and `b` the index of the first variadic argument.
 * Expands to nothing when building for Windows.
 */
#ifdef _WIN32
#define radv_printflike(a, b)
#else
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
#endif
132 
/**
 * Round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (asserted). The sum v + a - 1 is
 * computed in 32 bits and wraps on overflow.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
139 
/**
 * Round v up to the next multiple of a, where a does not have to be a
 * power of two.
 *
 * a must be non-zero; a zero alignment would divide by zero.
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   assert(a != 0);
   return (v + a - 1) / a * a;
}
145 
/**
 * 64-bit variant of align_u32(): round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (asserted).
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
152 
/**
 * Signed variant of align_u32(): round v up (towards positive infinity) to
 * the next multiple of a.
 *
 * a must be a positive power of two. Requiring a > 0 up front keeps the
 * negation in the power-of-two check from being evaluated for INT32_MIN,
 * which would overflow.
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a > 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
159 
/**
 * Return true when n is a multiple of a.
 *
 * Alignment must be a power of 2. Note that the assert also lets a == 0
 * through; in that case only n == 0 is reported as aligned.
 */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}
167 
/**
 * Divide v by a, rounding the quotient up.
 *
 * Despite the name this returns the number of a-sized units needed to hold
 * v, not v rounded to a multiple of a. a must be non-zero.
 */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   assert(a != 0);
   return (v + a - 1) / a;
}
173 
/**
 * 64-bit variant of round_up_u32(): divide v by a, rounding the quotient up.
 *
 * a must be non-zero.
 */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   assert(a != 0);
   return (v + a - 1) / a;
}
179 
/**
 * Shrink a dimension n by the given number of levels (halving per level),
 * never going below 1.
 *
 * A zero dimension stays zero. Shift counts of 32 or more are handled
 * explicitly because shifting a 32-bit value by its full width is undefined
 * in C; the result for any non-zero n is then 1.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;

   if (levels >= 32)
      return 1;

   const uint32_t shrunk = n >> levels;
   return shrunk != 0 ? shrunk : 1;
}
/**
 * Clamp f to the closed interval [min, max].
 *
 * min must be strictly less than max (asserted). A value that compares
 * neither greater than max nor less than min is returned unchanged.
 */
static inline float
radv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f > max)
      return max;
   if (f < min)
      return min;
   return f;
}
200 
/**
 * Clear the bits of clear_mask in *inout_mask.
 *
 * Returns true if at least one of those bits was set beforehand (and has
 * now been cleared); returns false and leaves *inout_mask untouched
 * otherwise.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   if (!(*inout_mask & clear_mask))
      return false;

   *inout_mask &= ~clear_mask;
   return true;
}
211 
/**
 * Convert value to signed fixed point with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and truncated towards zero by the
 * float-to-int conversion. The scale is built from an unsigned shift so
 * that frac_bits == 31 does not shift into the sign bit of an int;
 * frac_bits must be below 32.
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   return (int)(value * (float)(1u << frac_bits));
}
217 
/**
 * Convert value to unsigned fixed point with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and truncated by the float-to-unsigned
 * conversion. The scale is built from an unsigned shift so that
 * frac_bits == 31 yields 2^31 rather than shifting into the sign bit of an
 * int; frac_bits must be below 32.
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   return (unsigned int)(value * (float)(1u << frac_bits));
}
223 
224 /* Whenever we generate an error, pass it through this function. Useful for
225  * debugging, where we can break on it. Only call at error site, not when
226  * propagating errors. Might be useful to plug in a stack trace here.
227  */
228 
/* Forward declarations; only pointers to these are needed at this point. */
struct radv_image_view;
struct radv_instance;
231 
/* A non-fatal assert.  Useful for debugging.
 *
 * With NDEBUG defined the macro expands to an empty statement and x is not
 * evaluated. Otherwise a false condition prints the file, line and the text
 * of the expression to stderr, and execution continues.
 */
#ifdef NDEBUG
#define radv_assert(x)                                                                             \
   do {                                                                                            \
   } while (0)
#else
#define radv_assert(x)                                                                             \
   do {                                                                                            \
      if (unlikely(!(x)))                                                                          \
         fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x);                            \
   } while (0)
#endif
244 
/* Lookup of an entrypoint index by name, one function per dispatch level. */
int radv_get_instance_entrypoint_index(const char *name);
int radv_get_device_entrypoint_index(const char *name);
int radv_get_physical_device_entrypoint_index(const char *name);

/* Reverse lookup: entrypoint name for a given index. */
const char *radv_get_instance_entry_name(int index);
const char *radv_get_physical_device_entry_name(int index);
const char *radv_get_device_entry_name(int index);
252 
/* queue types
 *
 * The first RADV_MAX_QUEUE_FAMILIES values are the real families and are
 * usable as array indices. RADV_QUEUE_FOREIGN and RADV_QUEUE_IGNORED are
 * sentinels placed after them.
 */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,
   RADV_QUEUE_COMPUTE,
   RADV_QUEUE_TRANSFER,
   RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_IGNORED,
};
262 
263 struct radv_perfcounter_desc;
264 
265 struct radv_physical_device {
266    struct vk_physical_device vk;
267 
268    /* Link in radv_instance::physical_devices */
269    struct list_head link;
270 
271    struct radv_instance *instance;
272 
273    struct radeon_winsys *ws;
274    struct radeon_info rad_info;
275    char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
276    char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
277    uint8_t driver_uuid[VK_UUID_SIZE];
278    uint8_t device_uuid[VK_UUID_SIZE];
279    uint8_t cache_uuid[VK_UUID_SIZE];
280 
281    int local_fd;
282    int master_fd;
283    struct wsi_device wsi_device;
284 
285    bool out_of_order_rast_allowed;
286 
287    /* Whether DCC should be enabled for MSAA textures. */
288    bool dcc_msaa_allowed;
289 
290    /* Whether to enable NGG. */
291    bool use_ngg;
292 
293    /* Whether to enable NGG culling. */
294    bool use_ngg_culling;
295 
296    /* Whether to enable NGG streamout. */
297    bool use_ngg_streamout;
298 
299    /* Number of threads per wave. */
300    uint8_t ps_wave_size;
301    uint8_t cs_wave_size;
302    uint8_t ge_wave_size;
303    uint8_t rt_wave_size;
304 
305    /* Whether to use the LLVM compiler backend */
306    bool use_llvm;
307 
308    /* Whether to emulate ETC2 image support on HW without support. */
309    bool emulate_etc2;
310 
311    /* This is the drivers on-disk cache used as a fallback as opposed to
312     * the pipeline cache defined by apps.
313     */
314    struct disk_cache *disk_cache;
315 
316    VkPhysicalDeviceMemoryProperties memory_properties;
317    enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
318    enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
319    unsigned heaps;
320 
321    /* Bitmask of memory types that use the 32-bit address space. */
322    uint32_t memory_types_32bit;
323 
324 #ifndef _WIN32
325    int available_nodes;
326    drmPciBusInfo bus_info;
327 
328    dev_t primary_devid;
329    dev_t render_devid;
330 #endif
331 
332    nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];
333 
334    enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
335    uint32_t num_queues;
336 
337    uint32_t gs_table_depth;
338 
339    struct ac_hs_info hs;
340    struct ac_task_info task_info;
341 
342    /* Performance counters. */
343    struct ac_perfcounters ac_perfcounters;
344 
345    uint32_t num_perfcounters;
346    struct radv_perfcounter_desc *perfcounters;
347 };
348 
349 struct radv_instance {
350    struct vk_instance vk;
351 
352    VkAllocationCallbacks alloc;
353 
354    uint64_t debug_flags;
355    uint64_t perftest_flags;
356 
357    bool physical_devices_enumerated;
358    struct list_head physical_devices;
359 
360    struct driOptionCache dri_options;
361    struct driOptionCache available_dri_options;
362 
363    /**
364     * Workarounds for game bugs.
365     */
366    bool enable_mrt_output_nan_fixup;
367    bool disable_tc_compat_htile_in_general;
368    bool disable_shrink_image_store;
369    bool absolute_depth_bias;
370    bool disable_aniso_single_level;
371    bool zero_vram;
372    bool disable_sinking_load_input_fs;
373    bool flush_before_query_copy;
374 };
375 
376 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
377 void radv_finish_wsi(struct radv_physical_device *physical_device);
378 
379 struct cache_entry;
380 
381 struct radv_pipeline_cache {
382    struct vk_object_base base;
383    struct radv_device *device;
384    mtx_t mutex;
385    VkPipelineCacheCreateFlags flags;
386 
387    uint32_t total_size;
388    uint32_t table_size;
389    uint32_t kernel_count;
390    struct cache_entry **hash_table;
391    bool modified;
392 
393    VkAllocationCallbacks alloc;
394 };
395 
396 struct radv_shader_binary;
397 struct radv_shader;
398 struct radv_pipeline_shader_stack_size;
399 
400 void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
401 void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
402 bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
403 
404 bool radv_create_shaders_from_pipeline_cache(
405    struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
406    struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
407    uint32_t *num_stack_sizes, bool *found_in_application_cache);
408 
409 void radv_pipeline_cache_insert_shaders(
410    struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
411    struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
412    const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
413 
414 VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
415                              struct radv_shader_binary **binaries,
416                              struct radv_shader_binary *gs_copy_binary);
417 
/* Depth/stencil layout classes used by the meta blit code; see
 * radv_meta_blit_ds_to_type() for the mapping from VkImageLayout.
 * RADV_BLIT_DS_LAYOUT_COUNT is the number of classes.
 */
enum radv_blit_ds_layout {
   RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
   RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
   RADV_BLIT_DS_LAYOUT_COUNT,
};
423 
424 static inline enum radv_blit_ds_layout
radv_meta_blit_ds_to_type(VkImageLayout layout)425 radv_meta_blit_ds_to_type(VkImageLayout layout)
426 {
427    return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
428                                               : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
429 }
430 
431 static inline VkImageLayout
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)432 radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
433 {
434    return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
435                                                        : VK_IMAGE_LAYOUT_GENERAL;
436 }
437 
/* Destination layout classes used by meta operations; see
 * radv_meta_dst_layout_from_layout() for the mapping from VkImageLayout.
 * RADV_META_DST_LAYOUT_COUNT is the number of classes.
 */
enum radv_meta_dst_layout {
   RADV_META_DST_LAYOUT_GENERAL,
   RADV_META_DST_LAYOUT_OPTIMAL,
   RADV_META_DST_LAYOUT_COUNT,
};
443 
444 static inline enum radv_meta_dst_layout
radv_meta_dst_layout_from_layout(VkImageLayout layout)445 radv_meta_dst_layout_from_layout(VkImageLayout layout)
446 {
447    return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
448                                               : RADV_META_DST_LAYOUT_OPTIMAL;
449 }
450 
451 static inline VkImageLayout
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)452 radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
453 {
454    return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
455                                                  : VK_IMAGE_LAYOUT_GENERAL;
456 }
457 
458 struct radv_meta_state {
459    VkAllocationCallbacks alloc;
460 
461    struct radv_pipeline_cache cache;
462 
463    /*
464     * For on-demand pipeline creation, makes sure that
465     * only one thread tries to build a pipeline at the same time.
466     */
467    mtx_t mtx;
468 
469    /**
470     * Use array element `i` for images with `2^i` samples.
471     */
472    struct {
473       VkPipeline color_pipelines[NUM_META_FS_KEYS];
474    } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];
475 
476    struct {
477       VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
478       VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
479       VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
480 
481       VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
482       VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
483       VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
484    } ds_clear[MAX_SAMPLES_LOG2];
485 
486    VkPipelineLayout clear_color_p_layout;
487    VkPipelineLayout clear_depth_p_layout;
488    VkPipelineLayout clear_depth_unrestricted_p_layout;
489 
490    /* Optimized compute fast HTILE clear for stencil or depth only. */
491    VkPipeline clear_htile_mask_pipeline;
492    VkPipelineLayout clear_htile_mask_p_layout;
493    VkDescriptorSetLayout clear_htile_mask_ds_layout;
494 
495    /* Copy VRS into HTILE. */
496    VkPipeline copy_vrs_htile_pipeline;
497    VkPipelineLayout copy_vrs_htile_p_layout;
498    VkDescriptorSetLayout copy_vrs_htile_ds_layout;
499 
500    /* Clear DCC with comp-to-single. */
501    VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
502    VkPipelineLayout clear_dcc_comp_to_single_p_layout;
503    VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;
504 
505    struct {
506       /** Pipeline that blits from a 1D image. */
507       VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
508 
509       /** Pipeline that blits from a 2D image. */
510       VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
511 
512       /** Pipeline that blits from a 3D image. */
513       VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
514 
515       VkPipeline depth_only_1d_pipeline;
516       VkPipeline depth_only_2d_pipeline;
517       VkPipeline depth_only_3d_pipeline;
518 
519       VkPipeline stencil_only_1d_pipeline;
520       VkPipeline stencil_only_2d_pipeline;
521       VkPipeline stencil_only_3d_pipeline;
522       VkPipelineLayout pipeline_layout;
523       VkDescriptorSetLayout ds_layout;
524    } blit;
525 
526    struct {
527       VkPipelineLayout p_layouts[5];
528       VkDescriptorSetLayout ds_layouts[5];
529       VkPipeline pipelines[5][NUM_META_FS_KEYS];
530 
531       VkPipeline depth_only_pipeline[5];
532 
533       VkPipeline stencil_only_pipeline[5];
534    } blit2d[MAX_SAMPLES_LOG2];
535 
536    struct {
537       VkPipelineLayout img_p_layout;
538       VkDescriptorSetLayout img_ds_layout;
539       VkPipeline pipeline;
540       VkPipeline pipeline_3d;
541    } itob;
542    struct {
543       VkPipelineLayout img_p_layout;
544       VkDescriptorSetLayout img_ds_layout;
545       VkPipeline pipeline;
546       VkPipeline pipeline_3d;
547    } btoi;
548    struct {
549       VkPipelineLayout img_p_layout;
550       VkDescriptorSetLayout img_ds_layout;
551       VkPipeline pipeline;
552    } btoi_r32g32b32;
553    struct {
554       VkPipelineLayout img_p_layout;
555       VkDescriptorSetLayout img_ds_layout;
556       VkPipeline pipeline[MAX_SAMPLES_LOG2];
557       VkPipeline pipeline_3d;
558    } itoi;
559    struct {
560       VkPipelineLayout img_p_layout;
561       VkDescriptorSetLayout img_ds_layout;
562       VkPipeline pipeline;
563    } itoi_r32g32b32;
564    struct {
565       VkPipelineLayout img_p_layout;
566       VkDescriptorSetLayout img_ds_layout;
567       VkPipeline pipeline[MAX_SAMPLES_LOG2];
568       VkPipeline pipeline_3d;
569    } cleari;
570    struct {
571       VkPipelineLayout img_p_layout;
572       VkDescriptorSetLayout img_ds_layout;
573       VkPipeline pipeline;
574    } cleari_r32g32b32;
575    struct {
576       VkPipelineLayout p_layout;
577       VkDescriptorSetLayout ds_layout;
578       VkPipeline pipeline[MAX_SAMPLES_LOG2];
579    } fmask_copy;
580 
581    struct {
582       VkPipelineLayout p_layout;
583       VkPipeline pipeline[NUM_META_FS_KEYS];
584    } resolve;
585 
586    struct {
587       VkDescriptorSetLayout ds_layout;
588       VkPipelineLayout p_layout;
589       struct {
590          VkPipeline pipeline;
591          VkPipeline i_pipeline;
592          VkPipeline srgb_pipeline;
593       } rc[MAX_SAMPLES_LOG2];
594 
595       VkPipeline depth_zero_pipeline;
596       struct {
597          VkPipeline average_pipeline;
598          VkPipeline max_pipeline;
599          VkPipeline min_pipeline;
600       } depth[MAX_SAMPLES_LOG2];
601 
602       VkPipeline stencil_zero_pipeline;
603       struct {
604          VkPipeline max_pipeline;
605          VkPipeline min_pipeline;
606       } stencil[MAX_SAMPLES_LOG2];
607    } resolve_compute;
608 
609    struct {
610       VkDescriptorSetLayout ds_layout;
611       VkPipelineLayout p_layout;
612 
613       struct {
614          VkPipeline pipeline[NUM_META_FS_KEYS];
615       } rc[MAX_SAMPLES_LOG2];
616 
617       VkPipeline depth_zero_pipeline;
618       struct {
619          VkPipeline average_pipeline;
620          VkPipeline max_pipeline;
621          VkPipeline min_pipeline;
622       } depth[MAX_SAMPLES_LOG2];
623 
624       VkPipeline stencil_zero_pipeline;
625       struct {
626          VkPipeline max_pipeline;
627          VkPipeline min_pipeline;
628       } stencil[MAX_SAMPLES_LOG2];
629    } resolve_fragment;
630 
631    struct {
632       VkPipelineLayout p_layout;
633       VkPipeline decompress_pipeline;
634       VkPipeline resummarize_pipeline;
635    } depth_decomp[MAX_SAMPLES_LOG2];
636 
637    VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
638    VkPipelineLayout expand_depth_stencil_compute_p_layout;
639    VkPipeline expand_depth_stencil_compute_pipeline;
640 
641    struct {
642       VkPipelineLayout p_layout;
643       VkPipeline cmask_eliminate_pipeline;
644       VkPipeline fmask_decompress_pipeline;
645       VkPipeline dcc_decompress_pipeline;
646 
647       VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
648       VkPipelineLayout dcc_decompress_compute_p_layout;
649       VkPipeline dcc_decompress_compute_pipeline;
650    } fast_clear_flush;
651 
652    struct {
653       VkPipelineLayout fill_p_layout;
654       VkPipelineLayout copy_p_layout;
655       VkPipeline fill_pipeline;
656       VkPipeline copy_pipeline;
657    } buffer;
658 
659    struct {
660       VkDescriptorSetLayout ds_layout;
661       VkPipelineLayout p_layout;
662       VkPipeline occlusion_query_pipeline;
663       VkPipeline pipeline_statistics_query_pipeline;
664       VkPipeline tfb_query_pipeline;
665       VkPipeline timestamp_query_pipeline;
666       VkPipeline pg_query_pipeline;
667    } query;
668 
669    struct {
670       VkDescriptorSetLayout ds_layout;
671       VkPipelineLayout p_layout;
672       VkPipeline pipeline[MAX_SAMPLES_LOG2];
673    } fmask_expand;
674 
675    struct {
676       VkDescriptorSetLayout ds_layout;
677       VkPipelineLayout p_layout;
678       VkPipeline pipeline[32];
679    } dcc_retile;
680 
681    struct {
682       VkPipelineLayout leaf_p_layout;
683       VkPipeline leaf_pipeline;
684       VkPipelineLayout morton_p_layout;
685       VkPipeline morton_pipeline;
686       VkPipelineLayout internal_p_layout;
687       VkPipeline internal_pipeline;
688       VkPipelineLayout copy_p_layout;
689       VkPipeline copy_pipeline;
690 
691       struct radix_sort_vk *radix_sort;
692       struct radix_sort_vk_sort_devaddr_info radix_sort_info;
693    } accel_struct_build;
694 
695    struct {
696       VkDescriptorSetLayout ds_layout;
697       VkPipelineLayout p_layout;
698       VkPipeline pipeline;
699    } etc_decode;
700 
701    struct {
702       VkDescriptorSetLayout ds_layout;
703       VkPipelineLayout p_layout;
704       VkPipeline pipeline;
705    } dgc_prepare;
706 };
707 
/* Number of hardware contexts kept per device: one for each priority value
 * up to and including RADEON_CTX_PRIORITY_REALTIME.
 */
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)

struct radv_deferred_queue_submission;
711 
712 static inline enum radv_queue_family
vk_queue_to_radv(const struct radv_physical_device * phys_dev,int queue_family_index)713 vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
714 {
715    if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL ||
716        queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
717       return RADV_QUEUE_FOREIGN;
718    if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
719       return RADV_QUEUE_IGNORED;
720 
721    assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
722    return phys_dev->vk_queue_to_radv[queue_family_index];
723 }
724 
/* Map a driver queue family to an amd_ip_type for the given physical device. */
enum amd_ip_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
                                           enum radv_queue_family f);
727 
/* Sizes and enable flags describing the scratch space and rings of a queue. */
struct radv_queue_ring_info {
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   bool tess_rings;
   bool task_rings;
   bool mesh_scratch_ring;
   bool gds;
   bool gds_oa;
   bool sample_positions;
};
742 
743 struct radv_queue_state {
744    enum radv_queue_family qf;
745    struct radv_queue_ring_info ring_info;
746 
747    struct radeon_winsys_bo *scratch_bo;
748    struct radeon_winsys_bo *descriptor_bo;
749    struct radeon_winsys_bo *compute_scratch_bo;
750    struct radeon_winsys_bo *esgs_ring_bo;
751    struct radeon_winsys_bo *gsvs_ring_bo;
752    struct radeon_winsys_bo *tess_rings_bo;
753    struct radeon_winsys_bo *task_rings_bo;
754    struct radeon_winsys_bo *mesh_scratch_ring_bo;
755    struct radeon_winsys_bo *gds_bo;
756    struct radeon_winsys_bo *gds_oa_bo;
757 
758    struct radeon_cmdbuf *initial_preamble_cs;
759    struct radeon_cmdbuf *initial_full_flush_preamble_cs;
760    struct radeon_cmdbuf *continue_preamble_cs;
761 };
762 
763 struct radv_queue {
764    struct vk_queue vk;
765    struct radv_device *device;
766    struct radeon_winsys_ctx *hw_ctx;
767    enum radeon_ctx_priority priority;
768    struct radv_queue_state state;
769    struct radv_queue_state *ace_internal_state;
770 };
771 
/* Number of border color slots and the byte size of a buffer holding one
 * VkClearColorValue per slot.
 */
#define RADV_BORDER_COLOR_COUNT       4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
774 
775 struct radv_device_border_color_data {
776    bool used[RADV_BORDER_COLOR_COUNT];
777 
778    struct radeon_winsys_bo *bo;
779    VkClearColorValue *colors_gpu_ptr;
780 
781    /* Mutex is required to guarantee vkCreateSampler thread safety
782     * given that we are writing to a buffer and checking color occupation */
783    mtx_t mutex;
784 };
785 
/* Rates selectable for forced VRS (see radv_device::force_vrs). */
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};
792 
/* State of a notifier thread.
 * NOTE(review): fd/watch look like a file-watch descriptor pair and quit
 * like a stop request for the thread; confirm against the implementation.
 */
struct radv_notifier {
   int fd;
   int watch;
   bool quit;
   thrd_t thread;
};
799 
800 struct radv_device {
801    struct vk_device vk;
802 
803    struct radv_instance *instance;
804    struct radeon_winsys *ws;
805 
806    struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
807    struct radv_meta_state meta_state;
808 
809    struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
810    int queue_count[RADV_MAX_QUEUE_FAMILIES];
811 
812    bool pbb_allowed;
813    uint32_t scratch_waves;
814    uint32_t dispatch_initiator;
815    uint32_t dispatch_initiator_task;
816 
817    /* MSAA sample locations.
818     * The first index is the sample index.
819     * The second index is the coordinate: X, Y. */
820    float sample_locations_1x[1][2];
821    float sample_locations_2x[2][2];
822    float sample_locations_4x[4][2];
823    float sample_locations_8x[8][2];
824 
825    /* GFX7 and later */
826    uint32_t gfx_init_size_dw;
827    struct radeon_winsys_bo *gfx_init;
828 
829    struct radeon_winsys_bo *trace_bo;
830    uint32_t *trace_id_ptr;
831 
832    /* Whether to keep shader debug info, for debugging. */
833    bool keep_shader_info;
834 
835    struct radv_physical_device *physical_device;
836 
837    /* Backup in-memory cache to be used if the app doesn't provide one */
838    struct radv_pipeline_cache *mem_cache;
839 
840    /*
841     * use different counters so MSAA MRTs get consecutive surface indices,
842     * even if MASK is allocated in between.
843     */
844    uint32_t image_mrt_offset_counter;
845    uint32_t fmask_mrt_offset_counter;
846 
847    struct list_head shader_arenas;
848    unsigned shader_arena_shift;
849    uint8_t shader_free_list_mask;
850    struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
851    struct list_head shader_block_obj_pool;
852    mtx_t shader_arena_mutex;
853 
854    /* For detecting VM faults reported by dmesg. */
855    uint64_t dmesg_timestamp;
856 
857    /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
858    bool robust_buffer_access;
859    bool robust_buffer_access2;
860 
861    /* Whether to inline the compute dispatch size in user sgprs. */
862    bool load_grid_size_from_user_sgpr;
863 
864    /* Whether the driver uses a global BO list. */
865    bool use_global_bo_list;
866 
867    /* Whether attachment VRS is enabled. */
868    bool attachment_vrs_enabled;
869 
870    /* Whether shader image 32-bit float atomics are enabled. */
871    bool image_float32_atomics;
872 
873    /* Whether 2D views of 3D image is enabled. */
874    bool image_2d_view_of_3d;
875 
876    /* Whether primitives generated query features are enabled. */
877    bool primitives_generated_query;
878 
879    /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
880    int force_aniso;
881 
882    struct radv_device_border_color_data border_color_data;
883 
884    /* Thread trace. */
885    struct ac_thread_trace_data thread_trace;
886 
887    /* SPM. */
888    struct ac_spm_trace_data spm_trace;
889 
890    /* Trap handler. */
891    struct radv_trap_handler_shader *trap_handler_shader;
892    struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
893    uint32_t *tma_ptr;
894 
895    /* Overallocation. */
896    bool overallocation_disallowed;
897    uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
898    mtx_t overallocation_mutex;
899 
900    /* RADV_FORCE_VRS. */
901    struct radv_notifier notifier;
902    enum radv_force_vrs force_vrs;
903 
904    /* Depth image for VRS when not bound by the app. */
905    struct {
906       struct radv_image *image;
907       struct radv_buffer *buffer; /* HTILE */
908       struct radv_device_memory *mem;
909    } vrs;
910 
911    struct u_rwlock vs_prologs_lock;
912    struct hash_table *vs_prologs;
913 
914    /* Prime blit sdma queue */
915    struct radv_queue *private_sdma_queue;
916 
917    struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
918    struct radv_shader_part *instance_rate_vs_prologs[816];
919 
920    simple_mtx_t trace_mtx;
921 
922    /* Whether per-vertex VRS is forced. */
923    bool force_vrs_enabled;
924 
925    /* Whether shaders created through application entrypoints are considered internal. */
926    bool app_shaders_internal;
927 
928    simple_mtx_t pstate_mtx;
929    unsigned pstate_cnt;
930 
931    /* BO to contain some performance counter helpers:
932     * - A lock for profiling cmdbuffers.
933     * - a temporary fence for the end query synchronization.
934     * - the pass to use for profiling. (as an array of bools)
935     */
936    struct radeon_winsys_bo *perf_counter_bo;
937 
938    /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
939    struct radeon_cmdbuf **perf_counter_lock_cs;
940 
941    bool uses_device_generated_commands;
942 };
943 
/* Acquire / release the device's performance counters. */
bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device);
946 
947 struct radv_device_memory {
948    struct vk_object_base base;
949    struct radeon_winsys_bo *bo;
950    /* for dedicated allocations */
951    struct radv_image *image;
952    struct radv_buffer *buffer;
953    uint32_t heap_index;
954    uint64_t alloc_size;
955    void *map;
956    void *user_ptr;
957 
958 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
959    struct AHardwareBuffer *android_hardware_buffer;
960 #endif
961 };
962 
/* Initialize / finish a radv_device_memory object. */
void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
                             struct radeon_winsys_bo *bo);
void radv_device_memory_finish(struct radv_device_memory *mem);
966 
/* An address range: start address and size. Used for the dynamic
 * descriptors of a descriptor set.
 */
struct radv_descriptor_range {
   uint64_t va;
   uint32_t size;
};
971 
/**
 * Common part of every descriptor set.
 *
 * Embedded as the first member of radv_descriptor_set and
 * radv_push_descriptor_set, and used directly for
 * radv_cmd_buffer::meta_push_descriptors.
 */
972 struct radv_descriptor_set_header {
973    struct vk_object_base base;
974    struct radv_descriptor_set_layout *layout;
975    uint32_t size;
976    uint32_t buffer_count;
977 
       /* NOTE(review): bo/va/mapped_ptr presumably describe the same descriptor
        * memory (BO, GPU address, CPU mapping) - confirm against the allocator. */
978    struct radeon_winsys_bo *bo;
979    uint64_t va;
980    uint32_t *mapped_ptr;
981    struct radv_descriptor_range *dynamic_descriptors;
982 };
983 
/**
 * A descriptor set: the common header followed by a flexible array of
 * winsys BO pointers.
 *
 * NOTE(review): the array length is presumably header.buffer_count - confirm
 * against the code that allocates sets.
 */
984 struct radv_descriptor_set {
985    struct radv_descriptor_set_header header;
986 
987    struct radeon_winsys_bo *descriptors[];
988 };
989 
/**
 * Descriptor set header plus a capacity value; embedded in
 * radv_descriptor_state as push_set.
 *
 * NOTE(review): capacity is presumably the size of the storage currently
 * backing `set` - confirm.
 */
990 struct radv_push_descriptor_set {
991    struct radv_descriptor_set_header set;
992    uint32_t capacity;
993 };
994 
/**
 * Element type of radv_descriptor_pool::entries: an (offset, size) pair
 * together with the descriptor set it refers to.
 *
 * NOTE(review): offset/size are presumably relative to the pool's memory - confirm.
 */
995 struct radv_descriptor_pool_entry {
996    uint32_t offset;
997    uint32_t size;
998    struct radv_descriptor_set *set;
999 };
1000 
1001 struct radv_descriptor_pool {
1002    struct vk_object_base base;
1003    struct radeon_winsys_bo *bo;
1004    uint8_t *host_bo;
1005    uint8_t *mapped_ptr;
1006    uint64_t current_offset;
1007    uint64_t size;
1008 
1009    uint8_t *host_memory_base;
1010    uint8_t *host_memory_ptr;
1011    uint8_t *host_memory_end;
1012 
1013    uint32_t entry_count;
1014    uint32_t max_entry_count;
1015    struct radv_descriptor_pool_entry entries[0];
1016 };
1017 
/**
 * One entry of a descriptor update template (element type of
 * radv_descriptor_update_template::entry).
 *
 * Describes where descriptors of one type are read from (src_offset /
 * src_stride, in bytes) and where they are written to (dst_offset /
 * dst_stride).
 */
1018 struct radv_descriptor_update_template_entry {
1019    VkDescriptorType descriptor_type;
1020 
1021    /* The number of descriptors to update */
1022    uint32_t descriptor_count;
1023 
1024    /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
1025    uint32_t dst_offset;
1026 
1027    /* In dwords. Not valid/used for dynamic descriptors */
1028    uint32_t dst_stride;
1029 
1030    uint32_t buffer_offset;
1031 
1032    /* Only valid for combined image samplers and samplers */
1033    uint8_t has_sampler;
1034    uint8_t sampler_offset;
1035 
1036    /* In bytes */
1037    size_t src_offset;
1038    size_t src_stride;
1039 
1040    /* For push descriptors */
1041    const uint32_t *immutable_samplers;
1042 };
1043 
1044 struct radv_descriptor_update_template {
1045    struct vk_object_base base;
1046    uint32_t entry_count;
1047    VkPipelineBindPoint bind_point;
1048    struct radv_descriptor_update_template_entry entry[0];
1049 };
1050 
/**
 * RADV buffer object: the common vk_buffer state plus the memory binding
 * (winsys BO and offset), which is only set once the buffer is bound.
 */
1051 struct radv_buffer {
1052    struct vk_buffer vk;
1053 
1054    /* Set when bound */
1055    struct radeon_winsys_bo *bo;
1056    VkDeviceSize offset;
1057 };
1058 
1059 void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
1060                       struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
1061 void radv_buffer_finish(struct radv_buffer *buffer);
1062 
/**
 * One bit per piece of dynamic state (bits 0..29).
 *
 * RADV_DYNAMIC_ALL is the union of all 30 bits. The RADV_CMD_DIRTY_DYNAMIC_*
 * values of enum radv_cmd_dirty_bits use the same bit positions, so the two
 * enums must be kept in sync.
 */
1063 enum radv_dynamic_state_bits {
1064    RADV_DYNAMIC_VIEWPORT = 1ull << 0,
1065    RADV_DYNAMIC_SCISSOR = 1ull << 1,
1066    RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
1067    RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1068    RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1069    RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1070    RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1071    RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1072    RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1073    RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1074    RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1075    RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1076    RADV_DYNAMIC_CULL_MODE = 1ull << 12,
1077    RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
1078    RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1079    RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1080    RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1081    RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1082    RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1083    RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1084    RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
1085    RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1086    RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1087    RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1088    RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1089    RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1090    RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
1091    RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1092    RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1093    RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
        /* Mask of bits 0..29, i.e. every flag above. */
1094    RADV_DYNAMIC_ALL = (1ull << 30) - 1,
1095 };
1096 
/**
 * Dirty flags for command buffer state.
 *
 * Bits 0..29 mirror enum radv_dynamic_state_bits one-to-one; bits 30..34 are
 * additional non-dynamic dirty flags. Because values go beyond bit 31, a mask
 * of these flags needs a 64-bit integer (radv_cmd_state::dirty is uint64_t).
 */
1097 enum radv_cmd_dirty_bits {
1098    /* Keep the dynamic state dirty bits in sync with
1099     * enum radv_dynamic_state_bits */
1100    RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
1101    RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
1102    RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
1103    RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
1104    RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
1105    RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
1106    RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
1107    RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
1108    RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
1109    RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
1110    RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
1111    RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
1112    RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
1113    RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
1114    RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
1115    RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
1116    RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
1117    RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
1118    RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
1119    RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
1120    RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
1121    RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
1122    RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
1123    RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
1124    RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
1125    RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
1126    RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
1127    RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
1128    RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
1129    RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
        /* Mask of bits 0..29, i.e. every dynamic-state dirty bit. */
1130    RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1,
        /* Non-dynamic dirty bits start right after the dynamic range. */
1131    RADV_CMD_DIRTY_PIPELINE = 1ull << 30,
1132    RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31,
1133    RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
1134    RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
1135    RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
1136 };
1137 
/**
 * Cache flush / invalidation and synchronization request flags.
 *
 * Individual flags occupy bits 0..16; the two RADV_CMD_FLUSH_* values at the
 * end are convenience combinations of those flags. Masks of this type are
 * stored in radv_cmd_state::flush_bits and consumed by
 * si_cs_emit_cache_flush().
 */
1138 enum radv_cmd_flush_bits {
1139    /* Instruction cache. */
1140    RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
1141    /* Scalar L1 cache. */
1142    RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
1143    /* Vector L1 cache. */
1144    RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
1145    /* L2 cache + L2 metadata cache writeback & invalidate.
1146     * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
1147    RADV_CMD_FLAG_INV_L2 = 1 << 3,
1148    /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
1149     * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
1150     * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
1151    RADV_CMD_FLAG_WB_L2 = 1 << 4,
1152    /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
1153     * changed and we want to read an image from shaders. */
1154    RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
1155    /* Framebuffer caches */
1156    RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
1157    RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
1158    RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
1159    RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
1160    /* Engine synchronization. */
1161    RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
1162    RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
1163    RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
1164    RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
1165    /* Pipeline query controls. */
1166    RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
1167    RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
1168    RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
1169 
        /* All four framebuffer cache flags (CB, CB metadata, DB, DB metadata). */
1170    RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
1171       (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
1172        RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),
1173 
        /* Instruction/scalar/vector cache invalidation, L2 invalidate and
         * writeback, plus a compute shader partial flush. */
1174    RADV_CMD_FLUSH_ALL_COMPUTE =
1175       (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
1176        RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
1177 };
1178 
/**
 * Flag bits (bits 0..3), with radv_nggc_none meaning no flag set.
 *
 * NOTE(review): presumably the NGG culling configuration cached in
 * radv_cmd_state::last_nggc_settings - confirm against the users.
 */
1179 enum radv_nggc_settings {
1180    radv_nggc_none = 0,
1181    radv_nggc_front_face = 1 << 0,
1182    radv_nggc_back_face = 1 << 1,
1183    radv_nggc_face_is_ccw = 1 << 2,
1184    radv_nggc_small_primitives = 1 << 3,
1185 };
1186 
/**
 * Offset, size and stride of one vertex buffer binding; element type of
 * radv_cmd_buffer::vertex_bindings (the bound buffers themselves live in the
 * parallel radv_cmd_buffer::vertex_binding_buffers array).
 */
1187 struct radv_vertex_binding {
1188    VkDeviceSize offset;
1189    VkDeviceSize size;
1190    VkDeviceSize stride;
1191 };
1192 
/**
 * One streamout buffer binding (buffer, offset, size); element type of
 * radv_cmd_buffer::streamout_bindings.
 */
1193 struct radv_streamout_binding {
1194    struct radv_buffer *buffer;
1195    VkDeviceSize offset;
1196    VkDeviceSize size;
1197 };
1198 
/**
 * Streamout state tracked per command buffer (radv_cmd_state::streamout):
 * which buffers are bound and the register state derived from it.
 */
1199 struct radv_streamout_state {
1200    /* Mask of bound streamout buffers. */
1201    uint8_t enabled_mask;
1202 
1203    /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
1204    uint32_t hw_enabled_mask;
1205 
1206    /* State of VGT_STRMOUT_(CONFIG|EN) */
1207    bool streamout_enabled;
1208 };
1209 
/**
 * Viewport state: a count plus up to MAX_VIEWPORTS viewports and, for each
 * slot, a 3-component scale/translate pair.
 *
 * NOTE(review): xform[i] is presumably the transform computed from
 * viewports[i] by radv_get_viewport_xform() - confirm.
 */
1210 struct radv_viewport_state {
1211    uint32_t count;
1212    VkViewport viewports[MAX_VIEWPORTS];
1213    struct {
1214       float scale[3];
1215       float translate[3];
1216    } xform[MAX_VIEWPORTS];
1217 };
1218 
/** Scissor state: a count plus storage for up to MAX_SCISSORS rectangles. */
1219 struct radv_scissor_state {
1220    uint32_t count;
1221    VkRect2D scissors[MAX_SCISSORS];
1222 };
1223 
/** Discard rectangle state: a count plus storage for up to MAX_DISCARD_RECTANGLES rectangles. */
1224 struct radv_discard_rectangle_state {
1225    uint32_t count;
1226    VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
1227 };
1228 
/**
 * Sample locations state: samples per pixel, grid size, and a count plus
 * storage for up to MAX_SAMPLE_LOCATIONS locations.
 */
1229 struct radv_sample_locations_state {
1230    VkSampleCountFlagBits per_pixel;
1231    VkExtent2D grid_size;
1232    uint32_t count;
1233    VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
1234 };
1235 
/**
 * Values of all dynamic state tracked by the driver, plus a mask saying
 * which of them are saved.
 *
 * Embedded in radv_cmd_state as `dynamic`; a default instance is declared
 * as default_dynamic_state.
 */
1236 struct radv_dynamic_state {
1237    /**
1238     * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
1239     * Defines the set of saved dynamic state.
1240     */
        /* NOTE(review): enum radv_dynamic_state_bits in this file looks like the
         * bit set actually intended here - confirm against the code that sets mask. */
1241    uint64_t mask;
1242 
1243    struct radv_viewport_state viewport;
1244 
1245    struct radv_scissor_state scissor;
1246 
1247    float line_width;
1248 
1249    struct {
1250       float bias;
1251       float clamp;
1252       float slope;
1253    } depth_bias;
1254 
1255    float blend_constants[4];
1256 
1257    struct {
1258       float min;
1259       float max;
1260    } depth_bounds;
1261 
1262    struct {
1263       uint32_t front;
1264       uint32_t back;
1265    } stencil_compare_mask;
1266 
1267    struct {
1268       uint32_t front;
1269       uint32_t back;
1270    } stencil_write_mask;
1271 
        /* Stencil operations, kept separately for front and back faces. */
1272    struct {
1273       struct {
1274          VkStencilOp fail_op;
1275          VkStencilOp pass_op;
1276          VkStencilOp depth_fail_op;
1277          VkCompareOp compare_op;
1278       } front;
1279 
1280       struct {
1281          VkStencilOp fail_op;
1282          VkStencilOp pass_op;
1283          VkStencilOp depth_fail_op;
1284          VkCompareOp compare_op;
1285       } back;
1286    } stencil_op;
1287 
1288    struct {
1289       uint32_t front;
1290       uint32_t back;
1291    } stencil_reference;
1292 
1293    struct radv_discard_rectangle_state discard_rectangle;
1294 
1295    struct radv_sample_locations_state sample_location;
1296 
1297    struct {
1298       uint32_t factor;
1299       uint16_t pattern;
1300    } line_stipple;
1301 
1302    VkCullModeFlags cull_mode;
1303    VkFrontFace front_face;
1304    unsigned primitive_topology;
1305 
1306    bool depth_test_enable;
1307    bool depth_write_enable;
1308    VkCompareOp depth_compare_op;
1309    bool depth_bounds_test_enable;
1310    bool stencil_test_enable;
1311 
1312    struct {
1313       VkExtent2D size;
1314       VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
1315    } fragment_shading_rate;
1316 
1317    bool depth_bias_enable;
1318    bool primitive_restart_enable;
1319    bool rasterizer_discard_enable;
1320 
1321    unsigned logic_op;
1322 
1323    uint32_t color_write_enable;
1324 };
1325 
1326 extern const struct radv_dynamic_state default_dynamic_state;
1327 
1328 const char *radv_get_debug_option_name(int id);
1329 
1330 const char *radv_get_perftest_option_name(int id);
1331 
1332 int radv_get_int_debug_option(const char *name, int default_value);
1333 
/**
 * Precomputed color buffer (CB) values for one color attachment.
 *
 * Filled in by radv_initialise_color_surface() and stored in
 * radv_attachment_state::cb.
 * NOTE(review): the field names look like they mirror CB_COLOR* hardware
 * registers - confirm against the register emission code.
 */
1334 struct radv_color_buffer_info {
1335    uint64_t cb_color_base;
1336    uint64_t cb_color_cmask;
1337    uint64_t cb_color_fmask;
1338    uint64_t cb_dcc_base;
1339    uint32_t cb_color_slice;
1340    uint32_t cb_color_view;
1341    uint32_t cb_color_info;
1342    uint32_t cb_color_attrib;
1343    uint32_t cb_color_attrib2; /* GFX9 and later */
1344    uint32_t cb_color_attrib3; /* GFX10 and later */
1345    uint32_t cb_dcc_control;
1346    uint32_t cb_color_cmask_slice;
1347    uint32_t cb_color_fmask_slice;
         /* The two members share storage; which one is meaningful depends on
          * the GPU generation. */
1348    union {
1349       uint32_t cb_color_pitch; // GFX6-GFX8
1350       uint32_t cb_mrt_epitch;  // GFX9+
1351    };
1352 };
1353 
/**
 * Precomputed depth/stencil buffer (DB) values for one depth/stencil
 * attachment.
 *
 * Filled in by radv_initialise_ds_surface() / radv_initialise_vrs_surface()
 * and stored in radv_attachment_state::ds.
 * NOTE(review): the field names look like they mirror DB_* hardware
 * registers - confirm against the register emission code.
 */
1354 struct radv_ds_buffer_info {
1355    uint64_t db_z_read_base;
1356    uint64_t db_stencil_read_base;
1357    uint64_t db_z_write_base;
1358    uint64_t db_stencil_write_base;
1359    uint64_t db_htile_data_base;
1360    uint32_t db_depth_info;
1361    uint32_t db_z_info;
1362    uint32_t db_stencil_info;
1363    uint32_t db_depth_view;
1364    uint32_t db_depth_size;
1365    uint32_t db_depth_slice;
1366    uint32_t db_htile_surface;
1367    uint32_t pa_su_poly_offset_db_fmt_cntl;
1368    uint32_t db_z_info2;       /* GFX9 only */
1369    uint32_t db_stencil_info2; /* GFX9 only */
1370 };
1371 
1372 void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1373                                    struct radv_image_view *iview);
1374 void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
1375                                 struct radv_image_view *iview);
1376 void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
1377                                  struct radv_ds_buffer_info *ds);
1378 
1379 /**
1380  * Attachment state when recording a renderpass instance.
1381  *
1382  * The clear value is valid only if there exists a pending clear.
1383  */
1384 struct radv_attachment_state {
1385    VkImageAspectFlags pending_clear_aspects;
1386    uint32_t cleared_views;
1387    VkClearValue clear_value;
1388    VkImageLayout current_layout;
1389    VkImageLayout current_stencil_layout;
1390    bool current_in_render_loop;
1391    struct radv_sample_locations_state sample_location;
1392 
         /* Color and depth/stencil surface info share storage: an attachment
          * uses one or the other. */
1393    union {
1394       struct radv_color_buffer_info cb;
1395       struct radv_ds_buffer_info ds;
1396    };
1397    struct radv_image_view *iview;
1398 };
1399 
/**
 * Descriptor binding state for one pipeline bind point; radv_cmd_buffer
 * holds one instance per bind point (see radv_get_descriptors_state()).
 *
 * NOTE(review): dirty/valid are presumably per-set bitmasks indexed like
 * sets[] - confirm against the bind/flush code.
 */
1400 struct radv_descriptor_state {
1401    struct radv_descriptor_set *sets[MAX_SETS];
1402    uint32_t dirty;
1403    uint32_t valid;
1404    struct radv_push_descriptor_set push_set;
1405    bool push_dirty;
         /* Four dwords of storage per dynamic buffer. */
1406    uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1407 };
1408 
/**
 * Sample locations associated with one subpass index; element type of
 * radv_cmd_state::subpass_sample_locs.
 */
1409 struct radv_subpass_sample_locs_state {
1410    uint32_t subpass_idx;
1411    struct radv_sample_locations_state sample_location;
1412 };
1413 
/**
 * Flush/invalidate flags, one distinct bit each (0x1 through 0x8000).
 *
 * Stored in radv_cmd_state::sqtt_flush_bits and filled in through the
 * sqtt_flush_bits output parameter of si_cs_emit_cache_flush().
 */
1414 enum rgp_flush_bits {
1415    RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
1416    RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
1417    RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
1418    RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
1419    RGP_FLUSH_PFP_SYNC_ME = 0x10,
1420    RGP_FLUSH_SYNC_CP_DMA = 0x20,
1421    RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
1422    RGP_FLUSH_INVAL_ICACHE = 0x80,
1423    RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
1424    RGP_FLUSH_FLUSH_L2 = 0x200,
1425    RGP_FLUSH_INVAL_L2 = 0x400,
1426    RGP_FLUSH_FLUSH_CB = 0x800,
1427    RGP_FLUSH_INVAL_CB = 0x1000,
1428    RGP_FLUSH_FLUSH_DB = 0x2000,
1429    RGP_FLUSH_INVAL_DB = 0x4000,
1430    RGP_FLUSH_INVAL_L1 = 0x8000,
1431 };
1432 
/**
 * Mutable recording state of a command buffer (embedded as
 * radv_cmd_buffer::state).
 *
 * Groups the currently bound and last emitted pipelines, render pass and
 * framebuffer information, dynamic state, index buffer state, pending flush
 * bits, query counters and assorted cached values.
 *
 * NOTE(review): the many last_* fields look like caches of the most recently
 * emitted values, used to skip redundant emission - confirm against the
 * emission code.
 */
1433 struct radv_cmd_state {
1434    /* Vertex descriptors */
1435    uint64_t vb_va;
1436 
1437    bool predicating;
         /* 64-bit because enum radv_cmd_dirty_bits has flags above bit 31.
          * NOTE(review): presumably a mask of those flags - confirm. */
1438    uint64_t dirty;
1439 
1440    uint32_t prefetch_L2_mask;
1441 
1442    struct radv_graphics_pipeline *graphics_pipeline;
1443    struct radv_graphics_pipeline *emitted_graphics_pipeline;
1444    struct radv_compute_pipeline *compute_pipeline;
1445    struct radv_compute_pipeline *emitted_compute_pipeline;
1446    struct radv_compute_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
1447    struct vk_framebuffer *framebuffer;
1448    struct radv_render_pass *pass;
1449    const struct radv_subpass *subpass;
1450    struct radv_dynamic_state dynamic;
1451    struct radv_vs_input_state dynamic_vs_input;
1452    struct radv_attachment_state *attachments;
1453    struct radv_streamout_state streamout;
1454    VkRect2D render_area;
1455 
1456    uint32_t num_subpass_sample_locs;
1457    struct radv_subpass_sample_locs_state *subpass_sample_locs;
1458 
1459    /* Index buffer */
1460    struct radv_buffer *index_buffer;
1461    uint64_t index_offset;
1462    uint32_t index_type;
1463    uint32_t max_index_count;
1464    uint64_t index_va;
1465    int32_t last_index_type;
1466 
1467    int32_t last_primitive_reset_en;
1468    uint32_t last_primitive_reset_index;
1469    enum radv_cmd_flush_bits flush_bits;
1470    unsigned active_occlusion_queries;
1471    bool perfect_occlusion_queries_enabled;
1472    unsigned active_pipeline_queries;
1473    unsigned active_pipeline_gds_queries;
1474    bool prims_gen_query_enabled;
1475    uint32_t trace_id;
1476    uint32_t last_ia_multi_vgt_param;
1477 
1478    uint32_t last_num_instances;
1479    uint32_t last_first_instance;
1480    uint32_t last_vertex_offset;
1481    uint32_t last_drawid;
1482    uint32_t last_subpass_color_count;
1483 
1484    uint32_t last_sx_ps_downconvert;
1485    uint32_t last_sx_blend_opt_epsilon;
1486    uint32_t last_sx_blend_opt_control;
1487 
1488    /* Whether CP DMA is busy/idle. */
1489    bool dma_is_busy;
1490 
1491    /* Whether any images that are not L2 coherent are dirty from the CB. */
1492    bool rb_noncoherent_dirty;
1493 
1494    /* Conditional rendering info. */
1495    uint8_t predication_op; /* 32-bit or 64-bit predicate value */
1496    int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
1497    uint64_t predication_va;
1498 
1499    /* Inheritance info. */
1500    VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
1501 
1502    bool context_roll_without_scissor_emitted;
1503 
1504    /* SQTT related state. */
1505    uint32_t current_event_type;
1506    uint32_t num_events;
1507    uint32_t num_layout_transitions;
1508    bool pending_sqtt_barrier_end;
1509    enum rgp_flush_bits sqtt_flush_bits;
1510 
1511    /* NGG culling state. */
1512    uint32_t last_nggc_settings;
1513    int8_t last_nggc_settings_sgpr_idx;
1514    bool last_nggc_skip;
1515 
1516    /* Mesh shading state. */
1517    bool mesh_shading;
1518 
1519    uint8_t cb_mip[MAX_RTS];
1520 
1521    /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
1522    bool uses_draw_indirect_multi;
1523 
1524    uint32_t rt_stack_size;
1525 
1526    struct radv_shader_part *emitted_vs_prolog;
1527    uint32_t *emitted_vs_prolog_key;
1528    uint32_t emitted_vs_prolog_key_hash;
1529    uint32_t vbo_misaligned_mask;
1530    uint32_t vbo_misaligned_mask_invalid;
1531    uint32_t vbo_bound_mask;
1532 
1533    /* Whether the cmdbuffer owns the current render pass rather than the app. */
1534    bool own_render_pass;
1535 
1536    /* Per-vertex VRS state. */
1537    uint32_t last_vrs_rates;
1538    int8_t last_vrs_rates_sgpr_idx;
1539 
1540    /* Whether to suspend streamout for internal driver operations. */
1541    bool suspend_streamout;
1542 
1543    /* Whether this commandbuffer uses performance counters. */
1544    bool uses_perf_counters;
1545 };
1546 
/**
 * Command pool: the common vk_command_pool state plus two lists of command
 * buffers.
 *
 * NOTE(review): command buffers are presumably linked into these lists via
 * radv_cmd_buffer::pool_link, with free_cmd_buffers holding ones kept for
 * reuse - confirm.
 */
1547 struct radv_cmd_pool {
1548    struct vk_command_pool vk;
1549    struct list_head cmd_buffers;
1550    struct list_head free_cmd_buffers;
1551 };
1552 
/**
 * Upload buffer state of a command buffer (radv_cmd_buffer::upload): the
 * backing BO, its CPU mapping, and the current offset and total size.
 *
 * NOTE(review): `list` presumably chains previously used upload buffers -
 * confirm against radv_cmd_buffer_upload_alloc().
 */
1553 struct radv_cmd_buffer_upload {
1554    uint8_t *map;
1555    unsigned offset;
1556    uint64_t size;
1557    struct radeon_winsys_bo *upload_bo;
1558    struct list_head list;
1559 };
1560 
/**
 * Lifecycle status of a command buffer (radv_cmd_buffer::status).
 *
 * Values are assigned implicitly in declaration order, so
 * RADV_CMD_BUFFER_STATUS_INVALID is 0.
 */
1561 enum radv_cmd_buffer_status {
1562    RADV_CMD_BUFFER_STATUS_INVALID,
1563    RADV_CMD_BUFFER_STATUS_INITIAL,
1564    RADV_CMD_BUFFER_STATUS_RECORDING,
1565    RADV_CMD_BUFFER_STATUS_EXECUTABLE,
1566    RADV_CMD_BUFFER_STATUS_PENDING,
1567 };
1568 
/**
 * Cached per-VkFormat vertex format properties.
 *
 * NOTE(review): presumably the element type of
 * radv_cmd_buffer::cached_vertex_formats - confirm.
 */
1569 struct dynamic_vertex_format_cache {
1570    VkFormat format;
1571    uint8_t hw_fmt;
1572    uint8_t fmt_align_req_minus_1; /* alignment requirement minus one, per the name */
1573    uint8_t fmt_size;
1574    bool post_shuffle;
1575    bool alpha_adjust_lo;
1576    bool alpha_adjust_hi;
1577 };
1578 
/**
 * RADV command buffer.
 *
 * Wraps the common vk_command_buffer and adds the owning device and pool,
 * the command stream being recorded, the recording state, vertex/streamout
 * bindings, push constants, per-bind-point descriptor state, the upload
 * buffer, the scratch/ring requirements gathered while recording, and an
 * optional internal compute command stream (ace_internal).
 */
1579 struct radv_cmd_buffer {
1580    struct vk_command_buffer vk;
1581 
1582    struct radv_device *device;
1583 
1584    struct radv_cmd_pool *pool;
1585    struct list_head pool_link;
1586 
1587    struct util_dynarray cached_vertex_formats;
1588    VkCommandBufferUsageFlags usage_flags;
1589    enum radv_cmd_buffer_status status;
1590    struct radeon_cmdbuf *cs;
1591    struct radv_cmd_state state;
1592    struct radv_buffer *vertex_binding_buffers[MAX_VBS];
1593    struct radv_vertex_binding vertex_bindings[MAX_VBS];
1594    uint32_t used_vertex_bindings;
1595    struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
1596    enum radv_queue_family qf;
1597 
1598    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
1599    VkShaderStageFlags push_constant_stages;
1600    struct radv_descriptor_set_header meta_push_descriptors;
1601 
         /* One descriptor state per pipeline bind point; index through
          * radv_get_descriptors_state(). */
1602    struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
1603 
1604    struct radv_cmd_buffer_upload upload;
1605 
1606    uint32_t scratch_size_per_wave_needed;
1607    uint32_t scratch_waves_wanted;
1608    uint32_t compute_scratch_size_per_wave_needed;
1609    uint32_t compute_scratch_waves_wanted;
1610    uint32_t esgs_ring_size_needed;
1611    uint32_t gsvs_ring_size_needed;
1612    bool tess_rings_needed;
1613    bool task_rings_needed;
1614    bool mesh_scratch_ring_needed;
1615    bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
1616    bool gds_oa_needed; /* for GFX10 streamout */
1617    bool sample_positions_needed;
1618 
1619    VkResult record_result;
1620 
1621    uint64_t gfx9_fence_va;
1622    uint32_t gfx9_fence_idx;
1623    uint64_t gfx9_eop_bug_va;
1624 
1625    uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
1626    bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
1627 
1628    struct {
1629       /**
1630        * Internal command stream that is used when some graphics work
1631        * also requires a submission to the compute queue.
1632        */
1633       struct radeon_cmdbuf *cs;
1634 
1635       /** Flush bits for the internal cmdbuf. */
1636       enum radv_cmd_flush_bits flush_bits;
1637 
1638       /**
1639        * For synchronization between the ACE and GFX cmdbuf.
1640        * The value of this semaphore is incremented whenever we
1641        * encounter a barrier that affects ACE. At sync points,
1642        * GFX writes the value to its address, and ACE waits until
1643        * it detects that the value has been written.
1644        */
1645       struct {
1646          uint64_t va;                    /* Virtual address of the semaphore. */
1647          uint32_t gfx2ace_value;         /* Current value on GFX. */
1648          uint32_t emitted_gfx2ace_value; /* Emitted value on GFX. */
1649       } sem;
1650    } ace_internal;
1651 
1652    /**
1653     * Whether a query pool has been reset and we have to flush caches.
1654     */
1655    bool pending_reset_query;
1656 
1657    /**
1658     * Bitmask of pending active query flushes.
1659     */
1660    enum radv_cmd_flush_bits active_query_flush_bits;
1661 };
1662 
1663 struct radv_image;
1664 struct radv_image_view;
1665 
1666 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
1667 
1668 bool radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer);
1669 void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer);
1670 
1671 void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
1672 void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
1673 
1674 void cik_create_gfx_config(struct radv_device *device);
1675 
1676 void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
1677                        const VkViewport *viewports, unsigned rast_prim, float line_width);
1678 
1679 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
1680                                    bool indirect_draw, bool count_from_stream_output,
1681                                    uint32_t draw_vertex_count, unsigned topology,
1682                                    bool prim_restart_enable);
1683 void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
1684                                 unsigned event, unsigned event_flags, unsigned dst_sel,
1685                                 unsigned data_sel, uint64_t va, uint32_t new_fence,
1686                                 uint64_t gfx9_eop_bug_va);
1687 
1688 void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
1689                       uint32_t mask);
1690 void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
1691                             uint32_t *fence_ptr, uint64_t va, bool is_mec,
1692                             enum radv_cmd_flush_bits flush_bits,
1693                             enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
1694 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
1695 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
1696                                    unsigned pred_op, uint64_t va);
1697 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
1698                            uint64_t size);
1699 void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1700                            unsigned size, bool predicating);
1701 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
1702 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
1703                             unsigned value);
1704 void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1705 
1706 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries);
1707 uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer);
1708 uint32_t radv_get_vgt_index_size(uint32_t type);
1709 
1710 unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
1711 uint32_t radv_hash_vs_prolog(const void *key_);
1712 bool radv_cmp_vs_prolog(const void *a_, const void *b_);
1713 
1714 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1715                                   unsigned *out_offset, void **ptr);
1716 void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
1717                                  const struct radv_subpass *subpass);
1718 void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
1719                                      const struct radv_subpass *subpass);
1720 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1721                                  const void *data, unsigned *out_offset);
1722 void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
1723                                    const struct radv_graphics_pipeline *pipeline,
1724                                    bool full_null_descriptors, void *vb_ptr);
1725 void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs);
1726 
1727 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
1728 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
1729 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
1730 void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
1731                                            VkImageAspectFlags aspects,
1732                                            VkResolveModeFlagBits resolve_mode);
1733 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
1734 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
1735                                            VkImageAspectFlags aspects,
1736                                            VkResolveModeFlagBits resolve_mode);
1737 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
1738 unsigned radv_get_default_max_sample_dist(int log_samples);
1739 void radv_device_init_msaa(struct radv_device *device);
1740 VkResult radv_device_init_vrs_state(struct radv_device *device);
1741 
1742 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1743                                    const struct radv_image_view *iview,
1744                                    VkClearDepthStencilValue ds_clear_value,
1745                                    VkImageAspectFlags aspects);
1746 
1747 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1748                                       const struct radv_image_view *iview, int cb_idx,
1749                                       uint32_t color_values[2]);
1750 
1751 bool radv_image_use_dcc_image_stores(const struct radv_device *device,
1752                                      const struct radv_image *image);
1753 bool radv_image_use_dcc_predication(const struct radv_device *device,
1754                                     const struct radv_image *image);
1755 
1756 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1757                               const VkImageSubresourceRange *range, bool value);
1758 
1759 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1760                               const VkImageSubresourceRange *range, bool value);
1761 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
1762                                                VkAccessFlags2 src_flags,
1763                                                const struct radv_image *image);
1764 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
1765                                                VkAccessFlags2 dst_flags,
1766                                                const struct radv_image *image);
1767 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1768                           struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value);
1769 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
1770                       struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
1771                       uint64_t size);
1772 
1773 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
1774 bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
1775 void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1776                       struct radv_device_memory *mem);
1777 
1778 static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf * cs,unsigned sh_offset,unsigned pointer_count,bool use_32bit_pointers)1779 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
1780                               bool use_32bit_pointers)
1781 {
1782    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
1783    radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
1784 }
1785 
1786 static inline void
radv_emit_shader_pointer_body(struct radv_device * device,struct radeon_cmdbuf * cs,uint64_t va,bool use_32bit_pointers)1787 radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1788                               bool use_32bit_pointers)
1789 {
1790    radeon_emit(cs, va);
1791 
1792    if (use_32bit_pointers) {
1793       assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
1794    } else {
1795       radeon_emit(cs, va >> 32);
1796    }
1797 }
1798 
/**
 * Emit a complete one-pointer SET_SH_REG packet (header + payload) for
 * `sh_offset`. A `global` pointer is emitted with 64 bits, any other pointer
 * with 32 bits.
 */
static inline void
radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
                         uint64_t va, bool global)
{
   radv_emit_shader_pointer_head(cs, sh_offset, 1, !global);
   radv_emit_shader_pointer_body(device, cs, va, !global);
}
1808 
1809 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)1810 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1811 {
1812    switch (bind_point) {
1813    case VK_PIPELINE_BIND_POINT_GRAPHICS:
1814    case VK_PIPELINE_BIND_POINT_COMPUTE:
1815       return &cmd_buffer->descriptors[bind_point];
1816    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1817       return &cmd_buffer->descriptors[2];
1818    default:
1819       unreachable("Unhandled bind point");
1820    }
1821 }
1822 
1823 void
1824 radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
1825 
1826 /*
1827  * Takes x,y,z as exact numbers of invocations, instead of blocks.
1828  *
1829  * Limitations: Can't call normal dispatch functions without binding or rebinding
1830  *              the compute pipeline.
1831  */
1832 void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
1833                              uint32_t z);
1834 
1835 void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
1836                             uint64_t va);
1837 
1838 struct radv_event {
1839    struct vk_object_base base;
1840    struct radeon_winsys_bo *bo;
1841    uint64_t *map;
1842 };
1843 
/*
 * Single-bit RADV_HASH_SHADER_* flags.
 * NOTE(review): presumably the bits carried by the `flags` argument of the
 * shader hashing helpers -- confirm at the call sites.
 * The gaps in the bit numbering (0, 5-7, 9-12) are kept as they are.
 */
#define RADV_HASH_SHADER_CS_WAVE32             (1 << 1)
#define RADV_HASH_SHADER_PS_WAVE32             (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32             (1 << 3)
#define RADV_HASH_SHADER_LLVM                  (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS       (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING       (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS  (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_EMULATE_RT            (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA             (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64             (1 << 18)
1855 
1856 struct radv_pipeline_key;
1857 
1858 void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
1859                               struct radv_pipeline_stage *out_stage, gl_shader_stage stage);
1860 
1861 void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
1862                        const struct radv_pipeline_layout *layout,
1863                        const struct radv_pipeline_key *key, uint32_t flags);
1864 
1865 void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1866                           uint32_t flags);
1867 
1868 uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
1869 
1870 bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
1871 
1872 bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines);
1873 
1874 bool radv_emulate_rt(const struct radv_physical_device *pdevice);
1875 
1876 enum {
1877    RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
1878                          VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
1879                          VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
1880 };
1881 
/* Mask with one bit set per stage, for MESA_VULKAN_SHADER_STAGES stages. */
#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

/*
 * Iterate over every stage whose bit is set in `stage_bits` (after masking
 * with RADV_STAGE_MASK), lowest bit first. The macro declares `stage` itself.
 *
 * Each pass sets stage to ffs(__tmp) - 1 and then tests __tmp; once the
 * remaining mask is empty, stage becomes -1 and the loop stops. The bit that
 * was just visited is cleared in the increment expression.
 */
#define radv_foreach_stage(stage, stage_bits)                                                      \
   for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK);          \
        stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1887 
1888 extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
1889 unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
1890 
/*
 * Multisample state of a pipeline.
 * NOTE(review): the lower-case fields look like raw values of the hardware
 * registers they are named after -- confirm against the code that fills them.
 */
struct radv_multisample_state {
   uint32_t db_eqaa;
   uint32_t pa_sc_mode_cntl_0;
   uint32_t pa_sc_mode_cntl_1;
   uint32_t pa_sc_aa_config;
   uint32_t pa_sc_aa_mask[2];
   unsigned num_samples; /* sample count */
};
1899 
/* VRS state: a single 32-bit value named after PA_CL_VRS_CNTL. */
struct radv_vrs_state {
   uint32_t pa_cl_vrs_cntl;
};

/*
 * Vertex count constraints of a primitive type.
 * NOTE(review): presumably `min` is the minimum vertex count and `incr` the
 * per-primitive increment -- confirm with the users of this struct.
 */
struct radv_prim_vertex_count {
   uint8_t min;
   uint8_t incr;
};
1908 
/*
 * Values kept with a pipeline for building IA_MULTI_VGT_PARAM.
 * NOTE(review): `base` presumably holds the draw-independent register bits
 * and the remaining fields the inputs for the per-draw part -- confirm.
 */
struct radv_ia_multi_vgt_param_helpers {
   uint32_t base;
   bool partial_es_wave;
   uint8_t primgroup_size;
   bool ia_switch_on_eoi;
   bool partial_vs_wave;
};
1916 
/* Binning state: a single 32-bit value named after PA_SC_BINNER_CNTL_0. */
struct radv_binning_state {
   uint32_t pa_sc_binner_cntl_0;
};

/* NOTE(review): name suggests a GS-per-ES limit; the meaning is not visible here -- confirm at the use sites. */
#define SI_GS_PER_ES 128
1922 
/* Discriminator stored in radv_pipeline::type. */
enum radv_pipeline_type {
   /* Graphics pipeline. */
   RADV_PIPELINE_GRAPHICS,
   /* Compute pipeline; ray tracing pipelines are represented as this type too. */
   RADV_PIPELINE_COMPUTE,
   /* Pipeline library: only a partial pipeline, it cannot actually be run. */
   RADV_PIPELINE_LIBRARY
};
1930 
/* Two 32-bit handles per ray tracing shader group. */
struct radv_pipeline_group_handle {
   uint32_t handles[2];
};

/* Stack size requirements of a ray tracing shader group. */
struct radv_pipeline_shader_stack_size {
   uint32_t recursive_size;
   /* Covers the any-hit and intersection shaders. */
   uint32_t non_recursive_size;
};
1940 
/* Reference-counted wrapper around one shader arena allocation. */
struct radv_pipeline_slab {
   uint32_t ref_count;

   union radv_shader_arena_block *alloc;
};

/* NOTE(review): presumably releases `slab` once unused; the ref_count handling is not visible here -- confirm. */
void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
1948 
1949 struct radv_vertex_input_info {
1950    uint32_t instance_rate_inputs;
1951    uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
1952    uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
1953    uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
1954    uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
1955    uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
1956    uint8_t vertex_binding_align[MAX_VBS];
1957    enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
1958    uint32_t vertex_post_shuffle;
1959    uint32_t binding_stride[MAX_VBS];
1960    uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
1961    uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
1962    uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
1963 };
1964 
1965 struct radv_input_assembly_info {
1966    uint8_t primitive_topology; /* VkPrimitiveTopology */
1967    bool primitive_restart_enable;
1968 };
1969 
1970 struct radv_tessellation_info {
1971    uint8_t patch_control_points;
1972    VkTessellationDomainOrigin domain_origin;
1973 };
1974 
1975 struct radv_viewport_info {
1976    bool negative_one_to_one;
1977    uint8_t viewport_count;
1978    uint8_t scissor_count;
1979    VkRect2D scissors[MAX_SCISSORS];
1980    VkViewport viewports[MAX_VIEWPORTS];
1981 };
1982 
1983 struct radv_rasterization_info {
1984    bool discard_enable;
1985    VkFrontFace front_face;
1986    VkCullModeFlags cull_mode;
1987    uint8_t polygon_mode; /* VkPolygonMode */
1988    bool depth_bias_enable;
1989    bool depth_clamp_enable;
1990    float line_width;
1991    float depth_bias_constant_factor;
1992    float depth_bias_clamp;
1993    float depth_bias_slope_factor;
1994    VkConservativeRasterizationModeEXT conservative_mode;
1995    bool provoking_vtx_last;
1996    bool stippled_line_enable;
1997    VkLineRasterizationModeEXT line_raster_mode;
1998    uint32_t line_stipple_factor;
1999    uint16_t line_stipple_pattern;
2000    bool depth_clip_disable;
2001    VkRasterizationOrderAMD order;
2002 };
2003 
2004 struct radv_discard_rectangle_info {
2005    VkDiscardRectangleModeEXT mode;
2006    VkRect2D rects[MAX_DISCARD_RECTANGLES];
2007    uint8_t count;
2008 };
2009 
2010 struct radv_multisample_info {
2011    bool sample_shading_enable;
2012    bool alpha_to_coverage_enable;
2013    bool sample_locs_enable;
2014    VkSampleCountFlagBits raster_samples;
2015    float min_sample_shading;
2016    uint16_t sample_mask;
2017    uint8_t sample_locs_count;
2018    VkSampleCountFlagBits sample_locs_per_pixel;
2019    VkExtent2D sample_locs_grid_size;
2020    VkSampleLocationEXT sample_locs[MAX_SAMPLE_LOCATIONS];
2021 };
2022 
2023 struct radv_stencil_op_info {
2024    VkStencilOp fail_op;
2025    VkStencilOp pass_op;
2026    VkStencilOp depth_fail_op;
2027    VkCompareOp compare_op;
2028    uint8_t compare_mask;
2029    uint8_t write_mask;
2030    uint8_t reference;
2031 };
2032 
2033 struct radv_depth_stencil_info {
2034    bool stencil_test_enable;
2035    bool depth_test_enable;
2036    bool depth_write_enable;
2037    bool depth_bounds_test_enable;
2038    struct {
2039       float min;
2040       float max;
2041    } depth_bounds;
2042    struct radv_stencil_op_info front;
2043    struct radv_stencil_op_info back;
2044    VkCompareOp depth_compare_op;
2045 };
2046 
2047 struct radv_rendering_info {
2048    uint32_t view_mask;
2049    uint32_t color_att_count;
2050    VkFormat color_att_formats[MAX_RTS];
2051    VkFormat depth_att_format;
2052    VkFormat stencil_att_format;
2053 };
2054 
2055 struct radv_color_blend_info {
2056    bool logic_op_enable;
2057    uint8_t att_count;
2058    uint16_t logic_op;
2059    uint32_t color_write_enable;
2060    float blend_constants[4];
2061    struct {
2062       uint8_t color_write_mask;
2063       bool blend_enable;
2064       uint16_t color_blend_op;
2065       uint16_t alpha_blend_op;
2066       uint16_t src_color_blend_factor;
2067       uint16_t dst_color_blend_factor;
2068       uint16_t src_alpha_blend_factor;
2069       uint16_t dst_alpha_blend_factor;
2070    } att[MAX_RTS];
2071 };
2072 
2073 struct radv_fragment_shading_rate_info {
2074    VkExtent2D size;
2075    VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
2076 };
2077 
2078 struct radv_graphics_pipeline_info {
2079    struct radv_vertex_input_info vi;
2080    struct radv_input_assembly_info ia;
2081 
2082    struct radv_tessellation_info ts;
2083    struct radv_viewport_info vp;
2084    struct radv_rasterization_info rs;
2085    struct radv_discard_rectangle_info dr;
2086 
2087    struct radv_multisample_info ms;
2088    struct radv_depth_stencil_info ds;
2089    struct radv_rendering_info ri;
2090    struct radv_color_blend_info cb;
2091 
2092    struct radv_fragment_shading_rate_info fsr;
2093 
2094    /* VK_AMD_mixed_attachment_samples */
2095    uint8_t color_att_samples;
2096    uint8_t ds_att_samples;
2097 };
2098 
/* How depth values are clamped. */
enum radv_depth_clamp_mode {
   /* Clamp to the viewport min/max depth bounds. */
   RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0,
   /* Clamp between 0.0f and 1.0f. */
   RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1,
   /* Depth clamping is disabled. */
   RADV_DEPTH_CLAMP_MODE_DISABLED = 2,
};
2104 
2105 struct radv_pipeline {
2106    struct vk_object_base base;
2107    enum radv_pipeline_type type;
2108 
2109    struct radv_device *device;
2110 
2111    struct radv_pipeline_slab *slab;
2112    struct radeon_winsys_bo *slab_bo;
2113 
2114    bool need_indirect_descriptor_sets;
2115    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
2116    struct radv_shader *gs_copy_shader;
2117 
2118    struct radeon_cmdbuf cs;
2119    uint32_t ctx_cs_hash;
2120    struct radeon_cmdbuf ctx_cs;
2121 
2122    uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
2123 
2124    unsigned max_waves;
2125    unsigned scratch_bytes_per_wave;
2126 
2127    /* Unique pipeline hash identifier. */
2128    uint64_t pipeline_hash;
2129 
2130    /* Pipeline layout info. */
2131    uint32_t push_constant_size;
2132    uint32_t dynamic_offset_count;
2133 };
2134 
2135 struct radv_graphics_pipeline {
2136    struct radv_pipeline base;
2137 
2138    VkShaderStageFlags active_stages;
2139 
2140    struct radv_dynamic_state dynamic_state;
2141 
2142    uint64_t dynamic_states;
2143    struct radv_multisample_state ms;
2144    struct radv_binning_state binning;
2145    struct radv_vrs_state vrs;
2146    uint32_t spi_baryc_cntl;
2147    unsigned esgs_ring_size;
2148    unsigned gsvs_ring_size;
2149    uint32_t vtx_base_sgpr;
2150    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
2151    uint8_t vtx_emit_num;
2152    uint64_t needed_dynamic_state;
2153    unsigned tess_patch_control_points;
2154    unsigned pa_su_sc_mode_cntl;
2155    unsigned db_depth_control;
2156    unsigned pa_cl_clip_cntl;
2157    unsigned cb_color_control;
2158    uint32_t binding_stride[MAX_VBS];
2159    uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
2160    uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
2161    uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
2162    uint8_t last_vertex_attrib_bit;
2163    uint8_t next_vertex_stage : 8;
2164    uint32_t vb_desc_usage_mask;
2165    uint32_t vb_desc_alloc_size;
2166 
2167    /* Last pre-PS API stage */
2168    gl_shader_stage last_vgt_api_stage;
2169 
2170    /* Used for rbplus */
2171    uint32_t col_format;
2172    uint32_t cb_target_mask;
2173 
2174    bool disable_out_of_order_rast_for_occlusion;
2175    bool uses_drawid;
2176    bool uses_baseinstance;
2177    bool uses_dynamic_stride;
2178    bool uses_conservative_overestimate;
2179    bool negative_one_to_one;
2180    enum radv_depth_clamp_mode depth_clamp_mode;
2181    bool use_per_attribute_vb_descs;
2182    bool can_use_simple_input;
2183    bool uses_user_sample_locations;
2184 
2185    /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
2186    bool force_vrs_per_vertex;
2187 
2188    /* Whether the pipeline uses NGG (GFX10+). */
2189    bool is_ngg;
2190    bool has_ngg_culling;
2191 
2192    /* Not NULL if graphics pipeline uses streamout. */
2193    struct radv_shader *streamout_shader;
2194 
2195    unsigned rast_prim;
2196    float line_width;
2197 };
2198 
2199 struct radv_compute_pipeline {
2200    struct radv_pipeline base;
2201 
2202    bool cs_regalloc_hang_bug;
2203 
2204    /* Raytracing */
2205    struct radv_pipeline_group_handle *rt_group_handles;
2206    struct radv_pipeline_shader_stack_size *rt_stack_sizes;
2207    bool dynamic_stack_size;
2208    uint32_t group_count;
2209 };
2210 
2211 struct radv_library_pipeline {
2212    struct radv_pipeline base;
2213 
2214    unsigned stage_count;
2215    VkPipelineShaderStageCreateInfo *stages;
2216    unsigned group_count;
2217    VkRayTracingShaderGroupCreateInfoKHR *groups;
2218    VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifiers;
2219    struct {
2220       uint8_t sha1[SHA1_DIGEST_LENGTH];
2221    } *hashes;
2222 };
2223 
2224 #define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)            \
2225    static inline struct radv_##pipe_type##_pipeline *                \
2226    radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline)      \
2227    {                                                                 \
2228       assert(pipeline->type == pipe_enum);                           \
2229       return (struct radv_##pipe_type##_pipeline *) pipeline;        \
2230    }
2231 
2232 RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
2233 RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
2234 RADV_DECL_PIPELINE_DOWNCAST(library, RADV_PIPELINE_LIBRARY)
2235 
2236 struct radv_pipeline_stage {
2237    gl_shader_stage stage;
2238 
2239    struct {
2240       const struct vk_object_base *object;
2241       const char *data;
2242       uint32_t size;
2243       unsigned char sha1[20];
2244    } spirv;
2245 
2246    const char *entrypoint;
2247    const VkSpecializationInfo *spec_info;
2248 
2249    unsigned char shader_sha1[20];
2250 
2251    nir_shader *nir;
2252    nir_shader *internal_nir; /* meta shaders */
2253 
2254    struct radv_shader_info info;
2255    struct radv_shader_args args;
2256 
2257    VkPipelineCreationFeedback feedback;
2258 };
2259 
2260 static inline bool
radv_pipeline_has_stage(const struct radv_graphics_pipeline * pipeline,gl_shader_stage stage)2261 radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage)
2262 {
2263    return pipeline->base.shaders[stage];
2264 }
2265 
2266 bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline);
2267 
2268 bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
2269 
2270 struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
2271                                                  gl_shader_stage stage, int idx);
2272 
2273 struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);
2274 
2275 void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
2276                               const struct radv_shader *shader);
2277 
2278 void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
2279                                       struct radeon_cmdbuf *cs, const struct radv_shader *shader);
2280 
/*
 * Driver-internal options passed as the `extra` argument of
 * radv_graphics_pipeline_create().
 */
struct radv_graphics_pipeline_create_info {
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;
   uint32_t custom_blend_mode;
};
2290 
2291 void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
2292                         enum radv_pipeline_type type);
2293 
2294 VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
2295                                        const VkGraphicsPipelineCreateInfo *pCreateInfo,
2296                                        const struct radv_graphics_pipeline_create_info *extra,
2297                                        const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
2298 
2299 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
2300                                       const VkComputePipelineCreateInfo *pCreateInfo,
2301                                       const VkAllocationCallbacks *pAllocator,
2302                                       const uint8_t *custom_hash,
2303                                       struct radv_pipeline_shader_stack_size *rt_stack_sizes,
2304                                       uint32_t rt_group_count, VkPipeline *pPipeline);
2305 
2306 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
2307                            const VkAllocationCallbacks *allocator);
2308 
/* Binning limits; the allowed range of each field is noted next to it. */
struct radv_binning_settings {
   /* allowed range: [1, 6] */
   unsigned context_states_per_bin;
   /* allowed range: [1, 32] */
   unsigned persistent_states_per_bin;
   /* allowed range: [0, 255], 0 = unlimited */
   unsigned fpovs_per_batch;
};

/* Returns the binning settings for `pdev` by value. */
struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2316 
2317 struct vk_format_description;
2318 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2319                                           int first_non_void);
2320 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2321                                          int first_non_void);
2322 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2323 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2324                                   const struct util_format_description *desc, unsigned *dfmt,
2325                                   unsigned *nfmt, bool *post_shuffle,
2326                                   enum radv_vs_input_alpha_adjust *alpha_adjust);
2327 uint32_t radv_translate_colorformat(VkFormat format);
2328 uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2329                                         int first_non_void);
2330 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2331 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2332 uint32_t radv_translate_dbformat(VkFormat format);
2333 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2334                                        int first_non_void);
2335 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2336                                       int first_non_void);
2337 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2338                                   VkClearColorValue *value);
2339 bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2340                                             VkFormat format);
2341 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2342                                           VkFormat format, bool *blendable);
2343 bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
2344                                  bool *sign_reinterpret);
2345 bool radv_is_atomic_format_supported(VkFormat format);
2346 bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2347 
2348 static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
2349    VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2350    VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
2351 
2352 struct radv_image_plane {
2353    VkFormat format;
2354    struct radeon_surf surface;
2355 };
2356 
2357 struct radv_image_binding {
2358    /* Set when bound */
2359    struct radeon_winsys_bo *bo;
2360    VkDeviceSize offset;
2361 };
2362 
2363 struct radv_image {
2364    struct vk_image vk;
2365 
2366    struct ac_surf_info info;
2367 
2368    VkDeviceSize size;
2369    uint32_t alignment;
2370 
2371    unsigned queue_family_mask;
2372    bool exclusive;
2373    bool shareable;
2374    bool l2_coherent;
2375    bool dcc_sign_reinterpret;
2376    bool support_comp_to_single;
2377 
2378    struct radv_image_binding bindings[3];
2379    bool tc_compatible_cmask;
2380 
2381    uint64_t clear_value_offset;
2382    uint64_t fce_pred_offset;
2383    uint64_t dcc_pred_offset;
2384 
2385    /*
2386     * Metadata for the TC-compat zrange workaround. If the 32-bit value
2387     * stored at this offset is UINT_MAX, the driver will emit
2388     * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
2389     * SET_CONTEXT_REG packet.
2390     */
2391    uint64_t tc_compat_zrange_offset;
2392 
2393    /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
2394    VkDeviceMemory owned_memory;
2395 
2396    unsigned plane_count;
2397    bool disjoint;
2398    struct radv_image_plane planes[0];
2399 };
2400 
2401 /* Whether the image has a htile  that is known consistent with the contents of
2402  * the image and is allowed to be in compressed form.
2403  *
2404  * If this is false reads that don't use the htile should be able to return
2405  * correct results.
2406  */
2407 bool radv_layout_is_htile_compressed(const struct radv_device *device,
2408                                      const struct radv_image *image, VkImageLayout layout,
2409                                      bool in_render_loop, unsigned queue_mask);
2410 
2411 bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2412                                 unsigned level, VkImageLayout layout, bool in_render_loop,
2413                                 unsigned queue_mask);
2414 
2415 bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2416                                 unsigned level, VkImageLayout layout, bool in_render_loop,
2417                                 unsigned queue_mask);
2418 
2419 bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2420                                   VkImageLayout layout, unsigned queue_mask);
2421 
2422 /**
2423  * Return whether the image has CMASK metadata for color surfaces.
2424  */
2425 static inline bool
radv_image_has_cmask(const struct radv_image * image)2426 radv_image_has_cmask(const struct radv_image *image)
2427 {
2428    return image->planes[0].surface.cmask_offset;
2429 }
2430 
2431 /**
2432  * Return whether the image has FMASK metadata for color surfaces.
2433  */
2434 static inline bool
radv_image_has_fmask(const struct radv_image * image)2435 radv_image_has_fmask(const struct radv_image *image)
2436 {
2437    return image->planes[0].surface.fmask_offset;
2438 }
2439 
2440 /**
2441  * Return whether the image has DCC metadata for color surfaces.
2442  */
2443 static inline bool
radv_image_has_dcc(const struct radv_image * image)2444 radv_image_has_dcc(const struct radv_image *image)
2445 {
2446    return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2447           image->planes[0].surface.meta_offset;
2448 }
2449 
2450 /**
2451  * Return whether the image is TC-compatible CMASK.
2452  */
2453 static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image * image)2454 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2455 {
2456    return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2457 }
2458 
2459 /**
2460  * Return whether DCC metadata is enabled for a level.
2461  */
2462 static inline bool
radv_dcc_enabled(const struct radv_image * image,unsigned level)2463 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2464 {
2465    return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2466 }
2467 
/**
 * Tell whether the image has any CB metadata (CMASK, FMASK or DCC).
 */
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
   if (radv_image_has_cmask(image))
      return true;
   if (radv_image_has_fmask(image))
      return true;

   return radv_image_has_dcc(image);
}
2476 
2477 /**
2478  * Return whether the image has HTILE metadata for depth surfaces.
2479  */
2480 static inline bool
radv_image_has_htile(const struct radv_image * image)2481 radv_image_has_htile(const struct radv_image *image)
2482 {
2483    return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2484           image->planes[0].surface.meta_size;
2485 }
2486 
2487 /**
2488  * Return whether the image has VRS HTILE metadata for depth surfaces
2489  */
2490 static inline bool
radv_image_has_vrs_htile(const struct radv_device * device,const struct radv_image * image)2491 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2492 {
2493    /* Any depth buffer can potentially use VRS. */
2494    return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2495           (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2496 }
2497 
2498 /**
2499  * Return whether HTILE metadata is enabled for a level.
2500  */
2501 static inline bool
radv_htile_enabled(const struct radv_image * image,unsigned level)2502 radv_htile_enabled(const struct radv_image *image, unsigned level)
2503 {
2504    return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2505 }
2506 
2507 /**
2508  * Return whether the image is TC-compatible HTILE.
2509  */
2510 static inline bool
radv_image_is_tc_compat_htile(const struct radv_image * image)2511 radv_image_is_tc_compat_htile(const struct radv_image *image)
2512 {
2513    return radv_image_has_htile(image) &&
2514           (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2515 }
2516 
2517 /**
2518  * Return whether the entire HTILE buffer can be used for depth in order to
2519  * improve HiZ Z-Range precision.
2520  */
2521 static inline bool
radv_image_tile_stencil_disabled(const struct radv_device * device,const struct radv_image * image)2522 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2523 {
2524    if (device->physical_device->rad_info.gfx_level >= GFX9) {
2525       return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
2526    } else {
2527       /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2528        * the TC-compat ZRANGE issue even if no stencil is used.
2529        */
2530       return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image);
2531    }
2532 }
2533 
2534 static inline bool
radv_image_has_clear_value(const struct radv_image * image)2535 radv_image_has_clear_value(const struct radv_image *image)
2536 {
2537    return image->clear_value_offset != 0;
2538 }
2539 
2540 static inline uint64_t
radv_image_get_fast_clear_va(const struct radv_image * image,uint32_t base_level)2541 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2542 {
2543    assert(radv_image_has_clear_value(image));
2544 
2545    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2546    va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2547    return va;
2548 }
2549 
2550 static inline uint64_t
radv_image_get_fce_pred_va(const struct radv_image * image,uint32_t base_level)2551 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2552 {
2553    assert(image->fce_pred_offset != 0);
2554 
2555    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2556    va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8;
2557    return va;
2558 }
2559 
2560 static inline uint64_t
radv_image_get_dcc_pred_va(const struct radv_image * image,uint32_t base_level)2561 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2562 {
2563    assert(image->dcc_pred_offset != 0);
2564 
2565    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2566    va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8;
2567    return va;
2568 }
2569 
2570 static inline uint64_t
radv_get_tc_compat_zrange_va(const struct radv_image * image,uint32_t base_level)2571 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2572 {
2573    assert(image->tc_compat_zrange_offset != 0);
2574 
2575    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2576    va += image->bindings[0].offset + image->tc_compat_zrange_offset + base_level * 4;
2577    return va;
2578 }
2579 
2580 static inline uint64_t
radv_get_ds_clear_value_va(const struct radv_image * image,uint32_t base_level)2581 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2582 {
2583    assert(radv_image_has_clear_value(image));
2584 
2585    uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2586    va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2587    return va;
2588 }
2589 
/**
 * Pick the 32-bit value used to initialize HTILE for an image.
 *
 * Three constants are possible, chosen by whether stencil is disabled in the
 * tile and, if not, whether the image has VRS HTILE.
 */
static inline uint32_t
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
   if (radv_image_tile_stencil_disabled(device, image)) {
      /* Z only (no stencil):
       *
       * |31     18|17      4|3     0|
       * +---------+---------+-------+
       * |  Max Z  |  Min Z  | ZMask |
       */
      return 0xfffc000f;
   }

   /* Z and stencil:
    *
    * |31       12|11 10|9    8|7   6|5   4|3     0|
    * +-----------+-----+------+-----+-----+-------+
    * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
    *
    * SR0/SR1 contains the stencil test results. Initializing
    * SR0/SR1 to 0x3 means the stencil test result is unknown.
    *
    * Z, stencil and 4 bit VRS encoding:
    * |31       12|11        10|9    8|7          6|5   4|3     0|
    * +-----------+------------+------+------------+-----+-------+
    * |  Z Range  | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
    */
   if (radv_image_has_vrs_htile(device, image)) {
      /* VRS x-rate starts at 0, so the hw interprets it as 1 sample. */
      return 0xfffff33f;
   }

   return 0xfffff3ff;
}
2628 
2629 static inline bool
radv_image_get_iterate256(struct radv_device * device,struct radv_image * image)2630 radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2631 {
2632    /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2633    return device->physical_device->rad_info.gfx_level >= GFX10 &&
2634           (image->vk.usage &
2635            (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2636           radv_image_is_tc_compat_htile(image) && image->info.samples > 1;
2637 }
2638 
2639 unsigned radv_image_queue_family_mask(const struct radv_image *image,
2640                                       enum radv_queue_family family,
2641                                       enum radv_queue_family queue_family);
2642 
2643 static inline uint32_t
radv_get_layerCount(const struct radv_image * image,const VkImageSubresourceRange * range)2644 radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2645 {
2646    return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2647              ? image->info.array_size - range->baseArrayLayer
2648              : range->layerCount;
2649 }
2650 
2651 static inline uint32_t
radv_get_levelCount(const struct radv_image * image,const VkImageSubresourceRange * range)2652 radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2653 {
2654    return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2655                                                        : range->levelCount;
2656 }
2657 
2658 bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
2659 
2660 struct radeon_bo_metadata;
2661 void radv_init_metadata(struct radv_device *device, struct radv_image *image,
2662                         struct radeon_bo_metadata *metadata);
2663 
2664 void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
2665                                        uint64_t offset, uint32_t stride);
2666 
/* Descriptor storage with two overlapping layouts (anonymous structs in a
 * union, so both views alias the same memory):
 *  - an 8-dword plane-0 descriptor followed by an 8-dword fmask descriptor;
 *  - three 8-dword per-plane descriptors.
 */
union radv_descriptor {
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2676 
/* Driver-side image view: embeds the common vk_image_view as its first
 * member, points at the viewed radv_image, and carries two descriptor sets
 * (sampled and storage).
 */
struct radv_image_view {
   struct vk_image_view vk;
   struct radv_image *image; /**< VkImageViewCreateInfo::image */

   /* NOTE(review): presumably the index of the image plane this view targets — confirm. */
   unsigned plane_id;
   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   /* Whether the image iview supports fast clear. */
   bool support_fast_clear;

   /* NOTE(review): same name as radv_image_view_extra_create_info::disable_dcc_mrt;
    * presumably copied from it at init time — confirm.
    */
   bool disable_dcc_mrt;

   /* Descriptor for use as a sampled image (see storage_descriptor below). */
   union radv_descriptor descriptor;

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   union radv_descriptor storage_descriptor;
};
2696 
/* Parameters for driver-internal image creation: a pointer to a
 * VkImageCreateInfo plus driver-only flags and optional BO metadata.
 * Consumed by radv_image_create() and radv_image_create_layout().
 */
struct radv_image_create_info {
   const VkImageCreateInfo *vk_info;
   bool scanout;
   bool no_metadata_planes;
   bool prime_blit_src;
   const struct radeon_bo_metadata *bo_metadata;
};
2704 
2705 VkResult
2706 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2707                          const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2708                          struct radv_image *image);
2709 
2710 VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2711                            const VkAllocationCallbacks *alloc, VkImage *pImage);
2712 
2713 bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
2714                                      VkFormat format, VkImageCreateFlags flags,
2715                                      bool *sign_reinterpret);
2716 
2717 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
2718 
2719 VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2720                                  const VkNativeBufferANDROID *gralloc_info,
2721                                  const VkAllocationCallbacks *alloc, VkImage *out_image_h);
2722 uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
2723                                       const VkImageUsageFlags vk_usage);
2724 VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2725                                 unsigned priority,
2726                                 const VkImportAndroidHardwareBufferInfoANDROID *info);
2727 VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2728                                 unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
2729 
2730 VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2731 
2732 bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2733 
/* Optional driver-only parameters passed to radv_image_view_init() in
 * addition to the VkImageViewCreateInfo.
 */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
   bool disable_dcc_mrt;
   bool from_client; /**< Set only if this came from vkCreateImage */
};
2740 
2741 void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2742                           const VkImageViewCreateInfo *pCreateInfo,
2743                           VkImageCreateFlags img_create_flags,
2744                           const struct radv_image_view_extra_create_info *extra_create_info);
2745 void radv_image_view_finish(struct radv_image_view *iview);
2746 
2747 VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2748 
/* Plain-data description of a sampler YCbCr conversion (format, model,
 * range, component mapping, chroma offsets and chroma filter). Embedded in
 * struct radv_sampler_ycbcr_conversion.
 */
struct radv_sampler_ycbcr_conversion_state {
   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};
2757 
/* Driver object backing VkSamplerYcbcrConversion (see the handle cast for
 * this type at the end of the header).
 */
struct radv_sampler_ycbcr_conversion {
   struct vk_object_base base;
   /* The state is hashed for the descriptor set layout. */
   struct radv_sampler_ycbcr_conversion_state state;
};
2763 
/* Driver object backing VkBufferView: references a winsys BO and stores the
 * view format, range and a 4-dword state array.
 */
struct radv_buffer_view {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   VkFormat vk_format;
   uint64_t range; /**< VkBufferViewCreateInfo::range */
   /* NOTE(review): presumably the hardware buffer descriptor dwords — confirm. */
   uint32_t state[4];
};
2771 void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2772                            const VkBufferViewCreateInfo *pCreateInfo);
2773 void radv_buffer_view_finish(struct radv_buffer_view *view);
2774 
2775 static inline bool
radv_image_extent_compare(const struct radv_image * image,const VkExtent3D * extent)2776 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2777 {
2778    if (extent->width != image->info.width || extent->height != image->info.height ||
2779        extent->depth != image->info.depth)
2780       return false;
2781    return true;
2782 }
2783 
/* Driver object backing VkSampler: a 4-dword state array, an optional
 * pointer to a YCbCr conversion and a border color slot index.
 */
struct radv_sampler {
   struct vk_object_base base;
   /* NOTE(review): presumably the hardware sampler descriptor dwords — confirm. */
   uint32_t state[4];
   struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
   uint32_t border_color_slot;
};
2790 
/* Source/destination stage and access masks describing one barrier.
 * Used as radv_subpass::start_barrier and radv_render_pass::end_barrier,
 * and as the argument of radv_emit_subpass_barrier().
 */
struct radv_subpass_barrier {
   VkPipelineStageFlags2 src_stage_mask;
   VkPipelineStageFlags2 dst_stage_mask;
   VkAccessFlags2 src_access_mask;
   VkAccessFlags2 dst_access_mask;
};
2797 
2798 void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
2799                                const struct radv_subpass_barrier *barrier);
2800 
/* One attachment reference inside a subpass: the attachment index together
 * with the layout(s) it is used in.
 */
struct radv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
   VkImageLayout stencil_layout;
   bool in_render_loop;
};
2807 
/* Description of a single subpass of a render pass: its attachment
 * references grouped by role, resolve modes, start barrier, view mask,
 * sample counts and external-dependency flags.
 */
struct radv_subpass {
   uint32_t attachment_count;
   struct radv_subpass_attachment *attachments;

   uint32_t input_count;
   uint32_t color_count;
   /* Per-role attachment references.
    * NOTE(review): presumably these point into `attachments` — confirm at
    * the render pass creation site.
    */
   struct radv_subpass_attachment *input_attachments;
   struct radv_subpass_attachment *color_attachments;
   struct radv_subpass_attachment *resolve_attachments;
   struct radv_subpass_attachment *depth_stencil_attachment;
   struct radv_subpass_attachment *ds_resolve_attachment;
   struct radv_subpass_attachment *vrs_attachment;
   VkResolveModeFlagBits depth_resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;

   struct radv_subpass_barrier start_barrier;

   uint32_t view_mask;

   VkSampleCountFlagBits color_sample_count;
   VkSampleCountFlagBits depth_sample_count;
   VkSampleCountFlagBits max_sample_count;

   /* Whether the subpass has ingoing/outgoing external dependencies. */
   bool has_ingoing_dep;
   bool has_outgoing_dep;
};
2838 
2839 uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2840 
/* Per-attachment description stored in a render pass: format, sample count,
 * load ops, initial/final layouts (with separate stencil variants) and the
 * range of subpasses that use the attachment.
 */
struct radv_render_pass_attachment {
   VkFormat format;
   uint32_t samples;
   VkAttachmentLoadOp load_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used first/last. */
   uint32_t first_subpass_idx;
   uint32_t last_subpass_idx;
};
2855 
/* Driver object backing VkRenderPass: attachment descriptions, subpass
 * attachment references, an end barrier and the subpasses themselves.
 */
struct radv_render_pass {
   struct vk_object_base base;
   uint32_t attachment_count;
   uint32_t subpass_count;
   struct radv_subpass_attachment *subpass_attachments;
   struct radv_render_pass_attachment *attachments;
   struct radv_subpass_barrier end_barrier;
   /* Trailing zero-length array; must stay the last member.
    * NOTE(review): presumably allocated with subpass_count entries — confirm
    * at the allocation site.
    */
   struct radv_subpass subpasses[0];
};
2865 
2866 VkResult radv_device_init_meta(struct radv_device *device);
2867 void radv_device_finish_meta(struct radv_device *device);
2868 
/* Driver object backing VkQueryPool: the BO holding the query data, its
 * layout parameters (stride, availability offset, size), a `ptr` into it
 * and the query type.
 */
struct radv_query_pool {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint32_t stride;
   uint32_t availability_offset;
   uint64_t size;
   /* NOTE(review): presumably the CPU mapping of `bo` — confirm. */
   char *ptr;
   VkQueryType type;
   uint32_t pipeline_stats_mask;
   bool uses_gds; /* For NGG GS on GFX10+ */
};
2880 
2881 struct radv_perfcounter_impl;
2882 
/* Query pool variant used with the radv_pc_* functions. Embeds the generic
 * radv_query_pool as its first member `b` and adds register, pass and
 * counter bookkeeping.
 */
struct radv_pc_query_pool {
   struct radv_query_pool b;

   /* Array of num_pc_regs register values. */
   uint32_t *pc_regs;
   unsigned num_pc_regs;

   unsigned num_passes;

   /* Array of num_counters counter implementations. */
   unsigned num_counters;
   struct radv_perfcounter_impl *counters;
};
2894 
2895 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
2896 VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice,
2897                                  const VkQueryPoolCreateInfo *pCreateInfo,
2898                                  struct radv_pc_query_pool *pool);
2899 void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
2900                          uint64_t va);
2901 void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
2902                        uint64_t va);
2903 void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
2904 
2905 bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
2906 
2907 int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
2908                     const VkDeviceQueueCreateInfo *create_info,
2909                     const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
2910 
2911 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
2912                              struct radv_descriptor_set *set, unsigned idx);
2913 
2914 void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
2915                                      VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
2916                                      const VkWriteDescriptorSet *pDescriptorWrites,
2917                                      uint32_t descriptorCopyCount,
2918                                      const VkCopyDescriptorSet *pDescriptorCopies);
2919 
2920 void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
2921                                                   struct radv_cmd_buffer *cmd_buffer,
2922                                                   struct radv_descriptor_set *set,
2923                                                   VkDescriptorUpdateTemplate descriptorUpdateTemplate,
2924                                                   const void *pData);
2925 
2926 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
2927                                    VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
2928                                    uint32_t set, uint32_t descriptorWriteCount,
2929                                    const VkWriteDescriptorSet *pDescriptorWrites);
2930 
2931 uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2932                        const VkImageSubresourceRange *range, uint32_t value);
2933 
2934 uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2935                          const VkImageSubresourceRange *range);
2936 
2937 /* radv_nir_to_llvm.c */
2938 struct radv_shader_args;
2939 struct radv_nir_compiler_options;
2940 struct radv_shader_info;
2941 
2942 void llvm_compile_shader(const struct radv_nir_compiler_options *options,
2943                          const struct radv_shader_info *info, unsigned shader_count,
2944                          struct nir_shader *const *shaders, struct radv_shader_binary **binary,
2945                          const struct radv_shader_args *args);
2946 
2947 /* radv_shader_info.h */
2948 struct radv_shader_info;
2949 
2950 void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
2951                                const struct radv_pipeline_layout *layout,
2952                                const struct radv_pipeline_key *pipeline_key,
2953                                struct radv_shader_info *info);
2954 
2955 void radv_nir_shader_info_init(struct radv_shader_info *info);
2956 
2957 bool radv_thread_trace_init(struct radv_device *device);
2958 void radv_thread_trace_finish(struct radv_device *device);
2959 bool radv_begin_thread_trace(struct radv_queue *queue);
2960 bool radv_end_thread_trace(struct radv_queue *queue);
2961 bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
2962 void radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
2963                                      uint32_t num_dwords);
2964 bool radv_is_instruction_timing_enabled(void);
2965 
2966 void radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs,
2967                                    bool inhibit);
2968 void radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
2969 
2970 bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2971                           struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
2972 
/* radv_sqtt_layer.c */
/* Set of layout-transition flags passed to radv_describe_layout_transition().
 * The union exposes the same 16 bits either as eight named 1-bit flags plus
 * 8 reserved bits, or as a single integer (`all`).
 */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      /* All flag bits at once. */
      uint16_t all;
   } layout_transitions;
};
2990 
/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 *
 * Passed to radv_describe_barrier_start().
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    *
    * The values below are consecutive offsets from RGP_BARRIER_INTERNAL_BASE.
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
3014 
3015 void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
3016 void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
3017 void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
3018 void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
3019 void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
3020                                            VkImageAspectFlagBits aspects);
3021 void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
3022 void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
3023 void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
3024 void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
3025                                  enum rgp_barrier_reason reason);
3026 void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
3027 void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
3028 void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
3029                                      const struct radv_barrier_data *barrier);
3030 
/* Driver object backing VkIndirectCommandsLayoutNV: the input stride, flags
 * and byte offsets for the supported token kinds, and a trailing copy of the
 * tokens.
 */
struct radv_indirect_command_layout {
   struct vk_object_base base;

   uint32_t input_stride;
   uint32_t token_count;

   bool indexed;
   bool binds_index_buffer;
   bool binds_state;
   uint16_t draw_params_offset;
   uint16_t index_buffer_offset;

   uint16_t state_offset;

   /* NOTE(review): presumably bit i set means vbo_offsets[i] is valid — confirm. */
   uint32_t bind_vbo_mask;
   uint32_t vbo_offsets[MAX_VBS];

   /* One offset entry per push-constant dword (MAX_PUSH_CONSTANTS_SIZE / 4). */
   uint64_t push_constant_mask;
   uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];

   uint32_t ibo_type_32;
   uint32_t ibo_type_8;

   /* Trailing zero-length array; must stay the last member.
    * NOTE(review): presumably holds token_count entries — confirm at the
    * allocation site.
    */
   VkIndirectCommandsLayoutTokenNV tokens[0];
};
3056 
3057 uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
3058 
3059 void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
3060                       const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
3061 
3062 uint64_t radv_get_current_time(void);
3063 
3064 static inline uint32_t
si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)3065 si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
3066 {
3067    switch (gl_prim) {
3068    case SHADER_PRIM_POINTS:
3069       return 1;
3070    case SHADER_PRIM_LINES:
3071    case SHADER_PRIM_LINE_STRIP:
3072       return 2;
3073    case SHADER_PRIM_TRIANGLES:
3074    case SHADER_PRIM_TRIANGLE_STRIP:
3075       return 3;
3076    case SHADER_PRIM_LINES_ADJACENCY:
3077       return 4;
3078    case SHADER_PRIM_TRIANGLES_ADJACENCY:
3079       return 6;
3080    case SHADER_PRIM_QUADS:
3081       return V_028A6C_TRISTRIP;
3082    default:
3083       assert(0);
3084       return 0;
3085    }
3086 }
3087 
3088 static inline uint32_t
si_conv_prim_to_gs_out(uint32_t topology)3089 si_conv_prim_to_gs_out(uint32_t topology)
3090 {
3091    switch (topology) {
3092    case V_008958_DI_PT_POINTLIST:
3093    case V_008958_DI_PT_PATCH:
3094       return V_028A6C_POINTLIST;
3095    case V_008958_DI_PT_LINELIST:
3096    case V_008958_DI_PT_LINESTRIP:
3097    case V_008958_DI_PT_LINELIST_ADJ:
3098    case V_008958_DI_PT_LINESTRIP_ADJ:
3099       return V_028A6C_LINESTRIP;
3100    case V_008958_DI_PT_TRILIST:
3101    case V_008958_DI_PT_TRISTRIP:
3102    case V_008958_DI_PT_TRIFAN:
3103    case V_008958_DI_PT_TRILIST_ADJ:
3104    case V_008958_DI_PT_TRISTRIP_ADJ:
3105       return V_028A6C_TRISTRIP;
3106    default:
3107       assert(0);
3108       return 0;
3109    }
3110 }
3111 
3112 static inline uint32_t
si_translate_prim(unsigned topology)3113 si_translate_prim(unsigned topology)
3114 {
3115    switch (topology) {
3116    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
3117       return V_008958_DI_PT_POINTLIST;
3118    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
3119       return V_008958_DI_PT_LINELIST;
3120    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
3121       return V_008958_DI_PT_LINESTRIP;
3122    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
3123       return V_008958_DI_PT_TRILIST;
3124    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
3125       return V_008958_DI_PT_TRISTRIP;
3126    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
3127       return V_008958_DI_PT_TRIFAN;
3128    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
3129       return V_008958_DI_PT_LINELIST_ADJ;
3130    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
3131       return V_008958_DI_PT_LINESTRIP_ADJ;
3132    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
3133       return V_008958_DI_PT_TRILIST_ADJ;
3134    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
3135       return V_008958_DI_PT_TRISTRIP_ADJ;
3136    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
3137       return V_008958_DI_PT_PATCH;
3138    default:
3139       unreachable("unhandled primitive type");
3140    }
3141 }
3142 
3143 static inline bool
radv_prim_is_points_or_lines(unsigned topology)3144 radv_prim_is_points_or_lines(unsigned topology)
3145 {
3146    switch (topology) {
3147    case V_008958_DI_PT_POINTLIST:
3148    case V_008958_DI_PT_LINELIST:
3149    case V_008958_DI_PT_LINESTRIP:
3150    case V_008958_DI_PT_LINELIST_ADJ:
3151    case V_008958_DI_PT_LINESTRIP_ADJ:
3152       return true;
3153    default:
3154       return false;
3155    }
3156 }
3157 
3158 static inline bool
radv_rast_prim_is_point(unsigned rast_prim)3159 radv_rast_prim_is_point(unsigned rast_prim)
3160 {
3161    return rast_prim == V_028A6C_POINTLIST;
3162 }
3163 
3164 static inline bool
radv_rast_prim_is_line(unsigned rast_prim)3165 radv_rast_prim_is_line(unsigned rast_prim)
3166 {
3167    return rast_prim == V_028A6C_LINESTRIP;
3168 }
3169 
/**
 * Return true when rast_prim is a point primitive or a line primitive, as
 * decided by radv_rast_prim_is_point() / radv_rast_prim_is_line().
 */
static inline bool
radv_rast_prim_is_points_or_lines(unsigned rast_prim)
{
   if (radv_rast_prim_is_point(rast_prim))
      return true;

   return radv_rast_prim_is_line(rast_prim);
}
3175 
3176 static inline uint32_t
si_translate_stencil_op(enum VkStencilOp op)3177 si_translate_stencil_op(enum VkStencilOp op)
3178 {
3179    switch (op) {
3180    case VK_STENCIL_OP_KEEP:
3181       return V_02842C_STENCIL_KEEP;
3182    case VK_STENCIL_OP_ZERO:
3183       return V_02842C_STENCIL_ZERO;
3184    case VK_STENCIL_OP_REPLACE:
3185       return V_02842C_STENCIL_REPLACE_TEST;
3186    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
3187       return V_02842C_STENCIL_ADD_CLAMP;
3188    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
3189       return V_02842C_STENCIL_SUB_CLAMP;
3190    case VK_STENCIL_OP_INVERT:
3191       return V_02842C_STENCIL_INVERT;
3192    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
3193       return V_02842C_STENCIL_ADD_WRAP;
3194    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
3195       return V_02842C_STENCIL_SUB_WRAP;
3196    default:
3197       return 0;
3198    }
3199 }
3200 
3201 static inline uint32_t
si_translate_blend_logic_op(VkLogicOp op)3202 si_translate_blend_logic_op(VkLogicOp op)
3203 {
3204    switch (op) {
3205    case VK_LOGIC_OP_CLEAR:
3206       return V_028808_ROP3_CLEAR;
3207    case VK_LOGIC_OP_AND:
3208       return V_028808_ROP3_AND;
3209    case VK_LOGIC_OP_AND_REVERSE:
3210       return V_028808_ROP3_AND_REVERSE;
3211    case VK_LOGIC_OP_COPY:
3212       return V_028808_ROP3_COPY;
3213    case VK_LOGIC_OP_AND_INVERTED:
3214       return V_028808_ROP3_AND_INVERTED;
3215    case VK_LOGIC_OP_NO_OP:
3216       return V_028808_ROP3_NO_OP;
3217    case VK_LOGIC_OP_XOR:
3218       return V_028808_ROP3_XOR;
3219    case VK_LOGIC_OP_OR:
3220       return V_028808_ROP3_OR;
3221    case VK_LOGIC_OP_NOR:
3222       return V_028808_ROP3_NOR;
3223    case VK_LOGIC_OP_EQUIVALENT:
3224       return V_028808_ROP3_EQUIVALENT;
3225    case VK_LOGIC_OP_INVERT:
3226       return V_028808_ROP3_INVERT;
3227    case VK_LOGIC_OP_OR_REVERSE:
3228       return V_028808_ROP3_OR_REVERSE;
3229    case VK_LOGIC_OP_COPY_INVERTED:
3230       return V_028808_ROP3_COPY_INVERTED;
3231    case VK_LOGIC_OP_OR_INVERTED:
3232       return V_028808_ROP3_OR_INVERTED;
3233    case VK_LOGIC_OP_NAND:
3234       return V_028808_ROP3_NAND;
3235    case VK_LOGIC_OP_SET:
3236       return V_028808_ROP3_SET;
3237    default:
3238       unreachable("Unhandled logic op");
3239    }
3240 }
3241 
3242 /*
3243  * Queue helper to get ring.
3244  * placed here as it needs queue + device structs.
3245  */
3246 static inline enum amd_ip_type
radv_queue_ring(struct radv_queue * queue)3247 radv_queue_ring(struct radv_queue *queue)
3248 {
3249    return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
3250 }
3251 
3252 /**
3253  * Helper used for debugging compiler issues by enabling/disabling LLVM for a
3254  * specific shader stage (developers only).
3255  */
3256 static inline bool
radv_use_llvm_for_stage(struct radv_device * device,UNUSED gl_shader_stage stage)3257 radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
3258 {
3259    return device->physical_device->use_llvm;
3260 }
3261 
3262 static inline bool
radv_has_shader_buffer_float_minmax(const struct radv_physical_device * pdevice)3263 radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice)
3264 {
3265    return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) ||
3266           pdevice->rad_info.gfx_level >= GFX10;
3267 }
3268 
/* Driver object backing VkAccelerationStructureKHR: a BO plus an offset and
 * size. radv_accel_struct_get_va() adds mem_offset to the BO's VA.
 */
struct radv_acceleration_structure {
   struct vk_object_base base;

   struct radeon_winsys_bo *bo;
   uint64_t mem_offset;
   uint64_t size;
};
3276 
3277 static inline uint64_t
radv_accel_struct_get_va(const struct radv_acceleration_structure * accel)3278 radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
3279 {
3280    return radv_buffer_get_va(accel->bo) + accel->mem_offset;
3281 }
3282 
3283 /* radv_perfcounter.c */
3284 void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
3285 void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
3286 void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs,
3287                                      int family);
3288 void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
3289                                     int family);
3290 
3291 /* radv_spm.c */
3292 bool radv_spm_init(struct radv_device *device);
3293 void radv_spm_finish(struct radv_device *device);
3294 void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
3295 
/* Driver-prefixed alias for VK_FROM_HANDLE; forwards all three arguments
 * unchanged.
 */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
   VK_FROM_HANDLE(__radv_type, __name, __handle)
3298 
3299 VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
3300                        VK_OBJECT_TYPE_COMMAND_BUFFER)
3301 VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
3302 VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
3303 VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
3304                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
3305 VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
3306 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
3307                                VkAccelerationStructureKHR,
3308                                VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
3309 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
3310                                VK_OBJECT_TYPE_COMMAND_POOL)
3311 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
3312 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
3313                                VK_OBJECT_TYPE_BUFFER_VIEW)
3314 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
3315                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
3316 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
3317                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
3318 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
3319                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
3320 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
3321                                VkDescriptorUpdateTemplate,
3322                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
3323 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
3324                                VK_OBJECT_TYPE_DEVICE_MEMORY)
3325 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
3326 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
3327 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView,
3328                                VK_OBJECT_TYPE_IMAGE_VIEW);
3329 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
3330                                VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
3331 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
3332                                VK_OBJECT_TYPE_PIPELINE_CACHE)
3333 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
3334                                VK_OBJECT_TYPE_PIPELINE)
3335 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
3336                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
3337 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
3338                                VK_OBJECT_TYPE_QUERY_POOL)
3339 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
3340                                VK_OBJECT_TYPE_RENDER_PASS)
3341 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
3342                                VK_OBJECT_TYPE_SAMPLER)
3343 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
3344                                VkSamplerYcbcrConversion,
3345                                VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
3346 
3347 #ifdef __cplusplus
3348 }
3349 #endif
3350 
3351 #endif /* RADV_PRIVATE_H */
3352