1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30
31 #include <assert.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #ifdef HAVE_VALGRIND
38 #include <memcheck.h>
39 #include <valgrind.h>
40 #define VG(x) x
41 #else
42 #define VG(x) ((void)0)
43 #endif
44
45 #include "c11/threads.h"
46 #ifndef _WIN32
47 #include <amdgpu.h>
48 #include <xf86drm.h>
49 #endif
50 #include "compiler/shader_enums.h"
51 #include "util/bitscan.h"
52 #include "util/list.h"
53 #include "util/macros.h"
54 #include "util/rwlock.h"
55 #include "util/xmlconfig.h"
56 #include "vk_alloc.h"
57 #include "vk_buffer.h"
58 #include "vk_command_buffer.h"
59 #include "vk_command_pool.h"
60 #include "vk_debug_report.h"
61 #include "vk_device.h"
62 #include "vk_format.h"
63 #include "vk_instance.h"
64 #include "vk_log.h"
65 #include "vk_physical_device.h"
66 #include "vk_shader_module.h"
67 #include "vk_queue.h"
68 #include "vk_util.h"
69 #include "vk_image.h"
70 #include "vk_framebuffer.h"
71
72 #include "ac_binary.h"
73 #include "ac_gpu_info.h"
74 #include "ac_shader_util.h"
75 #include "ac_spm.h"
76 #include "ac_sqtt.h"
77 #include "ac_surface.h"
78 #include "radv_constants.h"
79 #include "radv_descriptor_set.h"
80 #include "radv_radeon_winsys.h"
81 #include "radv_shader.h"
82 #include "radv_shader_args.h"
83 #include "sid.h"
84
85 #include "radix_sort/radix_sort_vk_devaddr.h"
86
87 /* Pre-declarations needed for WSI entrypoints */
88 struct wl_surface;
89 struct wl_display;
90 typedef struct xcb_connection_t xcb_connection_t;
91 typedef uint32_t xcb_visualid_t;
92 typedef uint32_t xcb_window_t;
93
94 #include <vulkan/vk_android_native_buffer.h>
95 #include <vulkan/vk_icd.h>
96 #include <vulkan/vulkan.h>
97 #include <vulkan/vulkan_android.h>
98
99 #include "radv_entrypoints.h"
100
101 #include "wsi_common.h"
102
103 #ifdef __cplusplus
104 extern "C"
105 {
106 #endif
107
108 /* Helper to determine if we should compile
109 * any of the Android AHB support.
110 *
111 * To actually enable the ext we also need
112 * the necessary kernel support.
113 */
114 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
115 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
116 #include <vndk/hardware_buffer.h>
117 #else
118 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
119 #endif
120
121 #ifdef _WIN32
122 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
123 #else
124 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
125 #endif
126
127 #ifdef _WIN32
128 #define radv_printflike(a, b)
129 #else
130 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
131 #endif
132
/**
 * Round v up to the next multiple of a (32-bit unsigned).
 *
 * a must be a non-zero power of two; this is checked with assert().
 * The addition wraps modulo 2^32 like any unsigned arithmetic.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));

   /* For a power of two, a - 1 is the mask of the bits below the alignment. */
   const uint32_t low_bits = a - 1;

   return (v + low_bits) & ~low_bits;
}
139
/**
 * Round v up to the next multiple of a, where a does not have to be a
 * power of two.
 *
 * a must be non-zero (dividing by zero is undefined behaviour); this is
 * checked with assert() like the alignment preconditions of the other align
 * helpers in this file.
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   assert(a != 0);
   return (v + a - 1) / a * a;
}
145
/**
 * Round v up to the next multiple of a (64-bit unsigned).
 *
 * a must be a non-zero power of two; this is checked with assert().
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));

   const uint64_t bumped = v + a - 1;

   /* Dropping the bits below the alignment is the same as masking them off. */
   return bumped - (bumped & (a - 1));
}
152
/**
 * Round v up to the next multiple of a (32-bit signed).
 *
 * a must be a non-zero power of two; this is checked with assert().
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));

   const int32_t biased = v + a - 1;
   const int32_t keep_mask = ~(a - 1);

   return biased & keep_mask;
}
159
/**
 * Return true when n is a multiple of a.
 *
 * Alignment must be a power of 2. Note that a == 0 also passes the assert;
 * in that case the result is true only for n == 0.
 */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));

   const uintmax_t misaligned_bits = n & (a - 1);

   return !misaligned_bits;
}
167
/**
 * Divide v by a, rounding the quotient up (32-bit unsigned).
 *
 * The result is the quotient, not a multiple of a. a must be non-zero.
 */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   const uint32_t numerator = v + a - 1;

   return numerator / a;
}
173
/**
 * Divide v by a, rounding the quotient up (64-bit unsigned).
 *
 * The result is the quotient, not a multiple of a. a must be non-zero.
 */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   const uint64_t numerator = v + a - 1;

   return numerator / a;
}
179
/**
 * Halve n `levels` times, never going below 1 for a non-zero n.
 *
 * Returns 0 when n is 0. Otherwise returns n >> levels, clamped to a minimum
 * of 1. A shift count of 32 or more yields 1 as well.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;

   /* Shifting a 32-bit value by 32 or more bits is undefined behaviour in C.
    * Every non-zero n is already reduced to the minimum of 1 by then.
    */
   if (levels >= 32)
      return 1;

   const uint32_t shifted = n >> levels;

   return shifted > 1 ? shifted : 1;
}
/**
 * Clamp f to the range [min, max].
 *
 * min must be strictly less than max; this is checked with assert().
 * The upper bound is tested first.
 */
static inline float
radv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   return f > max ? max : (f < min ? min : f);
}
200
/**
 * Clear the bits of clear_mask in *inout_mask.
 *
 * Returns true when at least one of those bits was set (and *inout_mask was
 * therefore modified), false when none was set and nothing was written.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   const uint32_t overlap = *inout_mask & clear_mask;

   if (!overlap)
      return false;

   *inout_mask &= ~clear_mask;
   return true;
}
211
/**
 * Convert a float to signed fixed point with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and converted to int, which discards
 * the fractional part of the product.
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   const int scale = 1 << frac_bits;

   return (int)(value * scale);
}
217
/**
 * Convert a float to unsigned fixed point with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and converted to unsigned int, which
 * discards the fractional part of the product.
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   const int scale = 1 << frac_bits;

   return (unsigned int)(value * scale);
}
223
224 /* Whenever we generate an error, pass it through this function. Useful for
225 * debugging, where we can break on it. Only call at error site, not when
226 * propagating errors. Might be useful to plug in a stack trace here.
227 */
228
229 struct radv_image_view;
230 struct radv_instance;
231
/* A non-fatal assert. Useful for debugging.
 *
 * With NDEBUG defined the macro expands to an empty statement and `x` is not
 * evaluated. Otherwise a failed check prints the file name, line number and
 * the expression text to stderr, and execution continues.
 */
#ifdef NDEBUG
#define radv_assert(x) \
   do { \
   } while (0)
#else
#define radv_assert(x) \
   do { \
      if (unlikely(!(x))) \
         fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
   } while (0)
#endif
244
245 int radv_get_instance_entrypoint_index(const char *name);
246 int radv_get_device_entrypoint_index(const char *name);
247 int radv_get_physical_device_entrypoint_index(const char *name);
248
249 const char *radv_get_instance_entry_name(int index);
250 const char *radv_get_physical_device_entry_name(int index);
251 const char *radv_get_device_entry_name(int index);
252
/* queue types
 *
 * The first RADV_MAX_QUEUE_FAMILIES values are the real queue families and
 * are used to size per-family arrays. RADV_QUEUE_FOREIGN shares the value of
 * RADV_MAX_QUEUE_FAMILIES, and RADV_QUEUE_IGNORED follows it.
 */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,
   RADV_QUEUE_COMPUTE,
   RADV_QUEUE_TRANSFER,
   RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_IGNORED,
};
262
263 struct radv_perfcounter_desc;
264
/* Per-physical-device driver state. The common vk_physical_device is embedded
 * as the first member.
 */
struct radv_physical_device {
   struct vk_physical_device vk;

   /* Link in radv_instance::physical_devices */
   struct list_head link;

   struct radv_instance *instance;

   struct radeon_winsys *ws;
   struct radeon_info rad_info;
   char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   int local_fd;
   int master_fd;
   struct wsi_device wsi_device;

   bool out_of_order_rast_allowed;

   /* Whether DCC should be enabled for MSAA textures. */
   bool dcc_msaa_allowed;

   /* Whether to enable NGG. */
   bool use_ngg;

   /* Whether to enable NGG culling. */
   bool use_ngg_culling;

   /* Whether to enable NGG streamout. */
   bool use_ngg_streamout;

   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;
   uint8_t ge_wave_size;
   uint8_t rt_wave_size;

   /* Whether to use the LLVM compiler backend */
   bool use_llvm;

   /* Whether to emulate ETC2 image support on HW without support. */
   bool emulate_etc2;

   /* This is the driver's on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   VkPhysicalDeviceMemoryProperties memory_properties;
   enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
   enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
   unsigned heaps;

   /* Bitmask of memory types that use the 32-bit address space. */
   uint32_t memory_types_32bit;

   /* DRM node information; not available in Windows builds. */
#ifndef _WIN32
   int available_nodes;
   drmPciBusInfo bus_info;

   dev_t primary_devid;
   dev_t render_devid;
#endif

   nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];

   /* Lookup table indexed by Vulkan queue family index; read by
    * vk_queue_to_radv().
    */
   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
   uint32_t num_queues;

   uint32_t gs_table_depth;

   struct ac_hs_info hs;
   struct ac_task_info task_info;

   /* Performance counters. */
   struct ac_perfcounters ac_perfcounters;

   uint32_t num_perfcounters;
   struct radv_perfcounter_desc *perfcounters;
};
348
/* Per-instance driver state. The common vk_instance is embedded as the first
 * member.
 */
struct radv_instance {
   struct vk_instance vk;

   VkAllocationCallbacks alloc;

   uint64_t debug_flags;
   uint64_t perftest_flags;

   /* List of radv_physical_device, linked through radv_physical_device::link. */
   bool physical_devices_enumerated;
   struct list_head physical_devices;

   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;

   /**
    * Workarounds for game bugs.
    */
   bool enable_mrt_output_nan_fixup;
   bool disable_tc_compat_htile_in_general;
   bool disable_shrink_image_store;
   bool absolute_depth_bias;
   bool disable_aniso_single_level;
   bool zero_vram;
   bool disable_sinking_load_input_fs;
   bool flush_before_query_copy;
};
375
376 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
377 void radv_finish_wsi(struct radv_physical_device *physical_device);
378
379 struct cache_entry;
380
/* Pipeline cache object: a table of cache_entry pointers together with its
 * bookkeeping counters, a mutex and the allocator stored alongside it.
 */
struct radv_pipeline_cache {
   struct vk_object_base base;
   struct radv_device *device;
   mtx_t mutex;
   VkPipelineCacheCreateFlags flags;

   uint32_t total_size;
   uint32_t table_size;
   uint32_t kernel_count;
   struct cache_entry **hash_table;
   bool modified;

   VkAllocationCallbacks alloc;
};
395
396 struct radv_shader_binary;
397 struct radv_shader;
398 struct radv_pipeline_shader_stack_size;
399
400 void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
401 void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
402 bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
403
404 bool radv_create_shaders_from_pipeline_cache(
405 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
406 struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
407 uint32_t *num_stack_sizes, bool *found_in_application_cache);
408
409 void radv_pipeline_cache_insert_shaders(
410 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
411 struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
412 const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
413
414 VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
415 struct radv_shader_binary **binaries,
416 struct radv_shader_binary *gs_copy_binary);
417
/* Depth/stencil blit layout classes. radv_meta_blit_ds_to_type() maps
 * VK_IMAGE_LAYOUT_GENERAL to TILE_DISABLE and every other layout to
 * TILE_ENABLE. RADV_BLIT_DS_LAYOUT_COUNT is the number of classes.
 */
enum radv_blit_ds_layout {
   RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
   RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
   RADV_BLIT_DS_LAYOUT_COUNT,
};
423
424 static inline enum radv_blit_ds_layout
radv_meta_blit_ds_to_type(VkImageLayout layout)425 radv_meta_blit_ds_to_type(VkImageLayout layout)
426 {
427 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
428 : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
429 }
430
431 static inline VkImageLayout
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)432 radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
433 {
434 return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
435 : VK_IMAGE_LAYOUT_GENERAL;
436 }
437
/* Destination layout classes for meta operations.
 * radv_meta_dst_layout_from_layout() maps VK_IMAGE_LAYOUT_GENERAL to GENERAL
 * and every other layout to OPTIMAL. RADV_META_DST_LAYOUT_COUNT is the number
 * of classes.
 */
enum radv_meta_dst_layout {
   RADV_META_DST_LAYOUT_GENERAL,
   RADV_META_DST_LAYOUT_OPTIMAL,
   RADV_META_DST_LAYOUT_COUNT,
};
443
444 static inline enum radv_meta_dst_layout
radv_meta_dst_layout_from_layout(VkImageLayout layout)445 radv_meta_dst_layout_from_layout(VkImageLayout layout)
446 {
447 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
448 : RADV_META_DST_LAYOUT_OPTIMAL;
449 }
450
451 static inline VkImageLayout
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)452 radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
453 {
454 return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
455 : VK_IMAGE_LAYOUT_GENERAL;
456 }
457
/* Collection of pipelines, pipeline layouts and descriptor set layouts,
 * grouped into one anonymous struct per operation (clear, blit, copy,
 * resolve, ...), together with the allocator, pipeline cache and mutex that
 * are stored alongside them.
 */
struct radv_meta_state {
   VkAllocationCallbacks alloc;

   struct radv_pipeline_cache cache;

   /*
    * For on-demand pipeline creation, makes sure that
    * only one thread tries to build a pipeline at the same time.
    */
   mtx_t mtx;

   /**
    * Use array element `i` for images with `2^i` samples.
    */
   struct {
      VkPipeline color_pipelines[NUM_META_FS_KEYS];
   } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];

   struct {
      VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];

      VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
   } ds_clear[MAX_SAMPLES_LOG2];

   VkPipelineLayout clear_color_p_layout;
   VkPipelineLayout clear_depth_p_layout;
   VkPipelineLayout clear_depth_unrestricted_p_layout;

   /* Optimized compute fast HTILE clear for stencil or depth only. */
   VkPipeline clear_htile_mask_pipeline;
   VkPipelineLayout clear_htile_mask_p_layout;
   VkDescriptorSetLayout clear_htile_mask_ds_layout;

   /* Copy VRS into HTILE. */
   VkPipeline copy_vrs_htile_pipeline;
   VkPipelineLayout copy_vrs_htile_p_layout;
   VkDescriptorSetLayout copy_vrs_htile_ds_layout;

   /* Clear DCC with comp-to-single. */
   VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
   VkPipelineLayout clear_dcc_comp_to_single_p_layout;
   VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;

   struct {
      /** Pipeline that blits from a 1D image. */
      VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 2D image. */
      VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 3D image. */
      VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

      VkPipeline depth_only_1d_pipeline;
      VkPipeline depth_only_2d_pipeline;
      VkPipeline depth_only_3d_pipeline;

      VkPipeline stencil_only_1d_pipeline;
      VkPipeline stencil_only_2d_pipeline;
      VkPipeline stencil_only_3d_pipeline;
      VkPipelineLayout pipeline_layout;
      VkDescriptorSetLayout ds_layout;
   } blit;

   struct {
      VkPipelineLayout p_layouts[5];
      VkDescriptorSetLayout ds_layouts[5];
      VkPipeline pipelines[5][NUM_META_FS_KEYS];

      VkPipeline depth_only_pipeline[5];

      VkPipeline stencil_only_pipeline[5];
   } blit2d[MAX_SAMPLES_LOG2];

   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } itob;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } btoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } btoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } itoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } itoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } cleari;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } cleari_r32g32b32;
   struct {
      VkPipelineLayout p_layout;
      VkDescriptorSetLayout ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_copy;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline pipeline[NUM_META_FS_KEYS];
   } resolve;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      struct {
         VkPipeline pipeline;
         VkPipeline i_pipeline;
         VkPipeline srgb_pipeline;
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_compute;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;

      struct {
         VkPipeline pipeline[NUM_META_FS_KEYS];
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_fragment;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline decompress_pipeline;
      VkPipeline resummarize_pipeline;
   } depth_decomp[MAX_SAMPLES_LOG2];

   VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
   VkPipelineLayout expand_depth_stencil_compute_p_layout;
   VkPipeline expand_depth_stencil_compute_pipeline;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline cmask_eliminate_pipeline;
      VkPipeline fmask_decompress_pipeline;
      VkPipeline dcc_decompress_pipeline;

      VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
      VkPipelineLayout dcc_decompress_compute_p_layout;
      VkPipeline dcc_decompress_compute_pipeline;
   } fast_clear_flush;

   struct {
      VkPipelineLayout fill_p_layout;
      VkPipelineLayout copy_p_layout;
      VkPipeline fill_pipeline;
      VkPipeline copy_pipeline;
   } buffer;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline occlusion_query_pipeline;
      VkPipeline pipeline_statistics_query_pipeline;
      VkPipeline tfb_query_pipeline;
      VkPipeline timestamp_query_pipeline;
      VkPipeline pg_query_pipeline;
   } query;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_expand;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[32];
   } dcc_retile;

   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout morton_p_layout;
      VkPipeline morton_pipeline;
      VkPipelineLayout internal_p_layout;
      VkPipeline internal_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;

      struct radix_sort_vk *radix_sort;
      struct radix_sort_vk_sort_devaddr_info radix_sort_info;
   } accel_struct_build;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } etc_decode;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } dgc_prepare;
};
707
708 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
709
710 struct radv_deferred_queue_submission;
711
712 static inline enum radv_queue_family
vk_queue_to_radv(const struct radv_physical_device * phys_dev,int queue_family_index)713 vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
714 {
715 if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL ||
716 queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
717 return RADV_QUEUE_FOREIGN;
718 if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
719 return RADV_QUEUE_IGNORED;
720
721 assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
722 return phys_dev->vk_queue_to_radv[queue_family_index];
723 }
724
725 enum amd_ip_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
726 enum radv_queue_family f);
727
/* Scratch and ring requirements: sizes and wave counts for the scratch
 * buffers, sizes for the ESGS/GSVS rings, and flags for the remaining
 * resources. Embedded in radv_queue_state as `ring_info`.
 */
struct radv_queue_ring_info {
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   bool tess_rings;
   bool task_rings;
   bool mesh_scratch_ring;
   bool gds;
   bool gds_oa;
   bool sample_positions;
};
742
/* State for one queue family: the ring requirements, the buffer objects for
 * scratch, descriptors and rings, and the preamble command streams.
 */
struct radv_queue_state {
   enum radv_queue_family qf;
   struct radv_queue_ring_info ring_info;

   struct radeon_winsys_bo *scratch_bo;
   struct radeon_winsys_bo *descriptor_bo;
   struct radeon_winsys_bo *compute_scratch_bo;
   struct radeon_winsys_bo *esgs_ring_bo;
   struct radeon_winsys_bo *gsvs_ring_bo;
   struct radeon_winsys_bo *tess_rings_bo;
   struct radeon_winsys_bo *task_rings_bo;
   struct radeon_winsys_bo *mesh_scratch_ring_bo;
   struct radeon_winsys_bo *gds_bo;
   struct radeon_winsys_bo *gds_oa_bo;

   struct radeon_cmdbuf *initial_preamble_cs;
   struct radeon_cmdbuf *initial_full_flush_preamble_cs;
   struct radeon_cmdbuf *continue_preamble_cs;
};
762
/* Driver queue object. The common vk_queue is embedded as the first member;
 * `state` is stored inline while `ace_internal_state` is a separate pointer.
 */
struct radv_queue {
   struct vk_queue vk;
   struct radv_device *device;
   struct radeon_winsys_ctx *hw_ctx;
   enum radeon_ctx_priority priority;
   struct radv_queue_state state;
   struct radv_queue_state *ace_internal_state;
};
771
772 #define RADV_BORDER_COLOR_COUNT 4096
773 #define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
774
/* Per-device border color table with RADV_BORDER_COLOR_COUNT slots: a `used`
 * flag per slot, the backing buffer object and a pointer to the colors.
 */
struct radv_device_border_color_data {
   bool used[RADV_BORDER_COLOR_COUNT];

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr;

   /* Mutex is required to guarantee vkCreateSampler thread safety
    * given that we are writing to a buffer and checking color occupation */
   mtx_t mutex;
};
785
/* Shading rates selectable through RADV_FORCE_VRS (stored in
 * radv_device::force_vrs). Note the numeric order: 2x2 comes before 2x1 and
 * 1x2.
 */
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};
792
/* State for a notifier thread: two integer handles (`fd`, `watch`), a `quit`
 * flag and the thread itself. Stored in radv_device next to force_vrs.
 * NOTE(review): presumably an inotify descriptor and watch — confirm against
 * the implementation.
 */
struct radv_notifier {
   int fd;
   int watch;
   bool quit;
   thrd_t thread;
};
799
/* Per-logical-device driver state. The common vk_device is embedded as the
 * first member.
 */
struct radv_device {
   struct vk_device vk;

   struct radv_instance *instance;
   struct radeon_winsys *ws;

   struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
   struct radv_meta_state meta_state;

   /* Both arrays are sized by RADV_MAX_QUEUE_FAMILIES. */
   struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
   int queue_count[RADV_MAX_QUEUE_FAMILIES];

   bool pbb_allowed;
   uint32_t scratch_waves;
   uint32_t dispatch_initiator;
   uint32_t dispatch_initiator_task;

   /* MSAA sample locations.
    * The first index is the sample index.
    * The second index is the coordinate: X, Y. */
   float sample_locations_1x[1][2];
   float sample_locations_2x[2][2];
   float sample_locations_4x[4][2];
   float sample_locations_8x[8][2];

   /* GFX7 and later */
   uint32_t gfx_init_size_dw;
   struct radeon_winsys_bo *gfx_init;

   struct radeon_winsys_bo *trace_bo;
   uint32_t *trace_id_ptr;

   /* Whether to keep shader debug info, for debugging. */
   bool keep_shader_info;

   struct radv_physical_device *physical_device;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct radv_pipeline_cache *mem_cache;

   /*
    * use different counters so MSAA MRTs get consecutive surface indices,
    * even if MASK is allocated in between.
    */
   uint32_t image_mrt_offset_counter;
   uint32_t fmask_mrt_offset_counter;

   struct list_head shader_arenas;
   unsigned shader_arena_shift;
   uint8_t shader_free_list_mask;
   struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
   struct list_head shader_block_obj_pool;
   mtx_t shader_arena_mutex;

   /* For detecting VM faults reported by dmesg. */
   uint64_t dmesg_timestamp;

   /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
   bool robust_buffer_access;
   bool robust_buffer_access2;

   /* Whether to inline the compute dispatch size in user sgprs. */
   bool load_grid_size_from_user_sgpr;

   /* Whether the driver uses a global BO list. */
   bool use_global_bo_list;

   /* Whether attachment VRS is enabled. */
   bool attachment_vrs_enabled;

   /* Whether shader image 32-bit float atomics are enabled. */
   bool image_float32_atomics;

   /* Whether 2D views of 3D image is enabled. */
   bool image_2d_view_of_3d;

   /* Whether primitives generated query features are enabled. */
   bool primitives_generated_query;

   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

   struct radv_device_border_color_data border_color_data;

   /* Thread trace. */
   struct ac_thread_trace_data thread_trace;

   /* SPM. */
   struct ac_spm_trace_data spm_trace;

   /* Trap handler. */
   struct radv_trap_handler_shader *trap_handler_shader;
   struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
   uint32_t *tma_ptr;

   /* Overallocation. */
   bool overallocation_disallowed;
   uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
   mtx_t overallocation_mutex;

   /* RADV_FORCE_VRS. */
   struct radv_notifier notifier;
   enum radv_force_vrs force_vrs;

   /* Depth image for VRS when not bound by the app. */
   struct {
      struct radv_image *image;
      struct radv_buffer *buffer; /* HTILE */
      struct radv_device_memory *mem;
   } vrs;

   struct u_rwlock vs_prologs_lock;
   struct hash_table *vs_prologs;

   /* Prime blit sdma queue */
   struct radv_queue *private_sdma_queue;

   struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
   struct radv_shader_part *instance_rate_vs_prologs[816];

   simple_mtx_t trace_mtx;

   /* Whether per-vertex VRS is forced. */
   bool force_vrs_enabled;

   /* Whether shaders created through application entrypoints are considered internal. */
   bool app_shaders_internal;

   simple_mtx_t pstate_mtx;
   unsigned pstate_cnt;

   /* BO to contain some performance counter helpers:
    * - A lock for profiling cmdbuffers.
    * - a temporary fence for the end query synchronization.
    * - the pass to use for profiling. (as an array of bools)
    */
   struct radeon_winsys_bo *perf_counter_bo;

   /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
   struct radeon_cmdbuf **perf_counter_lock_cs;

   bool uses_device_generated_commands;
};
943
944 bool radv_device_acquire_performance_counters(struct radv_device *device);
945 void radv_device_release_performance_counters(struct radv_device *device);
946
/* Device memory object: a winsys buffer object plus allocation bookkeeping
 * (heap index, size, mapping pointers).
 */
struct radv_device_memory {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   /* for dedicated allocations */
   struct radv_image *image;
   struct radv_buffer *buffer;
   uint32_t heap_index;
   uint64_t alloc_size;
   void *map;
   void *user_ptr;

   /* Only present when Android hardware buffer support is compiled in. */
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   struct AHardwareBuffer *android_hardware_buffer;
#endif
};
962
963 void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
964 struct radeon_winsys_bo *bo);
965 void radv_device_memory_finish(struct radv_device_memory *mem);
966
/* An address/size pair. Used as the element type of
 * radv_descriptor_set_header::dynamic_descriptors.
 * NOTE(review): `va` is presumably a GPU virtual address — confirm.
 */
struct radv_descriptor_range {
   uint64_t va;
   uint32_t size;
};
971
/* Common leading part of a descriptor set. Embedded as the first member of
 * both radv_descriptor_set and radv_push_descriptor_set.
 */
struct radv_descriptor_set_header {
   struct vk_object_base base;
   struct radv_descriptor_set_layout *layout;
   uint32_t size;
   uint32_t buffer_count;

   struct radeon_winsys_bo *bo;
   uint64_t va;
   uint32_t *mapped_ptr;
   struct radv_descriptor_range *dynamic_descriptors;
};
983
/* Descriptor set: the common header followed by a variable-length array of
 * buffer object pointers (flexible array member, so it must stay last).
 */
struct radv_descriptor_set {
   struct radv_descriptor_set_header header;

   struct radeon_winsys_bo *descriptors[];
};
989
/* Push descriptor set: the common descriptor set header plus a capacity
 * value.
 */
struct radv_push_descriptor_set {
   struct radv_descriptor_set_header set;
   uint32_t capacity;
};
994
/* One element of radv_descriptor_pool::entries: an offset and size together
 * with the descriptor set they belong to.
 */
struct radv_descriptor_pool_entry {
   uint32_t offset;
   uint32_t size;
   struct radv_descriptor_set *set;
};
1000
1001 struct radv_descriptor_pool {
1002 struct vk_object_base base;
1003 struct radeon_winsys_bo *bo;
1004 uint8_t *host_bo;
1005 uint8_t *mapped_ptr;
1006 uint64_t current_offset;
1007 uint64_t size;
1008
1009 uint8_t *host_memory_base;
1010 uint8_t *host_memory_ptr;
1011 uint8_t *host_memory_end;
1012
1013 uint32_t entry_count;
1014 uint32_t max_entry_count;
1015 struct radv_descriptor_pool_entry entries[0];
1016 };
1017
/* One element of radv_descriptor_update_template::entry, describing a single
 * descriptor update: its type and count, where it is written and where its
 * data is read from.
 */
struct radv_descriptor_update_template_entry {
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint8_t has_sampler;
   uint8_t sampler_offset;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const uint32_t *immutable_samplers;
};
1043
1044 struct radv_descriptor_update_template {
1045 struct vk_object_base base;
1046 uint32_t entry_count;
1047 VkPipelineBindPoint bind_point;
1048 struct radv_descriptor_update_template_entry entry[0];
1049 };
1050
/* Driver buffer object. The common vk_buffer is embedded as the first
 * member; `bo` and `offset` describe the bound memory.
 */
struct radv_buffer {
   struct vk_buffer vk;

   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset;
};
1058
1059 void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
1060 struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
1061 void radv_buffer_finish(struct radv_buffer *buffer);
1062
/* One bit per dynamic state, using bits 0 through 29. RADV_DYNAMIC_ALL is
 * the mask of all of them. The RADV_CMD_DIRTY_DYNAMIC_* values in
 * enum radv_cmd_dirty_bits use the same bit positions and must be kept in
 * sync with this enum.
 */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_DYNAMIC_ALL = (1ull << 30) - 1,
};
1096
/* Dirty flags of a command buffer. Bits 0..29 are the dynamic state dirty
 * bits, bits 30 and up are the remaining flags. */
enum radv_cmd_dirty_bits {
   /* Keep the dynamic state dirty bits in sync with
    * enum radv_dynamic_state_bits */
   RADV_CMD_DIRTY_DYNAMIC_VIEWPORT                    = 1ull << 0,
   RADV_CMD_DIRTY_DYNAMIC_SCISSOR                     = 1ull << 1,
   RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                  = 1ull << 2,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                  = 1ull << 3,
   RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS             = 1ull << 4,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS                = 1ull << 5,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK        = 1ull << 6,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK          = 1ull << 7,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE           = 1ull << 8,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE           = 1ull << 9,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS            = 1ull << 10,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE                = 1ull << 11,
   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE                   = 1ull << 12,
   RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE                  = 1ull << 13,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY          = 1ull << 14,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE           = 1ull << 15,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE          = 1ull << 16,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP            = 1ull << 17,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE    = 1ull << 18,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE         = 1ull << 19,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1ull << 20,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE       = 1ull << 22,
   RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS        = 1ull << 23,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE   = 1ull << 24,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE           = 1ull << 25,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP                    = 1ull << 26,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE    = 1ull << 27,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE          = 1ull << 28,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT                = 1ull << 29,

   /* Mask of all dynamic dirty bits above. */
   RADV_CMD_DIRTY_DYNAMIC_ALL                         = (1ull << 30) - 1,

   /* Non-dynamic dirty bits. Values from bit 31 upwards do not fit in an
    * int, so this enum relies on the compiler giving it a wider type. */
   RADV_CMD_DIRTY_PIPELINE                            = 1ull << 30,
   RADV_CMD_DIRTY_INDEX_BUFFER                        = 1ull << 31,
   RADV_CMD_DIRTY_FRAMEBUFFER                         = 1ull << 32,
   RADV_CMD_DIRTY_VERTEX_BUFFER                       = 1ull << 33,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER                    = 1ull << 34,
};
1137
/* Cache flush / invalidate and synchronization requests, one bit each, plus
 * two convenience combinations at the end. */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,

   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,

   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,

   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,

   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is
    * unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,

   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,

   /* Framebuffer caches. */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB      = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB      = 1 << 9,

   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH        = 1 << 13,

   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS  = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC   = 1 << 16,

   /* All four framebuffer cache bits (CB, CB metadata, DB, DB metadata). */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
      (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
       RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),

   /* I$, scalar/vector L1, L2 invalidate + writeback, and a CS partial
    * flush. */
   RADV_CMD_FLUSH_ALL_COMPUTE =
      (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
       RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
1178
/* Bit flags for NGG culling settings; radv_nggc_none means no bit is set. */
enum radv_nggc_settings {
   radv_nggc_none             = 0,
   radv_nggc_front_face       = 1 << 0,
   radv_nggc_back_face        = 1 << 1,
   radv_nggc_face_is_ccw      = 1 << 2,
   radv_nggc_small_primitives = 1 << 3,
};
1186
1187 struct radv_vertex_binding {
1188 VkDeviceSize offset;
1189 VkDeviceSize size;
1190 VkDeviceSize stride;
1191 };
1192
1193 struct radv_streamout_binding {
1194 struct radv_buffer *buffer;
1195 VkDeviceSize offset;
1196 VkDeviceSize size;
1197 };
1198
/* Streamout tracking state. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END). */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN). */
   bool streamout_enabled;
};
1209
1210 struct radv_viewport_state {
1211 uint32_t count;
1212 VkViewport viewports[MAX_VIEWPORTS];
1213 struct {
1214 float scale[3];
1215 float translate[3];
1216 } xform[MAX_VIEWPORTS];
1217 };
1218
1219 struct radv_scissor_state {
1220 uint32_t count;
1221 VkRect2D scissors[MAX_SCISSORS];
1222 };
1223
1224 struct radv_discard_rectangle_state {
1225 uint32_t count;
1226 VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
1227 };
1228
1229 struct radv_sample_locations_state {
1230 VkSampleCountFlagBits per_pixel;
1231 VkExtent2D grid_size;
1232 uint32_t count;
1233 VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
1234 };
1235
1236 struct radv_dynamic_state {
1237 /**
1238 * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
1239 * Defines the set of saved dynamic state.
1240 */
1241 uint64_t mask;
1242
1243 struct radv_viewport_state viewport;
1244
1245 struct radv_scissor_state scissor;
1246
1247 float line_width;
1248
1249 struct {
1250 float bias;
1251 float clamp;
1252 float slope;
1253 } depth_bias;
1254
1255 float blend_constants[4];
1256
1257 struct {
1258 float min;
1259 float max;
1260 } depth_bounds;
1261
1262 struct {
1263 uint32_t front;
1264 uint32_t back;
1265 } stencil_compare_mask;
1266
1267 struct {
1268 uint32_t front;
1269 uint32_t back;
1270 } stencil_write_mask;
1271
1272 struct {
1273 struct {
1274 VkStencilOp fail_op;
1275 VkStencilOp pass_op;
1276 VkStencilOp depth_fail_op;
1277 VkCompareOp compare_op;
1278 } front;
1279
1280 struct {
1281 VkStencilOp fail_op;
1282 VkStencilOp pass_op;
1283 VkStencilOp depth_fail_op;
1284 VkCompareOp compare_op;
1285 } back;
1286 } stencil_op;
1287
1288 struct {
1289 uint32_t front;
1290 uint32_t back;
1291 } stencil_reference;
1292
1293 struct radv_discard_rectangle_state discard_rectangle;
1294
1295 struct radv_sample_locations_state sample_location;
1296
1297 struct {
1298 uint32_t factor;
1299 uint16_t pattern;
1300 } line_stipple;
1301
1302 VkCullModeFlags cull_mode;
1303 VkFrontFace front_face;
1304 unsigned primitive_topology;
1305
1306 bool depth_test_enable;
1307 bool depth_write_enable;
1308 VkCompareOp depth_compare_op;
1309 bool depth_bounds_test_enable;
1310 bool stencil_test_enable;
1311
1312 struct {
1313 VkExtent2D size;
1314 VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
1315 } fragment_shading_rate;
1316
1317 bool depth_bias_enable;
1318 bool primitive_restart_enable;
1319 bool rasterizer_discard_enable;
1320
1321 unsigned logic_op;
1322
1323 uint32_t color_write_enable;
1324 };
1325
/* Default dynamic state values; defined in another translation unit. */
extern const struct radv_dynamic_state default_dynamic_state;

/* Look up the name of a debug / perftest option from its id. */
const char *radv_get_debug_option_name(int id);
const char *radv_get_perftest_option_name(int id);

/* Integer debug option lookup by name, with a caller-supplied default. */
int radv_get_int_debug_option(const char *name, int default_value);
1333
/* Color buffer (CB_*) values for one color attachment. */
struct radv_color_buffer_info {
   /* 64-bit fields. */
   uint64_t cb_color_base;
   uint64_t cb_color_cmask;
   uint64_t cb_color_fmask;
   uint64_t cb_dcc_base;

   /* 32-bit fields. */
   uint32_t cb_color_slice;
   uint32_t cb_color_view;
   uint32_t cb_color_info;
   uint32_t cb_color_attrib;
   uint32_t cb_color_attrib2; /* GFX9 and later */
   uint32_t cb_color_attrib3; /* GFX10 and later */
   uint32_t cb_dcc_control;
   uint32_t cb_color_cmask_slice;
   uint32_t cb_color_fmask_slice;

   /* Both members share storage; which one applies depends on the GPU
    * generation. */
   union {
      uint32_t cb_color_pitch; /* GFX6-GFX8 */
      uint32_t cb_mrt_epitch;  /* GFX9+ */
   };
};
1353
/* Depth/stencil buffer (DB_*) values for one depth/stencil attachment. */
struct radv_ds_buffer_info {
   /* 64-bit fields. */
   uint64_t db_z_read_base;
   uint64_t db_stencil_read_base;
   uint64_t db_z_write_base;
   uint64_t db_stencil_write_base;
   uint64_t db_htile_data_base;

   /* 32-bit fields. */
   uint32_t db_depth_info;
   uint32_t db_z_info;
   uint32_t db_stencil_info;
   uint32_t db_depth_view;
   uint32_t db_depth_size;
   uint32_t db_depth_slice;
   uint32_t db_htile_surface;
   uint32_t pa_su_poly_offset_db_fmt_cntl;
   uint32_t db_z_info2;       /* GFX9 only */
   uint32_t db_stencil_info2; /* GFX9 only */
};
1371
/* Surface initialisers for color, depth/stencil and VRS (defined elsewhere);
 * each takes a buffer info struct alongside an image view or image. */
void radv_initialise_color_surface(struct radv_device *device,
                                   struct radv_color_buffer_info *cb,
                                   struct radv_image_view *iview);
void radv_initialise_ds_surface(struct radv_device *device,
                                struct radv_ds_buffer_info *ds,
                                struct radv_image_view *iview);
void radv_initialise_vrs_surface(struct radv_image *image,
                                 struct radv_buffer *htile_buffer,
                                 struct radv_ds_buffer_info *ds);
1378
1379 /**
1380 * Attachment state when recording a renderpass instance.
1381 *
1382 * The clear value is valid only if there exists a pending clear.
1383 */
1384 struct radv_attachment_state {
1385 VkImageAspectFlags pending_clear_aspects;
1386 uint32_t cleared_views;
1387 VkClearValue clear_value;
1388 VkImageLayout current_layout;
1389 VkImageLayout current_stencil_layout;
1390 bool current_in_render_loop;
1391 struct radv_sample_locations_state sample_location;
1392
1393 union {
1394 struct radv_color_buffer_info cb;
1395 struct radv_ds_buffer_info ds;
1396 };
1397 struct radv_image_view *iview;
1398 };
1399
1400 struct radv_descriptor_state {
1401 struct radv_descriptor_set *sets[MAX_SETS];
1402 uint32_t dirty;
1403 uint32_t valid;
1404 struct radv_push_descriptor_set push_set;
1405 bool push_dirty;
1406 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1407 };
1408
1409 struct radv_subpass_sample_locs_state {
1410 uint32_t subpass_idx;
1411 struct radv_sample_locations_state sample_location;
1412 };
1413
/* RGP flush flags, one bit each (bits 0..15). */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS   = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME      = 0x10,
   RGP_FLUSH_SYNC_CP_DMA      = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0    = 0x40,
   RGP_FLUSH_INVAL_ICACHE     = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0    = 0x100,
   RGP_FLUSH_FLUSH_L2         = 0x200,
   RGP_FLUSH_INVAL_L2         = 0x400,
   RGP_FLUSH_FLUSH_CB         = 0x800,
   RGP_FLUSH_INVAL_CB         = 0x1000,
   RGP_FLUSH_FLUSH_DB         = 0x2000,
   RGP_FLUSH_INVAL_DB         = 0x4000,
   RGP_FLUSH_INVAL_L1         = 0x8000,
};
1432
1433 struct radv_cmd_state {
1434 /* Vertex descriptors */
1435 uint64_t vb_va;
1436
1437 bool predicating;
1438 uint64_t dirty;
1439
1440 uint32_t prefetch_L2_mask;
1441
1442 struct radv_graphics_pipeline *graphics_pipeline;
1443 struct radv_graphics_pipeline *emitted_graphics_pipeline;
1444 struct radv_compute_pipeline *compute_pipeline;
1445 struct radv_compute_pipeline *emitted_compute_pipeline;
1446 struct radv_compute_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
1447 struct vk_framebuffer *framebuffer;
1448 struct radv_render_pass *pass;
1449 const struct radv_subpass *subpass;
1450 struct radv_dynamic_state dynamic;
1451 struct radv_vs_input_state dynamic_vs_input;
1452 struct radv_attachment_state *attachments;
1453 struct radv_streamout_state streamout;
1454 VkRect2D render_area;
1455
1456 uint32_t num_subpass_sample_locs;
1457 struct radv_subpass_sample_locs_state *subpass_sample_locs;
1458
1459 /* Index buffer */
1460 struct radv_buffer *index_buffer;
1461 uint64_t index_offset;
1462 uint32_t index_type;
1463 uint32_t max_index_count;
1464 uint64_t index_va;
1465 int32_t last_index_type;
1466
1467 int32_t last_primitive_reset_en;
1468 uint32_t last_primitive_reset_index;
1469 enum radv_cmd_flush_bits flush_bits;
1470 unsigned active_occlusion_queries;
1471 bool perfect_occlusion_queries_enabled;
1472 unsigned active_pipeline_queries;
1473 unsigned active_pipeline_gds_queries;
1474 bool prims_gen_query_enabled;
1475 uint32_t trace_id;
1476 uint32_t last_ia_multi_vgt_param;
1477
1478 uint32_t last_num_instances;
1479 uint32_t last_first_instance;
1480 uint32_t last_vertex_offset;
1481 uint32_t last_drawid;
1482 uint32_t last_subpass_color_count;
1483
1484 uint32_t last_sx_ps_downconvert;
1485 uint32_t last_sx_blend_opt_epsilon;
1486 uint32_t last_sx_blend_opt_control;
1487
1488 /* Whether CP DMA is busy/idle. */
1489 bool dma_is_busy;
1490
1491 /* Whether any images that are not L2 coherent are dirty from the CB. */
1492 bool rb_noncoherent_dirty;
1493
1494 /* Conditional rendering info. */
1495 uint8_t predication_op; /* 32-bit or 64-bit predicate value */
1496 int predication_type; /* -1: disabled, 0: normal, 1: inverted */
1497 uint64_t predication_va;
1498
1499 /* Inheritance info. */
1500 VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
1501
1502 bool context_roll_without_scissor_emitted;
1503
1504 /* SQTT related state. */
1505 uint32_t current_event_type;
1506 uint32_t num_events;
1507 uint32_t num_layout_transitions;
1508 bool pending_sqtt_barrier_end;
1509 enum rgp_flush_bits sqtt_flush_bits;
1510
1511 /* NGG culling state. */
1512 uint32_t last_nggc_settings;
1513 int8_t last_nggc_settings_sgpr_idx;
1514 bool last_nggc_skip;
1515
1516 /* Mesh shading state. */
1517 bool mesh_shading;
1518
1519 uint8_t cb_mip[MAX_RTS];
1520
1521 /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
1522 bool uses_draw_indirect_multi;
1523
1524 uint32_t rt_stack_size;
1525
1526 struct radv_shader_part *emitted_vs_prolog;
1527 uint32_t *emitted_vs_prolog_key;
1528 uint32_t emitted_vs_prolog_key_hash;
1529 uint32_t vbo_misaligned_mask;
1530 uint32_t vbo_misaligned_mask_invalid;
1531 uint32_t vbo_bound_mask;
1532
1533 /* Whether the cmdbuffer owns the current render pass rather than the app. */
1534 bool own_render_pass;
1535
1536 /* Per-vertex VRS state. */
1537 uint32_t last_vrs_rates;
1538 int8_t last_vrs_rates_sgpr_idx;
1539
1540 /* Whether to suspend streamout for internal driver operations. */
1541 bool suspend_streamout;
1542
1543 /* Whether this commandbuffer uses performance counters. */
1544 bool uses_perf_counters;
1545 };
1546
1547 struct radv_cmd_pool {
1548 struct vk_command_pool vk;
1549 struct list_head cmd_buffers;
1550 struct list_head free_cmd_buffers;
1551 };
1552
1553 struct radv_cmd_buffer_upload {
1554 uint8_t *map;
1555 unsigned offset;
1556 uint64_t size;
1557 struct radeon_winsys_bo *upload_bo;
1558 struct list_head list;
1559 };
1560
/* Command buffer status values, numbered consecutively from 0. */
enum radv_cmd_buffer_status {
   RADV_CMD_BUFFER_STATUS_INVALID,    /* 0 */
   RADV_CMD_BUFFER_STATUS_INITIAL,    /* 1 */
   RADV_CMD_BUFFER_STATUS_RECORDING,  /* 2 */
   RADV_CMD_BUFFER_STATUS_EXECUTABLE, /* 3 */
   RADV_CMD_BUFFER_STATUS_PENDING,    /* 4 */
};
1568
1569 struct dynamic_vertex_format_cache {
1570 VkFormat format;
1571 uint8_t hw_fmt;
1572 uint8_t fmt_align_req_minus_1;
1573 uint8_t fmt_size;
1574 bool post_shuffle;
1575 bool alpha_adjust_lo;
1576 bool alpha_adjust_hi;
1577 };
1578
1579 struct radv_cmd_buffer {
1580 struct vk_command_buffer vk;
1581
1582 struct radv_device *device;
1583
1584 struct radv_cmd_pool *pool;
1585 struct list_head pool_link;
1586
1587 struct util_dynarray cached_vertex_formats;
1588 VkCommandBufferUsageFlags usage_flags;
1589 enum radv_cmd_buffer_status status;
1590 struct radeon_cmdbuf *cs;
1591 struct radv_cmd_state state;
1592 struct radv_buffer *vertex_binding_buffers[MAX_VBS];
1593 struct radv_vertex_binding vertex_bindings[MAX_VBS];
1594 uint32_t used_vertex_bindings;
1595 struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
1596 enum radv_queue_family qf;
1597
1598 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
1599 VkShaderStageFlags push_constant_stages;
1600 struct radv_descriptor_set_header meta_push_descriptors;
1601
1602 struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
1603
1604 struct radv_cmd_buffer_upload upload;
1605
1606 uint32_t scratch_size_per_wave_needed;
1607 uint32_t scratch_waves_wanted;
1608 uint32_t compute_scratch_size_per_wave_needed;
1609 uint32_t compute_scratch_waves_wanted;
1610 uint32_t esgs_ring_size_needed;
1611 uint32_t gsvs_ring_size_needed;
1612 bool tess_rings_needed;
1613 bool task_rings_needed;
1614 bool mesh_scratch_ring_needed;
1615 bool gds_needed; /* for GFX10 streamout and NGG GS queries */
1616 bool gds_oa_needed; /* for GFX10 streamout */
1617 bool sample_positions_needed;
1618
1619 VkResult record_result;
1620
1621 uint64_t gfx9_fence_va;
1622 uint32_t gfx9_fence_idx;
1623 uint64_t gfx9_eop_bug_va;
1624
1625 uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
1626 bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
1627
1628 struct {
1629 /**
1630 * Internal command stream that is used when some graphics work
1631 * also requires a submission to the compute queue.
1632 */
1633 struct radeon_cmdbuf *cs;
1634
1635 /** Flush bits for the internal cmdbuf. */
1636 enum radv_cmd_flush_bits flush_bits;
1637
1638 /**
1639 * For synchronization between the ACE and GFX cmdbuf.
1640 * The value of this semaphore is incremented whenever we
1641 * encounter a barrier that affects ACE. At sync points,
1642 * GFX writes the value to its address, and ACE waits until
1643 * it detects that the value has been written.
1644 */
1645 struct {
1646 uint64_t va; /* Virtual address of the semaphore. */
1647 uint32_t gfx2ace_value; /* Current value on GFX. */
1648 uint32_t emitted_gfx2ace_value; /* Emitted value on GFX. */
1649 } sem;
1650 } ace_internal;
1651
1652 /**
1653 * Whether a query pool has been resetted and we have to flush caches.
1654 */
1655 bool pending_reset_query;
1656
1657 /**
1658 * Bitmask of pending active query flushes.
1659 */
1660 enum radv_cmd_flush_bits active_query_flush_bits;
1661 };
1662
/* Forward declarations. */
struct radv_image;
struct radv_image_view;

/* Command buffer queries. */
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);

/* Streamout. */
bool radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer);
void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer);

/* Graphics / compute emission into a command stream. */
void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);

void cik_create_gfx_config(struct radv_device *device);
1675
1676 void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
1677 const VkViewport *viewports, unsigned rast_prim, float line_width);
1678
1679 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
1680 bool indirect_draw, bool count_from_stream_output,
1681 uint32_t draw_vertex_count, unsigned topology,
1682 bool prim_restart_enable);
1683 void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
1684 unsigned event, unsigned event_flags, unsigned dst_sel,
1685 unsigned data_sel, uint64_t va, uint32_t new_fence,
1686 uint64_t gfx9_eop_bug_va);
1687
1688 void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
1689 uint32_t mask);
1690 void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
1691 uint32_t *fence_ptr, uint64_t va, bool is_mec,
1692 enum radv_cmd_flush_bits flush_bits,
1693 enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
1694 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
1695 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
1696 unsigned pred_op, uint64_t va);
1697 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
1698 uint64_t size);
1699 void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1700 unsigned size, bool predicating);
1701 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
1702 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
1703 unsigned value);
1704 void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1705
/* Register-related helpers. */
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer,
                               bool enable_occlusion_queries);
uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_get_vgt_index_size(uint32_t type);

/* VS prolog helpers: index computation plus hash and compare functions that
 * take untyped key pointers. */
unsigned radv_instance_rate_prolog_index(unsigned num_attributes,
                                         uint32_t instance_rate_inputs);
uint32_t radv_hash_vs_prolog(const void *key_);
bool radv_cmp_vs_prolog(const void *a_, const void *b_);
1713
1714 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1715 unsigned *out_offset, void **ptr);
1716 void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
1717 const struct radv_subpass *subpass);
1718 void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
1719 const struct radv_subpass *subpass);
1720 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1721 const void *data, unsigned *out_offset);
1722 void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
1723 const struct radv_graphics_pipeline *pipeline,
1724 bool full_null_descriptors, void *vb_ptr);
1725 void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs);
1726
1727 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
1728 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
1729 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
1730 void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
1731 VkImageAspectFlags aspects,
1732 VkResolveModeFlagBits resolve_mode);
1733 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
1734 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
1735 VkImageAspectFlags aspects,
1736 VkResolveModeFlagBits resolve_mode);
1737 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
1738 unsigned radv_get_default_max_sample_dist(int log_samples);
1739 void radv_device_init_msaa(struct radv_device *device);
1740 VkResult radv_device_init_vrs_state(struct radv_device *device);
1741
1742 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1743 const struct radv_image_view *iview,
1744 VkClearDepthStencilValue ds_clear_value,
1745 VkImageAspectFlags aspects);
1746
1747 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1748 const struct radv_image_view *iview, int cb_idx,
1749 uint32_t color_values[2]);
1750
1751 bool radv_image_use_dcc_image_stores(const struct radv_device *device,
1752 const struct radv_image *image);
1753 bool radv_image_use_dcc_predication(const struct radv_device *device,
1754 const struct radv_image *image);
1755
1756 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1757 const VkImageSubresourceRange *range, bool value);
1758
1759 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1760 const VkImageSubresourceRange *range, bool value);
1761 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
1762 VkAccessFlags2 src_flags,
1763 const struct radv_image *image);
1764 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
1765 VkAccessFlags2 dst_flags,
1766 const struct radv_image *image);
1767 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1768 struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value);
1769 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
1770 struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
1771 uint64_t size);
1772
1773 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
1774 bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
1775 void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1776 struct radv_device_memory *mem);
1777
1778 static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf * cs,unsigned sh_offset,unsigned pointer_count,bool use_32bit_pointers)1779 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
1780 bool use_32bit_pointers)
1781 {
1782 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
1783 radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
1784 }
1785
1786 static inline void
radv_emit_shader_pointer_body(struct radv_device * device,struct radeon_cmdbuf * cs,uint64_t va,bool use_32bit_pointers)1787 radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1788 bool use_32bit_pointers)
1789 {
1790 radeon_emit(cs, va);
1791
1792 if (use_32bit_pointers) {
1793 assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
1794 } else {
1795 radeon_emit(cs, va >> 32);
1796 }
1797 }
1798
/*
 * Emit a single shader pointer (header plus address) at sh_offset.
 *
 * Pointers flagged as global are emitted as 64-bit, all others as 32-bit.
 */
static inline void
radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
                         uint64_t va, bool global)
{
   radv_emit_shader_pointer_head(cs, sh_offset, 1, !global);
   radv_emit_shader_pointer_body(device, cs, va, !global);
}
1808
1809 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)1810 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1811 {
1812 switch (bind_point) {
1813 case VK_PIPELINE_BIND_POINT_GRAPHICS:
1814 case VK_PIPELINE_BIND_POINT_COMPUTE:
1815 return &cmd_buffer->descriptors[bind_point];
1816 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1817 return &cmd_buffer->descriptors[2];
1818 default:
1819 unreachable("Unhandled bind point");
1820 }
1821 }
1822
1823 void
1824 radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
1825
/*
 * Dispatch where x, y and z are exact invocation counts rather than block
 * counts.
 *
 * Limitations: normal dispatch functions can't be called afterwards without
 * binding or rebinding the compute pipeline.
 */
void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer,
                             uint32_t x, uint32_t y, uint32_t z);

/* Indirect dispatch; takes a BO and a virtual address. */
void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer,
                            struct radeon_winsys_bo *bo, uint64_t va);
1837
1838 struct radv_event {
1839 struct vk_object_base base;
1840 struct radeon_winsys_bo *bo;
1841 uint64_t *map;
1842 };
1843
/* RADV_HASH_SHADER_* flag bits. The bit positions are not contiguous. */
#define RADV_HASH_SHADER_CS_WAVE32             (1 << 1)
#define RADV_HASH_SHADER_PS_WAVE32             (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32             (1 << 3)
#define RADV_HASH_SHADER_LLVM                  (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS       (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING       (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS  (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_EMULATE_RT            (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA             (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64             (1 << 18)
1855
1856 struct radv_pipeline_key;
1857
1858 void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
1859 struct radv_pipeline_stage *out_stage, gl_shader_stage stage);
1860
1861 void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
1862 const struct radv_pipeline_layout *layout,
1863 const struct radv_pipeline_key *key, uint32_t flags);
1864
1865 void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1866 uint32_t flags);
1867
1868 uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
1869
1870 bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
1871
1872 bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines);
1873
1874 bool radv_emulate_rt(const struct radv_physical_device *pdevice);
1875
1876 enum {
1877 RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
1878 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
1879 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
1880 };
1881
/* Mask with one bit per stage, for MESA_VULKAN_SHADER_STAGES stages. */
#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

/*
 * Loop over every stage whose bit is set in stage_bits (restricted to
 * RADV_STAGE_MASK), from the lowest bit upwards.
 *
 * `stage` is declared by the macro itself. stage_bits is evaluated once.
 */
#define radv_foreach_stage(stage, stage_bits)                                                      \
   for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK);            \
        stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1887
1888 extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
1889 unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
1890
/* Multisample-related values plus the sample count. */
struct radv_multisample_state {
   uint32_t db_eqaa;
   uint32_t pa_sc_mode_cntl_0;
   uint32_t pa_sc_mode_cntl_1;
   uint32_t pa_sc_aa_config;
   uint32_t pa_sc_aa_mask[2];
   unsigned num_samples;
};
1899
/* VRS state: a single 32-bit pa_cl_vrs_cntl value. */
struct radv_vrs_state {
   uint32_t pa_cl_vrs_cntl;
};
1903
/* A `min` and an `incr` vertex count, one byte each. */
struct radv_prim_vertex_count {
   uint8_t min;
   uint8_t incr;
};
1908
/* Helper values related to IA_MULTI_VGT_PARAM. */
struct radv_ia_multi_vgt_param_helpers {
   uint32_t base;
   bool partial_es_wave;
   uint8_t primgroup_size;
   bool ia_switch_on_eoi;
   bool partial_vs_wave;
};
1916
/* Binning state: a single 32-bit pa_sc_binner_cntl_0 value. */
struct radv_binning_state {
   uint32_t pa_sc_binner_cntl_0;
};
1920
/* Named constant for the value 128. */
#define SI_GS_PER_ES 128
1922
/* Kinds of pipeline objects. */
enum radv_pipeline_type {
   RADV_PIPELINE_GRAPHICS,

   /* Compute pipeline (incl raytracing pipeline) */
   RADV_PIPELINE_COMPUTE,

   /* Pipeline library. This can't actually run and merely is a partial
    * pipeline. */
   RADV_PIPELINE_LIBRARY
};
1930
/* Group handle made of two 32-bit handles. */
struct radv_pipeline_group_handle {
   uint32_t handles[2];
};
1934
/* Stack sizes, split into a recursive and a non-recursive part. */
struct radv_pipeline_shader_stack_size {
   uint32_t recursive_size;

   /* anyhit + intersection */
   uint32_t non_recursive_size;
};
1940
/* Pipeline slab: a reference count and a pointer to a shader arena block. */
struct radv_pipeline_slab {
   uint32_t ref_count;

   union radv_shader_arena_block *alloc;
};
1946
/* Destroy a pipeline slab (defined elsewhere). */
void radv_pipeline_slab_destroy(struct radv_device *device,
                                struct radv_pipeline_slab *slab);
1948
/* Vertex input description of a graphics pipeline. Arrays sized
 * MAX_VERTEX_ATTRIBS hold one entry per vertex attribute; arrays sized MAX_VBS
 * hold one entry per vertex buffer binding.
 */
struct radv_vertex_input_info {
   uint32_t instance_rate_inputs; /* presumably a bitmask of attributes -- confirm */
   uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
   uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
   uint8_t vertex_binding_align[MAX_VBS];
   enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_post_shuffle; /* presumably a bitmask of attributes -- confirm */
   uint32_t binding_stride[MAX_VBS];
   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
};
1964
/* Input assembly state of a graphics pipeline. The topology is stored in
 * 8 bits rather than as the Vulkan enum type.
 */
struct radv_input_assembly_info {
   uint8_t primitive_topology; /* VkPrimitiveTopology */
   bool primitive_restart_enable;
};
1969
/* Tessellation state of a graphics pipeline. */
struct radv_tessellation_info {
   uint8_t patch_control_points;
   VkTessellationDomainOrigin domain_origin;
};
1974
/* Viewport and scissor state of a graphics pipeline. The arrays have room for
 * MAX_SCISSORS / MAX_VIEWPORTS entries; the counts presumably give the number
 * of valid entries -- confirm.
 */
struct radv_viewport_info {
   bool negative_one_to_one;
   uint8_t viewport_count;
   uint8_t scissor_count;
   VkRect2D scissors[MAX_SCISSORS];
   VkViewport viewports[MAX_VIEWPORTS];
};
1982
/* Rasterization state of a graphics pipeline. Most fields keep their Vulkan
 * enum type; polygon_mode is stored in 8 bits instead.
 */
struct radv_rasterization_info {
   bool discard_enable;
   VkFrontFace front_face;
   VkCullModeFlags cull_mode;
   uint8_t polygon_mode; /* VkPolygonMode */
   bool depth_bias_enable;
   bool depth_clamp_enable;
   float line_width;
   float depth_bias_constant_factor;
   float depth_bias_clamp;
   float depth_bias_slope_factor;
   VkConservativeRasterizationModeEXT conservative_mode;
   bool provoking_vtx_last;
   bool stippled_line_enable;
   VkLineRasterizationModeEXT line_raster_mode;
   uint32_t line_stipple_factor;
   uint16_t line_stipple_pattern;
   bool depth_clip_disable;
   VkRasterizationOrderAMD order;
};
2003
/* Discard rectangle state of a graphics pipeline. rects has room for
 * MAX_DISCARD_RECTANGLES entries; count presumably gives the number of valid
 * ones -- confirm.
 */
struct radv_discard_rectangle_info {
   VkDiscardRectangleModeEXT mode;
   VkRect2D rects[MAX_DISCARD_RECTANGLES];
   uint8_t count;
};
2009
/* Multisample state of a graphics pipeline, including the sample locations
 * (sample_locs_* fields). sample_locs has room for MAX_SAMPLE_LOCATIONS
 * entries.
 */
struct radv_multisample_info {
   bool sample_shading_enable;
   bool alpha_to_coverage_enable;
   bool sample_locs_enable;
   VkSampleCountFlagBits raster_samples;
   float min_sample_shading;
   uint16_t sample_mask; /* only 16 bits of sample mask are stored */
   uint8_t sample_locs_count;
   VkSampleCountFlagBits sample_locs_per_pixel;
   VkExtent2D sample_locs_grid_size;
   VkSampleLocationEXT sample_locs[MAX_SAMPLE_LOCATIONS];
};
2022
/* Stencil state for one face; radv_depth_stencil_info holds one instance for
 * the front face and one for the back face. Masks and reference are 8 bits.
 */
struct radv_stencil_op_info {
   VkStencilOp fail_op;
   VkStencilOp pass_op;
   VkStencilOp depth_fail_op;
   VkCompareOp compare_op;
   uint8_t compare_mask;
   uint8_t write_mask;
   uint8_t reference;
};
2032
/* Depth/stencil state of a graphics pipeline: enable flags, depth bounds,
 * per-face stencil state and the depth compare op.
 */
struct radv_depth_stencil_info {
   bool stencil_test_enable;
   bool depth_test_enable;
   bool depth_write_enable;
   bool depth_bounds_test_enable;
   /* Depth bounds range. */
   struct {
      float min;
      float max;
   } depth_bounds;
   struct radv_stencil_op_info front;
   struct radv_stencil_op_info back;
   VkCompareOp depth_compare_op;
};
2046
/* Attachment formats and view mask a graphics pipeline renders with.
 * color_att_formats has room for MAX_RTS entries; color_att_count presumably
 * gives the number of valid ones -- confirm.
 */
struct radv_rendering_info {
   uint32_t view_mask;
   uint32_t color_att_count;
   VkFormat color_att_formats[MAX_RTS];
   VkFormat depth_att_format;
   VkFormat stencil_att_format;
};
2054
/* Color blend state of a graphics pipeline. Blend ops and factors are stored
 * in 16 bits (presumably the Vulkan VkLogicOp / VkBlendOp / VkBlendFactor
 * values -- confirm). att has room for MAX_RTS attachments.
 */
struct radv_color_blend_info {
   bool logic_op_enable;
   uint8_t att_count;
   uint16_t logic_op;
   uint32_t color_write_enable;
   float blend_constants[4];
   /* Per-attachment blend state. */
   struct {
      uint8_t color_write_mask;
      bool blend_enable;
      uint16_t color_blend_op;
      uint16_t alpha_blend_op;
      uint16_t src_color_blend_factor;
      uint16_t dst_color_blend_factor;
      uint16_t src_alpha_blend_factor;
      uint16_t dst_alpha_blend_factor;
   } att[MAX_RTS];
};
2072
/* Fragment shading rate state: a fragment size and two combiner ops. */
struct radv_fragment_shading_rate_info {
   VkExtent2D size;
   VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
};
2077
/* Aggregate of all the per-stage state structs that describe a graphics
 * pipeline (vertex input, input assembly, tessellation, viewport,
 * rasterization, discard rectangles, multisample, depth/stencil, rendering,
 * color blend and fragment shading rate).
 */
struct radv_graphics_pipeline_info {
   struct radv_vertex_input_info vi;
   struct radv_input_assembly_info ia;

   struct radv_tessellation_info ts;
   struct radv_viewport_info vp;
   struct radv_rasterization_info rs;
   struct radv_discard_rectangle_info dr;

   struct radv_multisample_info ms;
   struct radv_depth_stencil_info ds;
   struct radv_rendering_info ri;
   struct radv_color_blend_info cb;

   struct radv_fragment_shading_rate_info fsr;

   /* VK_AMD_mixed_attachment_samples */
   uint8_t color_att_samples;
   uint8_t ds_att_samples;
};
2098
/* How depth values are clamped; stored in
 * radv_graphics_pipeline::depth_clamp_mode. The enumerator values are explicit.
 */
enum radv_depth_clamp_mode {
   RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0,    /* Clamp to the viewport min/max depth bounds */
   RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */
   RADV_DEPTH_CLAMP_MODE_DISABLED = 2,    /* Disable depth clamping */
};
2104
/* Base struct shared by all pipeline kinds. The graphics, compute and library
 * pipeline structs embed it as their first member, and `type` tells which of
 * them a given radv_pipeline actually is.
 */
struct radv_pipeline {
   struct vk_object_base base;
   enum radv_pipeline_type type;

   struct radv_device *device;

   struct radv_pipeline_slab *slab;
   struct radeon_winsys_bo *slab_bo;

   bool need_indirect_descriptor_sets;
   /* Per-stage shaders, indexed by gl_shader_stage; NULL when a stage is
    * absent (see radv_pipeline_has_stage()).
    */
   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
   struct radv_shader *gs_copy_shader;

   /* Two command streams plus a hash for ctx_cs.
    * NOTE(review): presumably pre-recorded register state -- confirm.
    */
   struct radeon_cmdbuf cs;
   uint32_t ctx_cs_hash;
   struct radeon_cmdbuf ctx_cs;

   uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];

   unsigned max_waves;
   unsigned scratch_bytes_per_wave;

   /* Unique pipeline hash identifier. */
   uint64_t pipeline_hash;

   /* Pipeline layout info. */
   uint32_t push_constant_size;
   uint32_t dynamic_offset_count;
};
2134
/* Graphics pipeline. `base` must stay the first member: the
 * radv_pipeline_to_graphics() downcast is a plain pointer cast from
 * struct radv_pipeline.
 */
struct radv_graphics_pipeline {
   struct radv_pipeline base;

   VkShaderStageFlags active_stages;

   struct radv_dynamic_state dynamic_state;

   uint64_t dynamic_states;
   struct radv_multisample_state ms;
   struct radv_binning_state binning;
   struct radv_vrs_state vrs;
   uint32_t spi_baryc_cntl;
   unsigned esgs_ring_size;
   unsigned gsvs_ring_size;
   uint32_t vtx_base_sgpr;
   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
   uint8_t vtx_emit_num;
   uint64_t needed_dynamic_state;
   unsigned tess_patch_control_points;
   /* The next four fields are named after hardware registers; presumably
    * cached register values -- confirm at the use sites.
    */
   unsigned pa_su_sc_mode_cntl;
   unsigned db_depth_control;
   unsigned pa_cl_clip_cntl;
   unsigned cb_color_control;
   /* Same names, types and sizes as the corresponding
    * radv_vertex_input_info fields.
    */
   uint32_t binding_stride[MAX_VBS];
   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
   uint8_t last_vertex_attrib_bit;
   uint8_t next_vertex_stage : 8;
   uint32_t vb_desc_usage_mask;
   uint32_t vb_desc_alloc_size;

   /* Last pre-PS API stage */
   gl_shader_stage last_vgt_api_stage;

   /* Used for rbplus */
   uint32_t col_format;
   uint32_t cb_target_mask;

   bool disable_out_of_order_rast_for_occlusion;
   bool uses_drawid;
   bool uses_baseinstance;
   bool uses_dynamic_stride;
   bool uses_conservative_overestimate;
   bool negative_one_to_one;
   enum radv_depth_clamp_mode depth_clamp_mode;
   bool use_per_attribute_vb_descs;
   bool can_use_simple_input;
   bool uses_user_sample_locations;

   /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
   bool force_vrs_per_vertex;

   /* Whether the pipeline uses NGG (GFX10+). */
   bool is_ngg;
   bool has_ngg_culling;

   /* Not NULL if graphics pipeline uses streamout. */
   struct radv_shader *streamout_shader;

   unsigned rast_prim;
   float line_width;
};
2198
/* Compute pipeline; raytracing pipelines use this type as well (see
 * RADV_PIPELINE_COMPUTE). `base` must stay the first member because
 * radv_pipeline_to_compute() is a plain pointer cast.
 */
struct radv_compute_pipeline {
   struct radv_pipeline base;

   bool cs_regalloc_hang_bug;

   /* Raytracing */
   struct radv_pipeline_group_handle *rt_group_handles;
   struct radv_pipeline_shader_stack_size *rt_stack_sizes;
   bool dynamic_stack_size;
   uint32_t group_count; /* presumably the length of the two rt_* arrays -- confirm */
};
2210
/* Pipeline library: stores the shader stage and raytracing group create infos
 * of a partial pipeline. `base` must stay the first member because
 * radv_pipeline_to_library() is a plain pointer cast.
 */
struct radv_library_pipeline {
   struct radv_pipeline base;

   unsigned stage_count;
   VkPipelineShaderStageCreateInfo *stages;
   unsigned group_count;
   VkRayTracingShaderGroupCreateInfoKHR *groups;
   VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifiers;
   /* SHA-1 digests; presumably one per stage -- confirm. */
   struct {
      uint8_t sha1[SHA1_DIGEST_LENGTH];
   } *hashes;
};
2223
/* Defines radv_pipeline_to_<pipe_type>(): a checked downcast from the base
 * struct radv_pipeline to struct radv_<pipe_type>_pipeline. The helper asserts
 * that pipeline->type equals pipe_enum and then casts the pointer, which is
 * valid because each derived struct has struct radv_pipeline as its first
 * member. With NDEBUG the assert disappears and the cast is unchecked.
 */
#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)            \
   static inline struct radv_##pipe_type##_pipeline *                \
   radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline)      \
   {                                                                 \
      assert(pipeline->type == pipe_enum);                           \
      return (struct radv_##pipe_type##_pipeline *) pipeline;        \
   }

/* radv_pipeline_to_graphics(), radv_pipeline_to_compute() and
 * radv_pipeline_to_library().
 */
RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
RADV_DECL_PIPELINE_DOWNCAST(library, RADV_PIPELINE_LIBRARY)
2235
/* Everything known about one shader stage of a pipeline: the SPIR-V input,
 * entry point and specialization info, the NIR shaders, the gathered shader
 * info/args and the creation feedback.
 */
struct radv_pipeline_stage {
   gl_shader_stage stage;

   /* SPIR-V source of the stage: owning object, raw data pointer, size and a
    * 20-byte SHA-1 (size presumably in bytes -- confirm).
    */
   struct {
      const struct vk_object_base *object;
      const char *data;
      uint32_t size;
      unsigned char sha1[20];
   } spirv;

   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   unsigned char shader_sha1[20];

   nir_shader *nir;
   nir_shader *internal_nir; /* meta shaders */

   struct radv_shader_info info;
   struct radv_shader_args args;

   VkPipelineCreationFeedback feedback;
};
2259
2260 static inline bool
radv_pipeline_has_stage(const struct radv_graphics_pipeline * pipeline,gl_shader_stage stage)2261 radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage)
2262 {
2263 return pipeline->base.shaders[stage];
2264 }
2265
/* Pipeline query and state-emission helpers; all are defined elsewhere. */

/* Whether the graphics pipeline uses NGG passthrough. */
bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline);

/* Whether the pipeline has a GS copy shader (see radv_pipeline::gs_copy_shader). */
bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);

/* Looks up user SGPR info for the given stage and index.
 * NOTE(review): the meaning of idx and the behavior for unused entries are
 * not visible here -- check the definition.
 */
struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
                                                 gl_shader_stage stage, int idx);

/* Returns the shader used for the given stage of the pipeline. */
struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);

/* Emit compute shader state for `shader` into the command stream `cs`. */
void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
                              const struct radv_shader *shader);

void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
                                      struct radeon_cmdbuf *cs, const struct radv_shader *shader);
2280
/* Driver-internal extra options for graphics pipeline creation; passed as the
 * `extra` argument of radv_graphics_pipeline_create().
 */
struct radv_graphics_pipeline_create_info {
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;
   uint32_t custom_blend_mode;
};
2290
/* Pipeline lifetime entry points; all are defined elsewhere. */

/* Initializes the base pipeline struct for the given device and pipeline type. */
void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
                        enum radv_pipeline_type type);

/* Creates a graphics pipeline. `extra` carries the driver-internal options of
 * struct radv_graphics_pipeline_create_info.
 * NOTE(review): presumably NULL for application pipelines -- confirm.
 */
VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
                                       const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                       const struct radv_graphics_pipeline_create_info *extra,
                                       const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);

/* Creates a compute pipeline. custom_hash, rt_stack_sizes and rt_group_count
 * are raytracing-related extras; their ownership and NULL handling are not
 * visible here -- check the definition.
 */
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                                      const VkComputePipelineCreateInfo *pCreateInfo,
                                      const VkAllocationCallbacks *pAllocator,
                                      const uint8_t *custom_hash,
                                      struct radv_pipeline_shader_stack_size *rt_stack_sizes,
                                      uint32_t rt_group_count, VkPipeline *pPipeline);

/* Destroys a pipeline of any type. */
void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                           const VkAllocationCallbacks *allocator);
2308
/* Binning tunables, returned by value from radv_get_binning_settings(). The
 * allowed range of each field is given next to it.
 */
struct radv_binning_settings {
   unsigned context_states_per_bin;    /* allowed range: [1, 6] */
   unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
};
2314
/* Returns the binning settings for the given physical device (by value);
 * defined elsewhere.
 */
struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2316
2317 struct vk_format_description;
2318 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2319 int first_non_void);
2320 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2321 int first_non_void);
2322 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2323 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2324 const struct util_format_description *desc, unsigned *dfmt,
2325 unsigned *nfmt, bool *post_shuffle,
2326 enum radv_vs_input_alpha_adjust *alpha_adjust);
2327 uint32_t radv_translate_colorformat(VkFormat format);
2328 uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2329 int first_non_void);
2330 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2331 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2332 uint32_t radv_translate_dbformat(VkFormat format);
2333 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2334 int first_non_void);
2335 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2336 int first_non_void);
2337 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2338 VkClearColorValue *value);
2339 bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2340 VkFormat format);
2341 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2342 VkFormat format, bool *blendable);
2343 bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
2344 bool *sign_reinterpret);
2345 bool radv_is_atomic_format_supported(VkFormat format);
2346 bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2347
/* Mask of the image usage bits that allow writing to an image: transfer
 * destination, color attachment, depth/stencil attachment and storage.
 */
static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
2351
/* One plane of an image: its format and its surface description. */
struct radv_image_plane {
   VkFormat format;
   struct radeon_surf surface;
};
2356
/* Memory binding of an image: the buffer object and the offset into it. */
struct radv_image_binding {
   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset;
};
2362
/* Driver image object. The struct ends with a variable number of planes
 * (plane_count entries), so it has to be allocated with room for them.
 */
struct radv_image {
   struct vk_image vk;

   struct ac_surf_info info;

   VkDeviceSize size;
   uint32_t alignment;

   unsigned queue_family_mask;
   bool exclusive;
   bool shareable;
   bool l2_coherent;
   bool dcc_sign_reinterpret;
   bool support_comp_to_single;

   /* Up to three memory bindings. The inline *_va() helpers in this file
    * only use bindings[0].
    */
   struct radv_image_binding bindings[3];
   bool tc_compatible_cmask;

   /* Offsets of driver metadata, added to bindings[0].offset by the *_va()
    * helpers. The fast-clear and predicate helpers use 8 bytes per mip level.
    */
   uint64_t clear_value_offset;
   uint64_t fce_pred_offset;
   uint64_t dcc_pred_offset;

   /*
    * Metadata for the TC-compat zrange workaround. If the 32-bit value
    * stored at this offset is UINT_MAX, the driver will emit
    * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
    * SET_CONTEXT_REG packet.
    */
   uint64_t tc_compat_zrange_offset;

   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
   VkDeviceMemory owned_memory;

   unsigned plane_count;
   bool disjoint;
   /* Trailing plane array, declared with a zero length (GNU extension form of
    * a flexible array member).
    */
   struct radv_image_plane planes[0];
};
2400
/* Whether the image has a htile that is known consistent with the contents of
 * the image and is allowed to be in compressed form.
 *
 * If this is false, reads that don't use the htile should be able to return
 * correct results.
 */
bool radv_layout_is_htile_compressed(const struct radv_device *device,
                                     const struct radv_image *image, VkImageLayout layout,
                                     bool in_render_loop, unsigned queue_mask);

/* Whether the given mip level can be fast cleared in the given layout. */
bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
                                unsigned level, VkImageLayout layout, bool in_render_loop,
                                unsigned queue_mask);

/* Whether DCC is kept compressed for the given mip level in the given layout. */
bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
                                unsigned level, VkImageLayout layout, bool in_render_loop,
                                unsigned queue_mask);

/* Whether FMASK is kept compressed in the given layout. */
bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
                                  VkImageLayout layout, unsigned queue_mask);
2421
2422 /**
2423 * Return whether the image has CMASK metadata for color surfaces.
2424 */
2425 static inline bool
radv_image_has_cmask(const struct radv_image * image)2426 radv_image_has_cmask(const struct radv_image *image)
2427 {
2428 return image->planes[0].surface.cmask_offset;
2429 }
2430
2431 /**
2432 * Return whether the image has FMASK metadata for color surfaces.
2433 */
2434 static inline bool
radv_image_has_fmask(const struct radv_image * image)2435 radv_image_has_fmask(const struct radv_image *image)
2436 {
2437 return image->planes[0].surface.fmask_offset;
2438 }
2439
2440 /**
2441 * Return whether the image has DCC metadata for color surfaces.
2442 */
2443 static inline bool
radv_image_has_dcc(const struct radv_image * image)2444 radv_image_has_dcc(const struct radv_image *image)
2445 {
2446 return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2447 image->planes[0].surface.meta_offset;
2448 }
2449
2450 /**
2451 * Return whether the image is TC-compatible CMASK.
2452 */
2453 static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image * image)2454 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2455 {
2456 return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2457 }
2458
2459 /**
2460 * Return whether DCC metadata is enabled for a level.
2461 */
2462 static inline bool
radv_dcc_enabled(const struct radv_image * image,unsigned level)2463 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2464 {
2465 return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2466 }
2467
/**
 * Tell whether the image has any color-buffer metadata, i.e. at least one of
 * CMASK, FMASK or DCC.
 */
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
   if (radv_image_has_cmask(image))
      return true;
   if (radv_image_has_fmask(image))
      return true;

   return radv_image_has_dcc(image);
}
2476
2477 /**
2478 * Return whether the image has HTILE metadata for depth surfaces.
2479 */
2480 static inline bool
radv_image_has_htile(const struct radv_image * image)2481 radv_image_has_htile(const struct radv_image *image)
2482 {
2483 return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2484 image->planes[0].surface.meta_size;
2485 }
2486
2487 /**
2488 * Return whether the image has VRS HTILE metadata for depth surfaces
2489 */
2490 static inline bool
radv_image_has_vrs_htile(const struct radv_device * device,const struct radv_image * image)2491 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2492 {
2493 /* Any depth buffer can potentially use VRS. */
2494 return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2495 (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2496 }
2497
2498 /**
2499 * Return whether HTILE metadata is enabled for a level.
2500 */
2501 static inline bool
radv_htile_enabled(const struct radv_image * image,unsigned level)2502 radv_htile_enabled(const struct radv_image *image, unsigned level)
2503 {
2504 return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2505 }
2506
2507 /**
2508 * Return whether the image is TC-compatible HTILE.
2509 */
2510 static inline bool
radv_image_is_tc_compat_htile(const struct radv_image * image)2511 radv_image_is_tc_compat_htile(const struct radv_image *image)
2512 {
2513 return radv_image_has_htile(image) &&
2514 (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2515 }
2516
2517 /**
2518 * Return whether the entire HTILE buffer can be used for depth in order to
2519 * improve HiZ Z-Range precision.
2520 */
2521 static inline bool
radv_image_tile_stencil_disabled(const struct radv_device * device,const struct radv_image * image)2522 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2523 {
2524 if (device->physical_device->rad_info.gfx_level >= GFX9) {
2525 return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
2526 } else {
2527 /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2528 * the TC-compat ZRANGE issue even if no stencil is used.
2529 */
2530 return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image);
2531 }
2532 }
2533
2534 static inline bool
radv_image_has_clear_value(const struct radv_image * image)2535 radv_image_has_clear_value(const struct radv_image *image)
2536 {
2537 return image->clear_value_offset != 0;
2538 }
2539
2540 static inline uint64_t
radv_image_get_fast_clear_va(const struct radv_image * image,uint32_t base_level)2541 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2542 {
2543 assert(radv_image_has_clear_value(image));
2544
2545 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2546 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2547 return va;
2548 }
2549
2550 static inline uint64_t
radv_image_get_fce_pred_va(const struct radv_image * image,uint32_t base_level)2551 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2552 {
2553 assert(image->fce_pred_offset != 0);
2554
2555 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2556 va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8;
2557 return va;
2558 }
2559
2560 static inline uint64_t
radv_image_get_dcc_pred_va(const struct radv_image * image,uint32_t base_level)2561 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2562 {
2563 assert(image->dcc_pred_offset != 0);
2564
2565 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2566 va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8;
2567 return va;
2568 }
2569
2570 static inline uint64_t
radv_get_tc_compat_zrange_va(const struct radv_image * image,uint32_t base_level)2571 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2572 {
2573 assert(image->tc_compat_zrange_offset != 0);
2574
2575 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2576 va += image->bindings[0].offset + image->tc_compat_zrange_offset + base_level * 4;
2577 return va;
2578 }
2579
2580 static inline uint64_t
radv_get_ds_clear_value_va(const struct radv_image * image,uint32_t base_level)2581 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2582 {
2583 assert(radv_image_has_clear_value(image));
2584
2585 uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2586 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2587 return va;
2588 }
2589
/**
 * Return the 32-bit value HTILE is initialized with. The value depends on
 * which HTILE layout the image uses (depth only, depth + stencil, or
 * depth + stencil + VRS rates).
 */
static inline uint32_t
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
   if (radv_image_tile_stencil_disabled(device, image)) {
      /* Z only (no stencil):
       *
       * |31     18|17      4|3     0|
       * +---------+---------+-------+
       * |  Max Z  |  Min Z  | ZMask |
       */
      return 0xfffc000f;
   }

   /* Z and stencil:
    *
    * |31       12|11 10|9    8|7   6|5   4|3     0|
    * +-----------+-----+------+-----+-----+-------+
    * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
    *
    * SR0/SR1 contains the stencil test results. Initializing
    * SR0/SR1 to 0x3 means the stencil test result is unknown.
    *
    * Z, stencil and 4 bit VRS encoding:
    * |31       12|11        10|9    8|7          6|5   4|3     0|
    * +-----------+------------+------+------------+-----+-------+
    * |  Z Range  | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
    *
    * With VRS, the x-rate bits (7:6) start at 0 so that the hw interprets
    * the rate as 1 sample.
    */
   return radv_image_has_vrs_htile(device, image) ? 0xfffff33f : 0xfffff3ff;
}
2628
2629 static inline bool
radv_image_get_iterate256(struct radv_device * device,struct radv_image * image)2630 radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2631 {
2632 /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2633 return device->physical_device->rad_info.gfx_level >= GFX10 &&
2634 (image->vk.usage &
2635 (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2636 radv_image_is_tc_compat_htile(image) && image->info.samples > 1;
2637 }
2638
/* Returns a queue family mask for the image; defined elsewhere.
 * NOTE(review): how `family` and `queue_family` differ is not visible here --
 * check the definition. The result is presumably what the radv_layout_*()
 * helpers take as queue_mask.
 */
unsigned radv_image_queue_family_mask(const struct radv_image *image,
                                      enum radv_queue_family family,
                                      enum radv_queue_family queue_family);
2642
2643 static inline uint32_t
radv_get_layerCount(const struct radv_image * image,const VkImageSubresourceRange * range)2644 radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2645 {
2646 return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2647 ? image->info.array_size - range->baseArrayLayer
2648 : range->layerCount;
2649 }
2650
2651 static inline uint32_t
radv_get_levelCount(const struct radv_image * image,const VkImageSubresourceRange * range)2652 radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2653 {
2654 return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2655 : range->levelCount;
2656 }
2657
/* Image helpers defined elsewhere. */

/* Whether the image can be rendered to on this device. */
bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);

/* Forward declaration; only pointers to it are used here. */
struct radeon_bo_metadata;
/* Fills *metadata from the image. */
void radv_init_metadata(struct radv_device *device, struct radv_image *image,
                        struct radeon_bo_metadata *metadata);

/* Overrides the offset and stride of the image.
 * NOTE(review): units and which plane(s) are affected are not visible here --
 * check the definition.
 */
void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                       uint64_t offset, uint32_t stride);
2666
/* Image descriptor storage with two overlapping views of the same memory:
 * either one 8-dword plane descriptor followed by an 8-dword FMASK
 * descriptor, or up to three 8-dword plane descriptors. The union is as large
 * as the three-plane view (24 dwords).
 */
union radv_descriptor {
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2676
/* Driver image view object: the image it views plus the sampled and storage
 * descriptors built for it.
 */
struct radv_image_view {
   struct vk_image_view vk;
   struct radv_image *image; /**< VkImageViewCreateInfo::image */

   unsigned plane_id;
   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   /* Whether the image iview supports fast clear. */
   bool support_fast_clear;

   bool disable_dcc_mrt;

   /* Descriptor for use as a sampled image. */
   union radv_descriptor descriptor;

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   union radv_descriptor storage_descriptor;
};
2696
/* Arguments of radv_image_create(): the Vulkan create info plus
 * driver-internal options.
 */
struct radv_image_create_info {
   const VkImageCreateInfo *vk_info;
   bool scanout;
   bool no_metadata_planes;
   bool prime_blit_src;
   const struct radeon_bo_metadata *bo_metadata;
};
2704
/* Image creation and Android interop entry points; all are defined elsewhere. */

/* Computes the layout of `image`. Note that create_info is passed by value. */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image);

/* Creates an image and returns its handle through pImage. */
VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
                           const VkAllocationCallbacks *alloc, VkImage *pImage);

/* Whether the formats an image can be viewed with are DCC compatible; also
 * reports through *sign_reinterpret.
 */
bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                     VkFormat format, VkImageCreateFlags flags,
                                     bool *sign_reinterpret);

bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);

/* Android gralloc / AHardwareBuffer support. */
VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
                                 const VkNativeBufferANDROID *gralloc_info,
                                 const VkAllocationCallbacks *alloc, VkImage *out_image_h);
uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
                                      const VkImageUsageFlags vk_usage);
VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
                                unsigned priority,
                                const VkImportAndroidHardwareBufferInfoANDROID *info);
VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
                                unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);

/* Selects a format from the pNext chain `next`; default_format is presumably
 * returned when the chain has no external format -- confirm.
 */
VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);

bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2733
/* Driver-internal extra options for radv_image_view_init(). */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
   bool disable_dcc_mrt;
   /* NOTE(review): this struct configures image views, so the comment below
    * may mean vkCreateImageView -- confirm.
    */
   bool from_client; /**< Set only if this came from vkCreateImage */
};
2740
/* Image view helpers; all are defined elsewhere. */

/* Initializes an image view in caller-provided storage. extra_create_info
 * carries driver-internal options (presumably optional -- confirm).
 */
void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
                          const VkImageViewCreateInfo *pCreateInfo,
                          VkImageCreateFlags img_create_flags,
                          const struct radv_image_view_extra_create_info *extra_create_info);
/* Counterpart of radv_image_view_init(). */
void radv_image_view_finish(struct radv_image_view *iview);

/* Returns the format of the image aspect(s) selected by mask. */
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2748
/* Parameters of a sampler Y'CbCr conversion. This is the part of
 * radv_sampler_ycbcr_conversion that gets hashed for the descriptor set
 * layout.
 */
struct radv_sampler_ycbcr_conversion_state {
   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};
2757
/* Sampler Y'CbCr conversion object: the Vulkan object base plus the
 * conversion parameters.
 */
struct radv_sampler_ycbcr_conversion {
   struct vk_object_base base;
   /* The state is hashed for the descriptor set layout. */
   struct radv_sampler_ycbcr_conversion_state state;
};
2763
/* Buffer view object: the backing buffer object, the view format and range,
 * and a 4-dword descriptor.
 */
struct radv_buffer_view {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   VkFormat vk_format;
   uint64_t range; /**< VkBufferViewCreateInfo::range */
   uint32_t state[4];
};
/* Initialize / finish a buffer view in caller-provided storage; defined
 * elsewhere.
 */
void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
                           const VkBufferViewCreateInfo *pCreateInfo);
void radv_buffer_view_finish(struct radv_buffer_view *view);
2774
2775 static inline bool
radv_image_extent_compare(const struct radv_image * image,const VkExtent3D * extent)2776 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2777 {
2778 if (extent->width != image->info.width || extent->height != image->info.height ||
2779 extent->depth != image->info.depth)
2780 return false;
2781 return true;
2782 }
2783
/* Sampler object: a 4-dword descriptor, an optional Y'CbCr conversion and a
 * border color slot.
 */
struct radv_sampler {
   struct vk_object_base base;
   uint32_t state[4];
   struct radv_sampler_ycbcr_conversion *ycbcr_sampler; /* presumably NULL without conversion -- confirm */
   uint32_t border_color_slot;
};
2790
/* Source/destination stage and access masks of a barrier that belongs to a
 * subpass (radv_subpass::start_barrier) or to the end of a render pass
 * (radv_render_pass::end_barrier).
 */
struct radv_subpass_barrier {
   VkPipelineStageFlags2 src_stage_mask;
   VkPipelineStageFlags2 dst_stage_mask;
   VkAccessFlags2 src_access_mask;
   VkAccessFlags2 dst_access_mask;
};
2797
/* Emits the given subpass barrier into the command buffer; defined elsewhere. */
void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
                               const struct radv_subpass_barrier *barrier);
2800
/* Reference from a subpass to a render pass attachment, with the layouts the
 * attachment is in during the subpass.
 */
struct radv_subpass_attachment {
   uint32_t attachment; /* presumably an index into radv_render_pass::attachments -- confirm */
   VkImageLayout layout;
   VkImageLayout stencil_layout;
   bool in_render_loop;
};
2807
/* One subpass of a render pass: its attachment references, resolve modes,
 * start barrier, view mask and sample counts.
 */
struct radv_subpass {
   uint32_t attachment_count;
   struct radv_subpass_attachment *attachments;

   uint32_t input_count;
   uint32_t color_count;
   /* Attachment references by role.
    * NOTE(review): presumably these point into `attachments` and the optional
    * ones are NULL when unused -- confirm where the subpass is built.
    */
   struct radv_subpass_attachment *input_attachments;
   struct radv_subpass_attachment *color_attachments;
   struct radv_subpass_attachment *resolve_attachments;
   struct radv_subpass_attachment *depth_stencil_attachment;
   struct radv_subpass_attachment *ds_resolve_attachment;
   struct radv_subpass_attachment *vrs_attachment;
   VkResolveModeFlagBits depth_resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;

   struct radv_subpass_barrier start_barrier;

   uint32_t view_mask;

   VkSampleCountFlagBits color_sample_count;
   VkSampleCountFlagBits depth_sample_count;
   VkSampleCountFlagBits max_sample_count;

   /* Whether the subpass has ingoing/outgoing external dependencies. */
   bool has_ingoing_dep;
   bool has_outgoing_dep;
};
2838
/* Returns the id of the command buffer's current subpass; defined elsewhere. */
uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2840
/* Description of one render pass attachment: format, sample count, load ops
 * and the initial/final layouts (with separate stencil layouts).
 */
struct radv_render_pass_attachment {
   VkFormat format;
   uint32_t samples;
   VkAttachmentLoadOp load_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used first/last. */
   uint32_t first_subpass_idx;
   uint32_t last_subpass_idx;
};
2855
/* Driver-side render pass object. The subpass descriptions are stored inline
 * after the fixed-size part of the struct.
 */
struct radv_render_pass {
   struct vk_object_base base;
   uint32_t attachment_count;
   uint32_t subpass_count;
   struct radv_subpass_attachment *subpass_attachments;
   struct radv_render_pass_attachment *attachments;
   struct radv_subpass_barrier end_barrier;
   /* Trailing zero-length array.
    * NOTE(review): presumably holds `subpass_count` entries - confirm against
    * the allocation site.
    */
   struct radv_subpass subpasses[0];
};
2865
/* Set-up and tear-down of the device's "meta" state (declarations only).
 * The init function reports failure through its VkResult.
 */
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
2868
/* Driver-side query pool object: a winsys buffer object plus the layout
 * information (stride, availability offset, total size) describing how the
 * queries are placed in it.
 */
struct radv_query_pool {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint32_t stride;
   uint32_t availability_offset;
   uint64_t size;
   /* NOTE(review): presumably a CPU mapping of `bo` - confirm. */
   char *ptr;
   VkQueryType type;
   uint32_t pipeline_stats_mask;
   bool uses_gds; /* For NGG GS on GFX10+ */
};
2880
/* Only pointers to this type are used in this header. */
struct radv_perfcounter_impl;

/* Query pool variant that embeds the generic radv_query_pool as its first
 * member (`b`) and adds register, pass and counter bookkeeping.
 * NOTE(review): "pc" presumably stands for performance counter - confirm.
 */
struct radv_pc_query_pool {
   struct radv_query_pool b;

   uint32_t *pc_regs;
   unsigned num_pc_regs;

   unsigned num_passes;

   unsigned num_counters;
   struct radv_perfcounter_impl *counters;
};
2894
/* Helpers operating on radv_pc_query_pool (declarations only): init/deinit of
 * a pool, begin/end of a query at GPU address `va` on a command buffer, and
 * conversion of raw 64-bit `data` into `out`.
 * NOTE(review): the layout expected for `data` and `out` is not visible here -
 * see the definitions.
 */
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice,
                                 const VkQueryPoolCreateInfo *pCreateInfo,
                                 struct radv_pc_query_pool *pool);
void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
                         uint64_t va);
void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
                       uint64_t va);
void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
2904
/* Declaration only. Submits the command stream `cs` on `queue`.
 * NOTE(review): the bool presumably signals success - confirm.
 */
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);

/* Declaration only. Initializes `queue` (index `idx`) for `device` from the
 * queue create info and an optional-looking global priority structure.
 * NOTE(review): the meaning of the int return value is not visible here.
 */
int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
                    const VkDeviceQueueCreateInfo *create_info,
                    const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
2910
/* Descriptor set helpers that operate on a command buffer (declarations
 * only).
 */

/* Sets descriptor set `set` at index `idx` for the given bind point. */
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
                             struct radv_descriptor_set *set, unsigned idx);

/* Applies descriptor writes and copies.
 * NOTE(review): `overrideSet` presumably replaces the destination set named in
 * each write - confirm at the definition.
 */
void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
                                     VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
                                     const VkWriteDescriptorSet *pDescriptorWrites,
                                     uint32_t descriptorCopyCount,
                                     const VkCopyDescriptorSet *pDescriptorCopies);

/* Updates `set` from `pData` as described by a descriptor update template. */
void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
                                                  struct radv_cmd_buffer *cmd_buffer,
                                                  struct radv_descriptor_set *set,
                                                  VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                                  const void *pData);

/* Push-descriptor variant.
 * NOTE(review): the "meta" prefix suggests it is meant for driver-internal
 * operations - confirm.
 */
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
                                   VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
                                   uint32_t set, uint32_t descriptorWriteCount,
                                   const VkWriteDescriptorSet *pDescriptorWrites);
2930
/* Declarations only. Both take a command buffer, an image and a subresource
 * range; radv_init_dcc() additionally takes the 32-bit `value` to use.
 * NOTE(review): the meaning of the returned uint32_t is not visible in this
 * header - see the definitions.
 */
uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                       const VkImageSubresourceRange *range, uint32_t value);

uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *range);
2936
/* radv_nir_to_llvm.c */
/* Forward declarations; only pointers to these types are needed below. */
struct radv_shader_args;
struct radv_nir_compiler_options;
struct radv_shader_info;

/* Declaration only. Takes `shader_count` NIR shaders together with compiler
 * options, shader info and shader args, and has a `binary` out-parameter.
 * NOTE(review): ownership of `*binary` is not visible here - confirm.
 */
void llvm_compile_shader(const struct radv_nir_compiler_options *options,
                         const struct radv_shader_info *info, unsigned shader_count,
                         struct nir_shader *const *shaders, struct radv_shader_binary **binary,
                         const struct radv_shader_args *args);
2946
/* radv_shader_info.h */
struct radv_shader_info;

/* Declaration only. Takes a read-only NIR shader, pipeline layout and pipeline
 * key, and a writable `info`.
 * NOTE(review): presumably fills `info` from the shader - confirm.
 */
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
                               const struct radv_pipeline_layout *layout,
                               const struct radv_pipeline_key *pipeline_key,
                               struct radv_shader_info *info);

/* Declaration only. Initializes `info`. */
void radv_nir_shader_info_init(struct radv_shader_info *info);
2956
/* Thread trace interface (declarations only): device-level init/finish,
 * begin/end/fetch on a queue, emission of `num_dwords` dwords of user data
 * into a command buffer, and a global instruction-timing query.
 * NOTE(review): the bool results presumably signal success - confirm.
 */
bool radv_thread_trace_init(struct radv_device *device);
void radv_thread_trace_finish(struct radv_device *device);
bool radv_begin_thread_trace(struct radv_queue *queue);
bool radv_end_thread_trace(struct radv_queue *queue);
bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
void radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
                                     uint32_t num_dwords);
bool radv_is_instruction_timing_enabled(void);
2965
/* Declarations only. Both write into the command stream `cs`; the boolean
 * selects the state to emit (`inhibit` / `enable`).
 */
void radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs,
                                   bool inhibit);
void radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
2969
/* Declaration only. Copy between `image` and `buffer` described by `region`.
 * NOTE(review): copy direction and the meaning of the bool result are not
 * visible here - see the definition.
 */
bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                          struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
2972
/* radv_sqtt_layer.c */
/* Set of layout-transition operations attached to a barrier. Each operation
 * is a single-bit flag; `all` aliases the eight flags plus the eight reserved
 * bits as one 16-bit value so they can be read or cleared together.
 * Passed to radv_describe_layout_transition().
 */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      uint16_t all;
   } layout_transitions;
};
2990
/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
3014
/* "Describe" hooks (declarations only). Each takes the command buffer being
 * recorded; begin/end and start/end variants come in pairs.
 * NOTE(review): presumably these emit trace markers consumed by RGP, given the
 * rgp_barrier_reason argument of radv_describe_barrier_start() - confirm at
 * the definitions.
 */
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
                                 enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
                                     const struct radv_barrier_data *barrier);
3030
/* Driver-side object for an indirect commands layout. Holds flags and offsets
 * for the draw parameters, index buffer, state, vertex buffers and push
 * constants, followed by the layout's tokens stored inline.
 */
struct radv_indirect_command_layout {
   struct vk_object_base base;

   uint32_t input_stride;
   /* NOTE(review): presumably the number of entries in `tokens` - confirm. */
   uint32_t token_count;

   bool indexed;
   bool binds_index_buffer;
   bool binds_state;
   uint16_t draw_params_offset;
   uint16_t index_buffer_offset;

   uint16_t state_offset;

   /* NOTE(review): presumably one bit per vertex buffer binding, selecting
    * which `vbo_offsets` entries are valid - confirm.
    */
   uint32_t bind_vbo_mask;
   uint32_t vbo_offsets[MAX_VBS];

   /* One offset slot per 4 bytes of push constant space; the 64-bit mask
    * presumably marks which slots are set - confirm.
    */
   uint64_t push_constant_mask;
   uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];

   uint32_t ibo_type_32;
   uint32_t ibo_type_8;

   /* Trailing zero-length array stored inline after the struct. */
   VkIndirectCommandsLayoutTokenNV tokens[0];
};
3056
/* Declaration only. Returns a size computed from the generated-commands info.
 * NOTE(review): units (presumably bytes) are not visible here - confirm.
 */
uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);

/* Declaration only. Prepares generated commands on `cmd_buffer`. */
void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
                      const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);

/* Declaration only. Returns the current time as a 64-bit value.
 * NOTE(review): clock source and units are not visible here - confirm.
 */
uint64_t radv_get_current_time(void);
3063
3064 static inline uint32_t
si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)3065 si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
3066 {
3067 switch (gl_prim) {
3068 case SHADER_PRIM_POINTS:
3069 return 1;
3070 case SHADER_PRIM_LINES:
3071 case SHADER_PRIM_LINE_STRIP:
3072 return 2;
3073 case SHADER_PRIM_TRIANGLES:
3074 case SHADER_PRIM_TRIANGLE_STRIP:
3075 return 3;
3076 case SHADER_PRIM_LINES_ADJACENCY:
3077 return 4;
3078 case SHADER_PRIM_TRIANGLES_ADJACENCY:
3079 return 6;
3080 case SHADER_PRIM_QUADS:
3081 return V_028A6C_TRISTRIP;
3082 default:
3083 assert(0);
3084 return 0;
3085 }
3086 }
3087
3088 static inline uint32_t
si_conv_prim_to_gs_out(uint32_t topology)3089 si_conv_prim_to_gs_out(uint32_t topology)
3090 {
3091 switch (topology) {
3092 case V_008958_DI_PT_POINTLIST:
3093 case V_008958_DI_PT_PATCH:
3094 return V_028A6C_POINTLIST;
3095 case V_008958_DI_PT_LINELIST:
3096 case V_008958_DI_PT_LINESTRIP:
3097 case V_008958_DI_PT_LINELIST_ADJ:
3098 case V_008958_DI_PT_LINESTRIP_ADJ:
3099 return V_028A6C_LINESTRIP;
3100 case V_008958_DI_PT_TRILIST:
3101 case V_008958_DI_PT_TRISTRIP:
3102 case V_008958_DI_PT_TRIFAN:
3103 case V_008958_DI_PT_TRILIST_ADJ:
3104 case V_008958_DI_PT_TRISTRIP_ADJ:
3105 return V_028A6C_TRISTRIP;
3106 default:
3107 assert(0);
3108 return 0;
3109 }
3110 }
3111
3112 static inline uint32_t
si_translate_prim(unsigned topology)3113 si_translate_prim(unsigned topology)
3114 {
3115 switch (topology) {
3116 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
3117 return V_008958_DI_PT_POINTLIST;
3118 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
3119 return V_008958_DI_PT_LINELIST;
3120 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
3121 return V_008958_DI_PT_LINESTRIP;
3122 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
3123 return V_008958_DI_PT_TRILIST;
3124 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
3125 return V_008958_DI_PT_TRISTRIP;
3126 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
3127 return V_008958_DI_PT_TRIFAN;
3128 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
3129 return V_008958_DI_PT_LINELIST_ADJ;
3130 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
3131 return V_008958_DI_PT_LINESTRIP_ADJ;
3132 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
3133 return V_008958_DI_PT_TRILIST_ADJ;
3134 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
3135 return V_008958_DI_PT_TRISTRIP_ADJ;
3136 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
3137 return V_008958_DI_PT_PATCH;
3138 default:
3139 unreachable("unhandled primitive type");
3140 }
3141 }
3142
3143 static inline bool
radv_prim_is_points_or_lines(unsigned topology)3144 radv_prim_is_points_or_lines(unsigned topology)
3145 {
3146 switch (topology) {
3147 case V_008958_DI_PT_POINTLIST:
3148 case V_008958_DI_PT_LINELIST:
3149 case V_008958_DI_PT_LINESTRIP:
3150 case V_008958_DI_PT_LINELIST_ADJ:
3151 case V_008958_DI_PT_LINESTRIP_ADJ:
3152 return true;
3153 default:
3154 return false;
3155 }
3156 }
3157
3158 static inline bool
radv_rast_prim_is_point(unsigned rast_prim)3159 radv_rast_prim_is_point(unsigned rast_prim)
3160 {
3161 return rast_prim == V_028A6C_POINTLIST;
3162 }
3163
3164 static inline bool
radv_rast_prim_is_line(unsigned rast_prim)3165 radv_rast_prim_is_line(unsigned rast_prim)
3166 {
3167 return rast_prim == V_028A6C_LINESTRIP;
3168 }
3169
/**
 * Returns true when rast_prim is accepted by radv_rast_prim_is_point() or by
 * radv_rast_prim_is_line(); the line check only runs if the point check fails.
 */
static inline bool
radv_rast_prim_is_points_or_lines(unsigned rast_prim)
{
   if (radv_rast_prim_is_point(rast_prim))
      return true;

   return radv_rast_prim_is_line(rast_prim);
}
3175
3176 static inline uint32_t
si_translate_stencil_op(enum VkStencilOp op)3177 si_translate_stencil_op(enum VkStencilOp op)
3178 {
3179 switch (op) {
3180 case VK_STENCIL_OP_KEEP:
3181 return V_02842C_STENCIL_KEEP;
3182 case VK_STENCIL_OP_ZERO:
3183 return V_02842C_STENCIL_ZERO;
3184 case VK_STENCIL_OP_REPLACE:
3185 return V_02842C_STENCIL_REPLACE_TEST;
3186 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
3187 return V_02842C_STENCIL_ADD_CLAMP;
3188 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
3189 return V_02842C_STENCIL_SUB_CLAMP;
3190 case VK_STENCIL_OP_INVERT:
3191 return V_02842C_STENCIL_INVERT;
3192 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
3193 return V_02842C_STENCIL_ADD_WRAP;
3194 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
3195 return V_02842C_STENCIL_SUB_WRAP;
3196 default:
3197 return 0;
3198 }
3199 }
3200
3201 static inline uint32_t
si_translate_blend_logic_op(VkLogicOp op)3202 si_translate_blend_logic_op(VkLogicOp op)
3203 {
3204 switch (op) {
3205 case VK_LOGIC_OP_CLEAR:
3206 return V_028808_ROP3_CLEAR;
3207 case VK_LOGIC_OP_AND:
3208 return V_028808_ROP3_AND;
3209 case VK_LOGIC_OP_AND_REVERSE:
3210 return V_028808_ROP3_AND_REVERSE;
3211 case VK_LOGIC_OP_COPY:
3212 return V_028808_ROP3_COPY;
3213 case VK_LOGIC_OP_AND_INVERTED:
3214 return V_028808_ROP3_AND_INVERTED;
3215 case VK_LOGIC_OP_NO_OP:
3216 return V_028808_ROP3_NO_OP;
3217 case VK_LOGIC_OP_XOR:
3218 return V_028808_ROP3_XOR;
3219 case VK_LOGIC_OP_OR:
3220 return V_028808_ROP3_OR;
3221 case VK_LOGIC_OP_NOR:
3222 return V_028808_ROP3_NOR;
3223 case VK_LOGIC_OP_EQUIVALENT:
3224 return V_028808_ROP3_EQUIVALENT;
3225 case VK_LOGIC_OP_INVERT:
3226 return V_028808_ROP3_INVERT;
3227 case VK_LOGIC_OP_OR_REVERSE:
3228 return V_028808_ROP3_OR_REVERSE;
3229 case VK_LOGIC_OP_COPY_INVERTED:
3230 return V_028808_ROP3_COPY_INVERTED;
3231 case VK_LOGIC_OP_OR_INVERTED:
3232 return V_028808_ROP3_OR_INVERTED;
3233 case VK_LOGIC_OP_NAND:
3234 return V_028808_ROP3_NAND;
3235 case VK_LOGIC_OP_SET:
3236 return V_028808_ROP3_SET;
3237 default:
3238 unreachable("Unhandled logic op");
3239 }
3240 }
3241
3242 /*
3243 * Queue helper to get ring.
3244 * placed here as it needs queue + device structs.
3245 */
3246 static inline enum amd_ip_type
radv_queue_ring(struct radv_queue * queue)3247 radv_queue_ring(struct radv_queue *queue)
3248 {
3249 return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
3250 }
3251
3252 /**
3253 * Helper used for debugging compiler issues by enabling/disabling LLVM for a
3254 * specific shader stage (developers only).
3255 */
3256 static inline bool
radv_use_llvm_for_stage(struct radv_device * device,UNUSED gl_shader_stage stage)3257 radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
3258 {
3259 return device->physical_device->use_llvm;
3260 }
3261
3262 static inline bool
radv_has_shader_buffer_float_minmax(const struct radv_physical_device * pdevice)3263 radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice)
3264 {
3265 return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) ||
3266 pdevice->rad_info.gfx_level >= GFX10;
3267 }
3268
/* Driver-side acceleration structure object: a winsys buffer object, an
 * offset into it and a size. radv_accel_struct_get_va() adds `mem_offset` to
 * the buffer's address.
 */
struct radv_acceleration_structure {
   struct vk_object_base base;

   struct radeon_winsys_bo *bo;
   uint64_t mem_offset;
   uint64_t size;
};
3276
3277 static inline uint64_t
radv_accel_struct_get_va(const struct radv_acceleration_structure * accel)3278 radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
3279 {
3280 return radv_buffer_get_va(accel->bo) + accel->mem_offset;
3281 }
3282
/* radv_perfcounter.c */
/* Declarations only. All four write into the command stream `cs`; the SPM
 * start/stop variants additionally take the device and a queue `family`.
 * NOTE(review): the encoding of `shaders` is not visible here - see the
 * definition.
 */
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs,
                                     int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
                                    int family);
3290
/* radv_spm.c */
/* Declarations only: device-level SPM init/finish and emission of the SPM
 * setup into the command stream `cs`.
 * NOTE(review): the bool from radv_spm_init() presumably signals success -
 * confirm.
 */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
3295
/* Driver-local alias: expands to VK_FROM_HANDLE with the same arguments. */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
   VK_FROM_HANDLE(__radv_type, __name, __handle)
3298
3299 VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
3300 VK_OBJECT_TYPE_COMMAND_BUFFER)
3301 VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
3302 VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
3303 VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
3304 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
3305 VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
3306 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
3307 VkAccelerationStructureKHR,
3308 VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
3309 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
3310 VK_OBJECT_TYPE_COMMAND_POOL)
3311 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
3312 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
3313 VK_OBJECT_TYPE_BUFFER_VIEW)
3314 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
3315 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
3316 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
3317 VK_OBJECT_TYPE_DESCRIPTOR_SET)
3318 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
3319 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
3320 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
3321 VkDescriptorUpdateTemplate,
3322 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
3323 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
3324 VK_OBJECT_TYPE_DEVICE_MEMORY)
3325 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
3326 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
3327 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView,
3328 VK_OBJECT_TYPE_IMAGE_VIEW);
3329 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
3330 VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
3331 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
3332 VK_OBJECT_TYPE_PIPELINE_CACHE)
3333 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
3334 VK_OBJECT_TYPE_PIPELINE)
3335 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
3336 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
3337 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
3338 VK_OBJECT_TYPE_QUERY_POOL)
3339 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
3340 VK_OBJECT_TYPE_RENDER_PASS)
3341 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
3342 VK_OBJECT_TYPE_SAMPLER)
3343 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
3344 VkSamplerYcbcrConversion,
3345 VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
3346
3347 #ifdef __cplusplus
3348 }
3349 #endif
3350
3351 #endif /* RADV_PRIVATE_H */
3352