/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef RADV_DEVICE_H
#define RADV_DEVICE_H

#include "ac_descriptors.h"
#include "ac_spm.h"
#include "ac_sqtt.h"

#include "util/mesa-blake3.h"

#include "radv_pipeline.h"
#include "radv_printf.h"
#include "radv_queue.h"
#include "radv_radeon_winsys.h"
#include "radv_rra.h"
#include "radv_shader.h"

#include "vk_acceleration_structure.h"
#include "vk_device.h"
#include "vk_meta.h"
#include "vk_texcompress_astc.h"
#include "vk_texcompress_etc2.h"

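/* One hardware context per context priority level; RADEON_CTX_PRIORITY_REALTIME is
 * presumably the highest priority value, hence the +1.
 */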
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)

struct radv_image_view;

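/* Dispatch tables for the driver's internal layers (annotation, app-specific
 * behavior, RGP/SQTT, RRA, RMV and context-roll capture); these presumably wrap
 * the core device entrypoints so each kind of tracing/profiling can be enabled
 * independently.
 */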
enum radv_dispatch_table {
   RADV_DEVICE_DISPATCH_TABLE,
   RADV_ANNOTATE_DISPATCH_TABLE,
   RADV_APP_DISPATCH_TABLE,
   RADV_RGP_DISPATCH_TABLE,
   RADV_RRA_DISPATCH_TABLE,
   RADV_RMV_DISPATCH_TABLE,
   RADV_CTX_ROLL_DISPATCH_TABLE,
   RADV_DISPATCH_TABLE_COUNT,
};

struct radv_layer_dispatch_tables {
   struct vk_device_dispatch_table annotate;
   struct vk_device_dispatch_table app;
   struct vk_device_dispatch_table rgp;
   struct vk_device_dispatch_table rra;
   struct vk_device_dispatch_table rmv;
   struct vk_device_dispatch_table ctx_roll;
};

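/* Device-level settings that affect generated shaders/pipelines; likely folded
 * into cache_hash in radv_device so that cached pipelines are not reused across
 * devices created with different settings.
 */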
struct radv_device_cache_key {
   uint32_t keep_shader_info : 1;
   uint32_t disable_trunc_coord : 1;
   uint32_t image_2d_view_of_3d : 1;
   uint32_t mesh_shader_queries : 1;
   uint32_t primitives_generated_query : 1;
   uint32_t trap_excp_flags : 4;
};

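/* Shading rates that can be forced through the RADV_FORCE_VRS override (see
 * force_vrs in radv_device).
 */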
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};

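/* Background watcher thread used by the RADV_FORCE_VRS handling; the fd/watch
 * pair suggests an inotify-based file watch.
 */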
struct radv_notifier {
   int fd;
   int watch;
   bool quit;
   thrd_t thread;
};

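/* State for the driver-internal "meta" operations (clears, copies, blits,
 * acceleration-structure builds, texture decompression, ...), which are
 * implemented with their own pipelines and cached here.
 */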
struct radv_meta_state {
   VkAllocationCallbacks alloc;

   VkPipelineCache cache;
   uint32_t initial_cache_entries;

   /*
    * For on-demand pipeline creation, this ensures that only one thread tries
    * to build a pipeline at a time.
    */
   mtx_t mtx;

   struct {
      VkPipelineLayout encode_p_layout;
      VkPipeline encode_pipeline;
      VkPipeline encode_compact_pipeline;
      VkPipelineLayout header_p_layout;
      VkPipeline header_pipeline;
      VkPipelineLayout update_p_layout;
      VkPipeline update_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;

      struct radix_sort_vk *radix_sort;
      struct vk_acceleration_structure_build_args build_args;

      struct {
         VkBuffer buffer;
         VkDeviceMemory memory;
         VkAccelerationStructureKHR accel_struct;
      } null;
   } accel_struct_build;

   struct vk_texcompress_etc2_state etc_decode;

   struct vk_texcompress_astc_state *astc_decode;

   struct vk_meta_device device;
};

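/* State for memory tracing (RMV captures); presumably consumes PTE-update
 * events from ftrace, with one pipe per CPU.
 */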
struct radv_memory_trace_data {
   /* ID of the PTE update event in ftrace data */
   uint16_t ftrace_update_ptes_id;

   uint32_t num_cpus;
   int *pipe_fds;
};

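/* A mapped buffer used to record SQTT (thread trace) timestamps for queue
 * events; additional chunks are presumably chained through the list head.
 */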
struct radv_sqtt_timestamp {
   uint8_t *map;
   unsigned offset;
   uint64_t size;
   struct radeon_winsys_bo *bo;
   struct list_head list;
};

#define RADV_BORDER_COLOR_COUNT       4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)

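/* Backing storage for custom border colors: a GPU buffer with
 * RADV_BORDER_COLOR_COUNT slots plus a CPU-side map of which slots are in use.
 */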
struct radv_device_border_color_data {
   bool used[RADV_BORDER_COLOR_COUNT];

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr;

   /* A mutex is required to guarantee vkCreateSampler thread safety, given
    * that we write to the buffer and check which color slots are occupied. */
   mtx_t mutex;
};

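/* Hit/miss counters for pipeline lookups, tracked per pipeline type (see
 * pso_cache_stats in radv_device).
 */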
struct radv_pso_cache_stats {
   uint32_t hits;
   uint32_t misses;
};

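/* The radv logical device: owns the winsys context(s), queues, meta state and
 * all of the debugging/profiling machinery declared above.
 */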
struct radv_device {
   struct vk_device vk;

   struct radeon_winsys *ws;

   struct radv_layer_dispatch_tables layer_dispatch;

   struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
   struct radv_meta_state meta_state;

   struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
   int queue_count[RADV_MAX_QUEUE_FAMILIES];

   bool pbb_allowed;
   uint32_t scratch_waves;
   uint32_t dispatch_initiator;
   uint32_t dispatch_initiator_task;

   /* MSAA sample locations.
    * The first index is the sample index.
    * The second index is the coordinate: X, Y. */
   float sample_locations_1x[1][2];
   float sample_locations_2x[2][2];
   float sample_locations_4x[4][2];
   float sample_locations_8x[8][2];

   /* GFX7 and later */
   uint32_t gfx_init_size_dw;
   struct radeon_winsys_bo *gfx_init;

   struct radeon_winsys_bo *trace_bo;
   struct radv_trace_data *trace_data;

   /* Whether to keep shader debug info, for debugging. */
   bool keep_shader_info;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct vk_pipeline_cache *mem_cache;

   /*
    * Use different counters so MSAA MRTs get consecutive surface indices,
    * even if MASK is allocated in between.
    */
   uint32_t image_mrt_offset_counter;
   uint32_t fmask_mrt_offset_counter;

   struct list_head shader_arenas;
   struct hash_table_u64 *capture_replay_arena_vas;
   unsigned shader_arena_shift;
   uint8_t shader_free_list_mask;
   struct radv_shader_free_list shader_free_list;
   struct radv_shader_free_list capture_replay_free_list;
   struct list_head shader_block_obj_pool;
   mtx_t shader_arena_mutex;

   mtx_t shader_upload_hw_ctx_mutex;
   struct radeon_winsys_ctx *shader_upload_hw_ctx;
   VkSemaphore shader_upload_sem;
   uint64_t shader_upload_seq;
   struct list_head shader_dma_submissions;
   mtx_t shader_dma_submission_list_mutex;
   cnd_t shader_dma_submission_list_cond;

   /* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
   bool shader_use_invisible_vram;

   /* Whether to inline the compute dispatch size in user sgprs. */
   bool load_grid_size_from_user_sgpr;

   /* Whether the driver uses a global BO list. */
   bool use_global_bo_list;

   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

   /* Always disable TRUNC_COORD. */
   bool disable_trunc_coord;

   struct radv_device_border_color_data border_color_data;

   /* Thread trace. */
   struct ac_sqtt sqtt;
   bool sqtt_enabled;
   bool sqtt_triggered;

   /* SQTT timestamps for queue events. */
   simple_mtx_t sqtt_timestamp_mtx;
   struct radv_sqtt_timestamp sqtt_timestamp;

   /* SQTT timed cmd buffers. */
   simple_mtx_t sqtt_command_pool_mtx;
   struct vk_command_pool *sqtt_command_pool[2];

   /* Memory trace. */
   struct radv_memory_trace_data memory_trace;

   /* SPM. */
   struct ac_spm spm;

   /* Radeon Raytracing Analyzer trace. */
   struct radv_rra_trace_data rra_trace;

   FILE *ctx_roll_file;
   simple_mtx_t ctx_roll_mtx;

   /* Trap handler. */
   struct radv_shader *trap_handler_shader;
   struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
   uint32_t *tma_ptr;

   /* Overallocation. */
   bool overallocation_disallowed;
   uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
   mtx_t overallocation_mutex;

   /* RADV_FORCE_VRS. */
   struct radv_notifier notifier;
   enum radv_force_vrs force_vrs;

   /* Depth image for VRS when not bound by the app. */
   struct {
      struct radv_image *image;
      struct radv_buffer *buffer; /* HTILE */
      struct radv_device_memory *mem;
   } vrs;

   /* Prime blit SDMA queue. */
   struct radv_queue *private_sdma_queue;

   struct radv_shader_part_cache vs_prologs;
   struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
   struct radv_shader_part *instance_rate_vs_prologs[816];

   struct radv_shader_part_cache ps_epilogs;

   simple_mtx_t trace_mtx;

   /* Whether per-vertex VRS is forced. */
   bool force_vrs_enabled;

   simple_mtx_t pstate_mtx;
   unsigned pstate_cnt;

   /* BO that contains some performance counter helpers:
    * - a lock for profiling cmdbuffers,
    * - a temporary fence for end-of-query synchronization,
    * - the pass to use for profiling (as an array of bools).
    */
   struct radeon_winsys_bo *perf_counter_bo;

   /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
   struct radeon_cmdbuf **perf_counter_lock_cs;

   bool uses_shadow_regs;

   struct hash_table *rt_handles;
   simple_mtx_t rt_handles_mtx;

   struct radv_printf_data printf;

   struct radv_device_cache_key cache_key;
   blake3_hash cache_hash;

   /* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
   char *gpu_hang_report;

   /* PSO cache stats. */
   simple_mtx_t pso_cache_stats_mtx;
   struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];

   struct radv_address_binding_tracker *addr_binding_tracker;
};

VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)

static inline struct radv_physical_device *
radv_device_physical(const struct radv_device *dev)
{
   return (struct radv_physical_device *)dev->vk.physical;
}
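
/* A minimal usage sketch (not part of this header): driver entrypoints typically
 * recover the radv_device from the VkDevice handle via the casts defined above
 * and then query the physical device, e.g.:
 *
 *    VKAPI_ATTR void VKAPI_CALL radv_SomeEntrypoint(VkDevice _device)
 *    {
 *       VK_FROM_HANDLE(radv_device, device, _device);
 *       const struct radv_physical_device *pdev = radv_device_physical(device);
 *       ...
 *    }
 *
 * radv_SomeEntrypoint is a hypothetical name; VK_FROM_HANDLE is the common Mesa
 * helper built on top of VK_DEFINE_HANDLE_CASTS.
 */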

static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{
   return device->vk.enabled_features.primitivesGeneratedQuery ||
          device->vk.enabled_features.primitivesGeneratedQueryWithRasterizerDiscard ||
          device->vk.enabled_features.primitivesGeneratedQueryWithNonZeroStreams;
}

static inline bool
radv_uses_image_float32_atomics(const struct radv_device *device)
{
   return device->vk.enabled_features.shaderImageFloat32Atomics ||
          device->vk.enabled_features.sparseImageFloat32Atomics ||
          device->vk.enabled_features.shaderImageFloat32AtomicMinMax ||
          device->vk.enabled_features.sparseImageFloat32AtomicMinMax;
}

VkResult radv_device_init_vrs_state(struct radv_device *device);

unsigned radv_get_default_max_sample_dist(int log_samples);

void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
                                        int nr_samples);

unsigned radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image);

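/* Hardware surface state for a color attachment, wrapping the common
 * ac_cb_surface description from ac_descriptors.h.
 */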
struct radv_color_buffer_info {
   struct ac_cb_surface ac;
};

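/* Hardware surface state for a depth/stencil attachment, plus the associated
 * DB_RENDER_CONTROL and DB_RENDER_OVERRIDE2 register values.
 */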
struct radv_ds_buffer_info {
   struct ac_ds_surface ac;

   uint32_t db_render_override2;
   uint32_t db_render_control;
};

void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                                   struct radv_image_view *iview);

void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
                                 struct radv_ds_buffer_info *ds);

void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
                                struct radv_image_view *iview, VkImageAspectFlags ds_aspects);

void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
                                      unsigned *db_render_control);

bool radv_device_set_pstate(struct radv_device *device, bool enable);

bool radv_device_acquire_performance_counters(struct radv_device *device);

void radv_device_release_performance_counters(struct radv_device *device);

#endif /* RADV_DEVICE_H */