1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
6 *
7 * based in part on radv driver which is:
8 * Copyright © 2016 Red Hat.
9 * Copyright © 2016 Bas Nieuwenhuizen
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
20 * Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38
39 #include "vk_descriptor_update_template.h"
40 #include "vk_device.h"
41 #include "vk_device_memory.h"
42 #include "vk_format.h"
43 #include "vk_instance.h"
44 #include "vk_image.h"
45 #include "vk_log.h"
46 #include "vk_physical_device.h"
47 #include "vk_shader_module.h"
48 #include "vk_sync.h"
49 #include "vk_sync_timeline.h"
50 #include "vk_util.h"
51 #include "vk_ycbcr_conversion.h"
52
53 #include "vk_command_buffer.h"
54 #include "vk_command_pool.h"
55 #include "vk_queue.h"
56 #include "vk_pipeline.h"
57
58 #include <xf86drm.h>
59
60 #ifdef HAVE_VALGRIND
61 #include <valgrind.h>
62 #include <memcheck.h>
63 #define VG(x) x
64 #else
65 #define VG(x) ((void)0)
66 #endif
67
68 #include "util/detect_os.h"
69
70 #include "v3dv_limits.h"
71
72 #include "common/v3d_device_info.h"
73 #include "common/v3d_limits.h"
74 #include "common/v3d_tiling.h"
75 #include "common/v3d_util.h"
76
77 #include "compiler/shader_enums.h"
78 #include "compiler/spirv/nir_spirv.h"
79
80 #include "compiler/v3d_compiler.h"
81
82 #include "vk_debug_report.h"
83 #include "util/set.h"
84 #include "util/hash_table.h"
85 #include "util/sparse_array.h"
86 #include "util/xmlconfig.h"
87 #include "util/u_atomic.h"
88
89 #include "v3dv_entrypoints.h"
90 #include "v3dv_bo.h"
91
92 #include "drm-uapi/v3d_drm.h"
93
94 #include "vk_alloc.h"
95 #include "perfcntrs/v3d_perfcntrs.h"
96 #include "simulator/v3d_simulator.h"
97
98 #include "v3dv_cl.h"
99
100 #include "wsi_common.h"
101
102 /* A non-fatal assert. Useful for debugging. */
103 #if MESA_DEBUG
104 #define v3dv_assert(x) ({ \
105 if (unlikely(!(x))) \
106 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
107 })
108 #else
109 #define v3dv_assert(x)
110 #endif
111
112 #define perf_debug(...) do { \
113 if (V3D_DBG(PERF)) \
114 mesa_logi(__VA_ARGS__); \
115 } while (0)
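/* Example usage (illustrative only): v3dv_assert() logs a non-fatal error on
 * debug builds, while perf_debug() only logs when the PERF debug flag is
 * enabled:
 *
 *    v3dv_assert(count > 0);
 *    perf_debug("Falling back to a slow path in %s\n", __func__);
 */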
116
117 struct v3dv_instance;
118
119 struct v3d_simulator_file;
120
121 /* Minimum required by the Vulkan 1.1 spec */
122 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
123
124 /* Maximum number of performance counters */
125 #define V3D_MAX_PERFCNT 93
126
127 struct v3dv_physical_device {
128 struct vk_physical_device vk;
129
130 char *name;
131 int32_t render_fd;
132 int32_t display_fd;
133
134 /* We need these because it is not clear how to detect
135 * valid devids in a portable way
136 */
137 bool has_primary;
138 bool has_render;
139
140 dev_t primary_devid;
141 dev_t render_devid;
142
143 uint8_t driver_build_sha1[20];
144 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
145 uint8_t device_uuid[VK_UUID_SIZE];
146 uint8_t driver_uuid[VK_UUID_SIZE];
147
148 struct vk_sync_type drm_syncobj_type;
149 struct vk_sync_timeline_type sync_timeline_type;
150 const struct vk_sync_type *sync_types[3];
151
152 struct disk_cache *disk_cache;
153
154 mtx_t mutex;
155
156 struct wsi_device wsi_device;
157
158 VkPhysicalDeviceMemoryProperties memory;
159
160 struct v3d_device_info devinfo;
161 struct v3d_perfcntrs *perfcntr;
162
163 #if USE_V3D_SIMULATOR
164 struct v3d_simulator_file *sim_file;
165 #endif
166
167 const struct v3d_compiler *compiler;
168 uint32_t next_program_id;
169
170 alignas(8) uint64_t heap_used;
171
172 /* This array holds all our 'struct v3dv_bo' allocations. We use this
173 * so we can add a refcount to our BOs and check if a particular BO
174 * was already allocated in this device using its GEM handle. This is
175 * necessary to properly manage BO imports, because the kernel doesn't
176 * refcount the underlying BO memory.
177 *
178 * Specifically, when self-importing (i.e. importing a BO into the same
179 * device that created it), the kernel will give us the same BO handle
180 * for both BOs and we must only free it once when both references are
181 * freed. Otherwise, if we are not self-importing, we get two different BO
182 * handles, and we want to free each one individually.
183 *
184 * All BOs in this map are reference counted through their refcnt field, and
185 * only self-imported BOs will ever have a refcnt > 1.
186 */
187 struct util_sparse_array bo_map;
188
189 struct {
190 bool merge_jobs;
191 } options;
192
193 struct {
194 bool cpu_queue;
195 bool multisync;
196 bool perfmon;
197 } caps;
198 };
199
200 static inline struct v3dv_bo *
201 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
202 {
203 return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
204 }
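/* Illustrative sketch (not driver code) of how bo_map and the refcount are
 * meant to interact on import: look up the GEM handle first and, if the BO
 * already exists in this device (a self-import), take another reference
 * instead of creating a second BO for the same handle. The init helper name
 * below is hypothetical.
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt > 0)
 *       p_atomic_inc(&bo->refcnt);          // self-import: share the BO
 *    else
 *       init_imported_bo(bo, gem_handle);   // hypothetical: fresh import
 */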
205
206 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
207 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
208 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
209 uint32_t index);
210
211 void v3dv_meta_clear_init(struct v3dv_device *device);
212 void v3dv_meta_clear_finish(struct v3dv_device *device);
213
214 void v3dv_meta_blit_init(struct v3dv_device *device);
215 void v3dv_meta_blit_finish(struct v3dv_device *device);
216
217 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
218 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
219
220 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
221 uint8_t plane,
222 uint8_t miplevel,
223 const VkOffset3D *offset,
224 const VkExtent3D *extent,
225 VkFormat *compat_format);
226
227 struct v3dv_instance {
228 struct vk_instance vk;
229
230 bool pipeline_cache_enabled;
231 bool default_pipeline_cache_enabled;
232 bool meta_cache_enabled;
233 };
234
235 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
236 * tfu), we still need a syncobj to track the last overall job submitted
237 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
238 * start expecting multisync to be present and drop the legacy implementation
239 * together with this V3DV_QUEUE_ANY tracker.
240 */
241 enum v3dv_queue_type {
242 V3DV_QUEUE_CL = 0,
243 V3DV_QUEUE_CSD,
244 V3DV_QUEUE_TFU,
245 V3DV_QUEUE_CPU,
246 V3DV_QUEUE_ANY,
247 V3DV_QUEUE_COUNT,
248 };
249
250 /* For each GPU queue, we use a syncobj to track the last job submitted. We
251 * use the `first` flag to determine when we are starting a new cmd buffer
252 * batch, and therefore whether a job submitted to a given queue will be the
253 * first one in that batch.
254 */
255 struct v3dv_last_job_sync {
256 /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
257 *
258 * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
259 */
260 bool first[V3DV_QUEUE_COUNT];
261 /* Array of syncobj to track the last job submitted to a GPU queue.
262 *
263 * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
264 * queue, but without multisync we only track the last job submitted to any
265 * queue in V3DV_QUEUE_ANY.
266 */
267 uint32_t syncs[V3DV_QUEUE_COUNT];
268 };
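/* Illustrative sketch (simplified) of how the syncs array is indexed: with
 * multisync each GPU queue tracks its own syncobj, while without it all
 * tracking funnels through V3DV_QUEUE_ANY:
 *
 *    uint32_t sync_idx = has_multisync ? queue_type : V3DV_QUEUE_ANY;
 *    uint32_t syncobj = last_job_syncs->syncs[sync_idx];
 */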
269
270 struct v3dv_queue {
271 struct vk_queue vk;
272
273 struct v3dv_device *device;
274
275 struct v3dv_last_job_sync last_job_syncs;
276
277 struct v3dv_job *noop_job;
278
279 /* The last active perfmon ID to prevent mixing of counter results when a
280 * job is submitted with a different perfmon id.
281 */
282 uint32_t last_perfmon_id;
283 };
284
285 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
286 struct vk_queue_submit *submit);
287
288 #define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
289 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
290 sizeof(VkComponentMapping))
291
292 struct v3dv_meta_color_clear_pipeline {
293 VkPipeline pipeline;
294 VkRenderPass pass;
295 bool cached;
296 uint64_t key;
297 };
298
299 struct v3dv_meta_depth_clear_pipeline {
300 VkPipeline pipeline;
301 uint64_t key;
302 };
303
304 struct v3dv_meta_blit_pipeline {
305 VkPipeline pipeline;
306 VkRenderPass pass;
307 VkRenderPass pass_no_load;
308 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
309 };
310
311 struct v3dv_meta_texel_buffer_copy_pipeline {
312 VkPipeline pipeline;
313 VkRenderPass pass;
314 VkRenderPass pass_no_load;
315 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
316 };
317
318 struct v3dv_pipeline_key {
319 uint8_t topology;
320 uint8_t logicop_func;
321 bool msaa;
322 bool sample_alpha_to_coverage;
323 bool sample_alpha_to_one;
324 uint8_t cbufs;
325 struct {
326 enum pipe_format format;
327 uint8_t swizzle[4];
328 } color_fmt[V3D_MAX_DRAW_BUFFERS];
329 uint8_t f32_color_rb;
330 uint32_t va_swap_rb_mask;
331 bool has_multiview;
332 bool line_smooth;
333 };
334
335 struct v3dv_pipeline_cache_stats {
336 uint32_t miss;
337 uint32_t hit;
338 uint32_t count;
339 uint32_t on_disk_hit;
340 };
341
342 /* Equivalent to gl_shader_stage, but including the coordinate shaders
343 *
344 * FIXME: perhaps move to common
345 */
346 enum broadcom_shader_stage {
347 BROADCOM_SHADER_VERTEX,
348 BROADCOM_SHADER_VERTEX_BIN,
349 BROADCOM_SHADER_GEOMETRY,
350 BROADCOM_SHADER_GEOMETRY_BIN,
351 BROADCOM_SHADER_FRAGMENT,
352 BROADCOM_SHADER_COMPUTE,
353 };
354
355 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
356
357 /* Assumes that coordinate shaders will be custom-handled by the caller */
358 static inline enum broadcom_shader_stage
359 gl_shader_stage_to_broadcom(gl_shader_stage stage)
360 {
361 switch (stage) {
362 case MESA_SHADER_VERTEX:
363 return BROADCOM_SHADER_VERTEX;
364 case MESA_SHADER_GEOMETRY:
365 return BROADCOM_SHADER_GEOMETRY;
366 case MESA_SHADER_FRAGMENT:
367 return BROADCOM_SHADER_FRAGMENT;
368 case MESA_SHADER_COMPUTE:
369 return BROADCOM_SHADER_COMPUTE;
370 default:
371 unreachable("Unknown gl shader stage");
372 }
373 }
374
375 static inline gl_shader_stage
376 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
377 {
378 switch (stage) {
379 case BROADCOM_SHADER_VERTEX:
380 case BROADCOM_SHADER_VERTEX_BIN:
381 return MESA_SHADER_VERTEX;
382 case BROADCOM_SHADER_GEOMETRY:
383 case BROADCOM_SHADER_GEOMETRY_BIN:
384 return MESA_SHADER_GEOMETRY;
385 case BROADCOM_SHADER_FRAGMENT:
386 return MESA_SHADER_FRAGMENT;
387 case BROADCOM_SHADER_COMPUTE:
388 return MESA_SHADER_COMPUTE;
389 default:
390 unreachable("Unknown broadcom shader stage");
391 }
392 }
393
394 static inline bool
395 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
396 {
397 switch (stage) {
398 case BROADCOM_SHADER_VERTEX_BIN:
399 case BROADCOM_SHADER_GEOMETRY_BIN:
400 return true;
401 default:
402 return false;
403 }
404 }
405
406 static inline bool
407 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
408 {
409 switch (stage) {
410 case BROADCOM_SHADER_VERTEX:
411 case BROADCOM_SHADER_GEOMETRY:
412 return true;
413 default:
414 return false;
415 }
416 }
417
418 static inline enum broadcom_shader_stage
419 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
420 {
421 switch (stage) {
422 case BROADCOM_SHADER_VERTEX:
423 return BROADCOM_SHADER_VERTEX_BIN;
424 case BROADCOM_SHADER_GEOMETRY:
425 return BROADCOM_SHADER_GEOMETRY_BIN;
426 default:
427 unreachable("Invalid shader stage");
428 }
429 }
430
431 static inline const char *
432 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
433 {
434 switch(stage) {
435 case BROADCOM_SHADER_VERTEX_BIN:
436 return "MESA_SHADER_VERTEX_BIN";
437 case BROADCOM_SHADER_GEOMETRY_BIN:
438 return "MESA_SHADER_GEOMETRY_BIN";
439 default:
440 return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
441 }
442 }
443
444 struct v3dv_pipeline_cache {
445 struct vk_object_base base;
446
447 struct v3dv_device *device;
448 mtx_t mutex;
449
450 struct hash_table *nir_cache;
451 struct v3dv_pipeline_cache_stats nir_stats;
452
453 struct hash_table *cache;
454 struct v3dv_pipeline_cache_stats stats;
455
456 /* For VK_EXT_pipeline_creation_cache_control. */
457 bool externally_synchronized;
458 };
459
460 struct v3dv_device {
461 struct vk_device vk;
462
463 struct v3dv_instance *instance;
464 struct v3dv_physical_device *pdevice;
465
466 struct v3d_device_info devinfo;
467 struct v3dv_queue queue;
468
469 /* Guards query->maybe_available and value for timestamps */
470 mtx_t query_mutex;
471
472 /* Signaled whenever a query is ended */
473 cnd_t query_ended;
474
475 /* Resources used for meta operations */
476 struct {
477 mtx_t mtx;
478 struct {
479 VkPipelineLayout p_layout;
480 struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
481 } color_clear;
482 struct {
483 VkPipelineLayout p_layout;
484 struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
485 } depth_clear;
486 struct {
487 VkDescriptorSetLayout ds_layout;
488 VkPipelineLayout p_layout;
489 struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
490 } blit;
491 struct {
492 VkDescriptorSetLayout ds_layout;
493 VkPipelineLayout p_layout;
494 struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
495 } texel_buffer_copy;
496 } meta;
497
498 struct v3dv_bo_cache {
499 /** List of struct v3d_bo freed, by age. */
500 struct list_head time_list;
501 /** List of struct v3d_bo freed, per size, by age. */
502 struct list_head *size_list;
503 uint32_t size_list_size;
504
505 mtx_t lock;
506
507 uint32_t cache_size;
508 uint32_t cache_count;
509 uint32_t max_cache_size;
510 } bo_cache;
511
512 uint32_t bo_size;
513 uint32_t bo_count;
514
515 /* Event handling resources.
516 *
517 * Our implementation of events uses a BO to store event state (signaled vs
518 * reset) and dispatches compute shaders to handle GPU event functions
519 * (signal, reset, wait). This struct holds all the resources required
520 * by the implementation.
521 */
522 struct {
523 mtx_t lock;
524
525 /* BO for the event states: signaled (1) or reset (0) */
526 struct v3dv_bo *bo;
527
528 /* We pre-allocate all the events we can fit for the size of the BO we
529 * create to track their states, where each event has an index which is
530 * basically the offset of its state in that BO. We keep a free list with
531 * the pre-allocated events that are available.
532 */
533 uint32_t event_count;
534 struct v3dv_event *events;
535 struct list_head free_list;
536
537 /* Vulkan resources to access the event BO from shaders. We have a
538 * pipeline that sets the state of an event and another that waits on
539 * a single event. Both pipelines require access to the event state BO,
540 * for which we need to allocate a single descriptor set.
541 */
542 VkBuffer buffer;
543 VkDeviceMemory mem;
544 VkDescriptorSetLayout descriptor_set_layout;
545 VkPipelineLayout pipeline_layout;
546 VkDescriptorPool descriptor_pool;
547 VkDescriptorSet descriptor_set;
548 VkPipeline set_event_pipeline;
549 VkPipeline wait_event_pipeline;
550 } events;
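/* Illustrative sketch (the exact state layout and per_event_state_size are
 * assumptions): because each pre-allocated event owns a fixed slot in the
 * state BO, its state location can be derived directly from its index, e.g.:
 *
 *    uint32_t state_offset = event->index * per_event_state_size;
 *    uint8_t *state = (uint8_t *) device->events.bo->map + state_offset;
 */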
551
552 /* Query handling resources.
553 *
554 * Our implementation of occlusion queries uses a BO per pool to keep track
555 * of the per-query availability state and dispatches compute shaders to
556 * handle GPU query functions that read and write that state. This struct
557 * holds Vulkan resources that can be shared across all query pools to
558 * implement this. This framework may be extended in the future to handle
559 * more query types.
560 */
561 struct {
562 VkDescriptorSetLayout buf_descriptor_set_layout;
563
564 /* Set query availability */
565 VkPipelineLayout avail_pipeline_layout;
566 VkPipeline avail_pipeline;
567
568 /* Reset query availability and clear occlusion counters */
569 VkPipelineLayout reset_occlusion_pipeline_layout;
570 VkPipeline reset_occlusion_pipeline;
571
572 /* Copy query results */
573 VkPipelineLayout copy_pipeline_layout;
574 VkPipeline copy_pipeline[8];
575 } queries;
576
577 struct v3dv_pipeline_cache default_pipeline_cache;
578
579 /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
580 * following covers the most common case, that is, all attribute formats
581 * being float, allowing us to reuse the same BO for all
582 * pipelines matching this requirement. Pipelines that need integer
583 * attributes will create their own BO.
584 *
585 * Note that since v71 the default attribute values are not needed, so this
586 * can be NULL.
587 */
588 struct v3dv_bo *default_attribute_float;
589
590 void *device_address_mem_ctx;
591 struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
592 };
593
594 struct v3dv_device_memory {
595 struct vk_device_memory vk;
596
597 struct v3dv_bo *bo;
598 const VkMemoryType *type;
599 bool is_for_wsi;
600 bool is_for_device_address;
601 };
602
603 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
604 #define TEXTURE_DATA_FORMAT_NO 255
605
606 #define V3DV_MAX_PLANE_COUNT 3
607 struct v3dv_format_plane {
608 /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
609 uint8_t rt_type;
610
611 /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
612 uint8_t tex_type;
613
614 /* Swizzle to apply to the RGBA shader output for storing to the tile
615 * buffer, to the RGBA tile buffer to produce shader input (for
616 * blending), and for turning the rgba8888 texture sampler return
617 * value into shader rgba values.
618 */
619 uint8_t swizzle[4];
620
621 /* Whether the return value is 16F/I/UI or 32F/I/UI. */
622 uint8_t return_size;
623 };
624
625 struct v3dv_format {
626 /* A non-zero plane count implies the format is supported */
627 uint8_t plane_count;
628
629 struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT];
630
631 /* If the format supports (linear) filtering when texturing. */
632 bool supports_filtering;
633 };
634
635 /* Note that although VkImageAspectFlags would allow combining more than one
636 * PLANE bit, for all the use cases we implement that use VkImageAspectFlags,
637 * only one plane is allowed, like for example vkCmdCopyImage:
638 *
639 * "If srcImage has a VkFormat with two planes then for each element of
640 * pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT
641 * or VK_IMAGE_ASPECT_PLANE_1_BIT"
642 *
643 */
644 static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect)
645 {
646 switch (aspect) {
647 case VK_IMAGE_ASPECT_COLOR_BIT:
648 case VK_IMAGE_ASPECT_DEPTH_BIT:
649 case VK_IMAGE_ASPECT_STENCIL_BIT:
650 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
651 case VK_IMAGE_ASPECT_PLANE_0_BIT:
652 case VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT:
653 return 0;
654 case VK_IMAGE_ASPECT_PLANE_1_BIT:
655 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
656 return 1;
657 case VK_IMAGE_ASPECT_PLANE_2_BIT:
658 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
659 return 2;
660 default:
661 unreachable("invalid image aspect");
662 }
663 }
664
665 struct v3d_resource_slice {
666 uint32_t offset;
667 uint32_t stride;
668 uint32_t padded_height;
669 uint32_t width;
670 uint32_t height;
671 /* Size of a single pane of the slice. For 3D textures, there will be
672 * a number of panes equal to the minified, power-of-two-aligned
673 * depth.
674 */
675 uint32_t size;
676 uint8_t ub_pad;
677 enum v3d_tiling_mode tiling;
678 uint32_t padded_height_of_output_image_in_uif_blocks;
679 };
680
681 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
682 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
683
684 struct v3dv_image {
685 struct vk_image vk;
686
687 const struct v3dv_format *format;
688 bool tiled;
689
690 uint8_t plane_count;
691
692 /* If 0, this is a multi-plane image using disjoint memory, where each
693 * plane is bound to a different device memory. Otherwise, all the planes share
694 * the same device memory and this stores the total size of the image in
695 * bytes.
696 */
697 uint32_t non_disjoint_size;
698
699 struct {
700 uint32_t cpp;
701
702 struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
703 /* Total size of the plane in bytes. */
704 uint64_t size;
705 uint32_t cube_map_stride;
706
707 /* If not using disjoint memory, mem and mem_offset are the same for all
708 * planes, in which case mem_offset is the offset of plane 0.
709 */
710 struct v3dv_device_memory *mem;
711 VkDeviceSize mem_offset;
712 uint32_t alignment;
713
714 /* Pre-subsampled per plane width and height
715 */
716 uint32_t width;
717 uint32_t height;
718
719 /* Even if we can get it from the parent image format, we keep the
720 * format here for convenience
721 */
722 VkFormat vk_format;
723 } planes[V3DV_MAX_PLANE_COUNT];
724
725 /* Used only when sampling a linear texture (which V3D doesn't support).
726 * This holds a tiled copy of the image we can use for that purpose.
727 */
728 struct v3dv_image *shadow;
729 };
730
731 VkResult
732 v3dv_image_init(struct v3dv_device *device,
733 const VkImageCreateInfo *pCreateInfo,
734 const VkAllocationCallbacks *pAllocator,
735 struct v3dv_image *image);
736
737 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
738
739 static uint32_t
740 v3dv_image_aspect_to_plane(const struct v3dv_image *image,
741 VkImageAspectFlagBits aspect)
742 {
743 assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects));
744
745 /* Because we always put image and view planes in aspect-bit-order, the
746 * plane index is the number of bits in the image aspect before aspect.
747 */
748 return util_bitcount(image->vk.aspects & (aspect - 1));
749 }
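/* For example, for a combined depth/stencil image (vk.aspects has both
 * DEPTH and STENCIL set), asking for the STENCIL aspect counts the single
 * lower aspect bit (DEPTH) and returns plane 1, while DEPTH returns plane 0.
 */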
750
751 /* Pre-generating packets needs to consider changes in packet sizes across hw
752 * versions. Keep things simple and allocate enough space for any supported
753 * version. We ensure the size is large enough through static asserts.
754 */
755 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
756 #define V3DV_SAMPLER_STATE_LENGTH 24
757 #define V3DV_BLEND_CFG_LENGTH 5
758 #define V3DV_CFG_BITS_LENGTH 4
759 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
760 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
761 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
762 #define V3DV_STENCIL_CFG_LENGTH 6
763
764 struct v3dv_image_view {
765 struct vk_image_view vk;
766
767 const struct v3dv_format *format;
768
769 uint8_t view_swizzle[4];
770
771 uint8_t plane_count;
772 struct {
773 uint8_t image_plane;
774
775 bool swap_rb;
776 bool channel_reverse;
777 uint32_t internal_bpp;
778 uint32_t internal_type;
779 uint32_t offset;
780
781 /* Precomputed swizzle (composed from the view swizzle and the format
782 * swizzle).
783 *
784 * This could also be included in the descriptor BO, but the shader state
785 * packet doesn't need it in a BO, so we can just avoid a memory copy.
786 */
787 uint8_t swizzle[4];
788
789 /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
790 * during UpdateDescriptorSets.
791 *
792 * Empirical tests show that cube arrays need a different shader state
793 * depending on whether they are used with a sampler or not, so for these
794 * we generate two states and select the one to use based on the descriptor
795 * type.
796 */
797 uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
798 } planes[V3DV_MAX_PLANE_COUNT];
799
800 /* Used only when sampling a linear texture (which V3D doesn't support).
801 * This would represent a view over the tiled shadow image.
802 */
803 struct v3dv_image_view *shadow;
804 };
805
806 VkResult v3dv_create_image_view(struct v3dv_device *device,
807 const VkImageViewCreateInfo *pCreateInfo,
808 VkImageView *pView);
809
810 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer,
811 uint8_t plane);
812
813 struct v3dv_buffer {
814 struct vk_object_base base;
815
816 VkDeviceSize size;
817 VkBufferUsageFlagBits2KHR usage;
818 uint32_t alignment;
819
820 struct v3dv_device_memory *mem;
821 VkDeviceSize mem_offset;
822 };
823
824 void
825 v3dv_buffer_init(struct v3dv_device *device,
826 const VkBufferCreateInfo *pCreateInfo,
827 struct v3dv_buffer *buffer,
828 uint32_t alignment);
829
830 void
831 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info);
832
833 struct v3dv_buffer_view {
834 struct vk_object_base base;
835
836 struct v3dv_buffer *buffer;
837
838 VkFormat vk_format;
839 const struct v3dv_format *format;
840 uint32_t internal_bpp;
841 uint32_t internal_type;
842
843 uint32_t offset;
844 uint32_t size;
845 uint32_t num_elements;
846
847 /* Prepacked TEXTURE_SHADER_STATE. */
848 uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
849 };
850
851 struct v3dv_subpass_attachment {
852 uint32_t attachment;
853 VkImageLayout layout;
854 };
855
856 struct v3dv_subpass {
857 uint32_t input_count;
858 struct v3dv_subpass_attachment *input_attachments;
859
860 uint32_t color_count;
861 struct v3dv_subpass_attachment *color_attachments;
862 struct v3dv_subpass_attachment *resolve_attachments;
863
864 struct v3dv_subpass_attachment ds_attachment;
865 struct v3dv_subpass_attachment ds_resolve_attachment;
866 bool resolve_depth, resolve_stencil;
867
868 /* If we need to emit the clear of the depth/stencil attachment using
869 * a draw call instead of using the TLB (GFXH-1461).
870 */
871 bool do_depth_clear_with_draw;
872 bool do_stencil_clear_with_draw;
873
874 /* Multiview */
875 uint32_t view_mask;
876 };
877
878 struct v3dv_render_pass_attachment {
879 VkAttachmentDescription2 desc;
880
881 uint32_t first_subpass;
882 uint32_t last_subpass;
883
884 /* When multiview is enabled, we no longer care about when a particular
885 * attachment is first or last used in a render pass, since not all views
886 * in the attachment will meet that criteria. Instead, we need to track
887 * each individual view (layer) in each attachment and emit our stores,
888 * loads and clears accordingly.
889 */
890 struct {
891 uint32_t first_subpass;
892 uint32_t last_subpass;
893 } views[MAX_MULTIVIEW_VIEW_COUNT];
894
895 /* If this is a multisampled attachment that is going to be resolved,
896 * whether we may be able to use the TLB hardware resolve based on the
897 * attachment format.
898 */
899 bool try_tlb_resolve;
900 };
901
902 struct v3dv_render_pass {
903 struct vk_object_base base;
904
905 bool multiview_enabled;
906
907 uint32_t attachment_count;
908 struct v3dv_render_pass_attachment *attachments;
909
910 uint32_t subpass_count;
911 struct v3dv_subpass *subpasses;
912
913 struct v3dv_subpass_attachment *subpass_attachments;
914 };
915
916 struct v3dv_framebuffer {
917 struct vk_object_base base;
918
919 uint32_t width;
920 uint32_t height;
921 uint32_t layers;
922
923 /* Typically, edge tiles in the framebuffer have padding depending on the
924 * underlying tiling layout. One consequence of this is that when the
925 * framebuffer dimensions are not aligned to tile boundaries, tile stores
926 * would still write full tiles on the edges and write to the padded area.
927 * If the framebuffer is aliasing a smaller region of a larger image, then
928 * we need to be careful with this though, as we won't have padding on the
929 * edge tiles (which typically means that we need to load the tile buffer
930 * before we store).
931 */
932 bool has_edge_padding;
933
934 uint32_t attachment_count;
935 uint32_t color_attachment_count;
936
937 /* Notice that elements in 'attachments' will be NULL if the framebuffer
938 * was created imageless. The driver is expected to access attachment info
939 * from the command buffer state instead.
940 */
941 struct v3dv_image_view *attachments[0];
942 };
943
944 struct v3dv_frame_tiling {
945 uint32_t width;
946 uint32_t height;
947 uint32_t layers;
948 uint32_t render_target_count;
949 uint32_t internal_bpp;
950 uint32_t total_color_bpp;
951 bool msaa;
952 bool double_buffer;
953 uint32_t tile_width;
954 uint32_t tile_height;
955 uint32_t draw_tiles_x;
956 uint32_t draw_tiles_y;
957 uint32_t supertile_width;
958 uint32_t supertile_height;
959 uint32_t frame_width_in_supertiles;
960 uint32_t frame_height_in_supertiles;
961 };
962
963 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
964 const VkRect2D *area,
965 struct v3dv_framebuffer *fb,
966 struct v3dv_render_pass *pass,
967 uint32_t subpass_idx);
968
969 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
970 * This happens when we render at least 2 tiles, because in this mode each
971 * tile uses a different half of the tile buffer memory so we can have 2 tiles
972 * in flight (one being stored to memory and the next being rendered). In this
973 * scenario, if we emit a single initial tile clear we would only clear the
974 * first half of the tile buffer.
975 */
976 static inline bool
977 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
978 {
979 return tiling->double_buffer &&
980 (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
981 tiling->layers > 1);
982 }
983
984 enum v3dv_cmd_buffer_status {
985 V3DV_CMD_BUFFER_STATUS_NEW = 0,
986 V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
987 V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
988 V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
989 };
990
991 union v3dv_clear_value {
992 uint32_t color[4];
993 struct {
994 float z;
995 uint8_t s;
996 };
997 };
998
999 struct v3dv_cmd_buffer_attachment_state {
1000 /* The original clear value as provided by the Vulkan API */
1001 VkClearValue vk_clear_value;
1002
1003 /* The hardware clear value */
1004 union v3dv_clear_value clear_value;
1005
1006 /* The underlying image view (from the framebuffer or, if an imageless
1007 * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
1008 */
1009 struct v3dv_image_view *image_view;
1010
1011 /* If this is a multisampled attachment with a resolve operation. */
1012 bool has_resolve;
1013
1014 /* If this is a multisampled attachment with a resolve operation,
1015 * whether we can use the TLB for the resolve.
1016 */
1017 bool use_tlb_resolve;
1018 };
1019
1020 /* Cached values derived from Vulkan viewport/count */
1021 struct v3dv_viewport_state {
1022 float translate[MAX_VIEWPORTS][3];
1023 float scale[MAX_VIEWPORTS][3];
1024 };
1025
1026 /* Flags for custom dirty state, that could lead to packet emission.
1027 *
1028 * Note *custom*: for all the dynamic state tracking coming from the Vulkan
1029 * API, we use the Mesa runtime framework and its predefined flags
1030 * (MESA_VK_DYNAMIC_XXX).
1031 *
1032 * Here we define additional flags used to track dirty state.
1033 */
1034 enum v3dv_cmd_dirty_bits {
1035 V3DV_CMD_DIRTY_PIPELINE = 1 << 0,
1036 V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
1037 V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 2,
1038 V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 3,
1039 V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 4,
1040 V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 5,
1041 V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 6,
1042 V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO = 1 << 7,
1043 V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 8,
1044 V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 9,
1045 V3DV_CMD_DIRTY_DRAW_ID = 1 << 10,
1046 V3DV_CMD_DIRTY_ALL = (1 << 10) - 1,
1047 };
1048
1049 struct v3dv_dynamic_state {
1050 /* FIXME: we keep some viewport info cached (translate, scale) because we
1051 * use it in more than one place. But note that translate_z and scale_z
1052 * are also used in several places, and we recompute them based on
1053 * scissor/viewport info all the time. So perhaps we could do the same with
1054 * the x and y components.
1055 */
1056 struct v3dv_viewport_state viewport;
1057
1058 /* We cache the color_write_enable because the vulkan runtime keeps an 8-bit
1059 * bitset with a bit per attachment, but in order to combine it with the
1060 * color_write_masks it is easier to cache a 32-bit bitset with 4 bits per
1061 * attachment.
1062 */
1063 uint32_t color_write_enable;
1064 };
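/* Illustrative sketch (not the driver's actual helper) of the expansion
 * described in the color_write_enable comment above: turn the runtime's 1 bit
 * per attachment into 4 bits per attachment so it can be combined directly
 * with the color write masks:
 *
 *    static inline uint32_t
 *    expand_color_write_enable(uint8_t vk_bits)
 *    {
 *       uint32_t out = 0;
 *       for (uint32_t i = 0; i < 8; i++) {
 *          if (vk_bits & (1u << i))
 *             out |= 0xfu << (4 * i);
 *       }
 *       return out;
 *    }
 */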
1065
1066 void v3dv_viewport_compute_xform(const VkViewport *viewport,
1067 float scale[3],
1068 float translate[3]);
1069
1070 enum v3dv_ez_state {
1071 V3D_EZ_UNDECIDED = 0,
1072 V3D_EZ_GT_GE,
1073 V3D_EZ_LT_LE,
1074 V3D_EZ_DISABLED,
1075 };
1076
1077 enum v3dv_job_type {
1078 V3DV_JOB_TYPE_GPU_CL = 0,
1079 V3DV_JOB_TYPE_GPU_CL_INCOMPLETE,
1080 V3DV_JOB_TYPE_GPU_TFU,
1081 V3DV_JOB_TYPE_GPU_CSD,
1082 V3DV_JOB_TYPE_CPU_RESET_QUERIES,
1083 V3DV_JOB_TYPE_CPU_END_QUERY,
1084 V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
1085 V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
1086 V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
1087 };
1088
1089 struct v3dv_reset_query_cpu_job_info {
1090 struct v3dv_query_pool *pool;
1091 uint32_t first;
1092 uint32_t count;
1093 };
1094
1095 struct v3dv_end_query_info {
1096 struct v3dv_query_pool *pool;
1097 uint32_t query;
1098
1099 /* This is one unless multiview is used */
1100 uint32_t count;
1101 };
1102
1103 struct v3dv_copy_query_results_cpu_job_info {
1104 struct v3dv_query_pool *pool;
1105 uint32_t first;
1106 uint32_t count;
1107 struct v3dv_buffer *dst;
1108 uint32_t offset;
1109 uint32_t stride;
1110 VkQueryResultFlags flags;
1111 };
1112
1113 struct v3dv_submit_sync_info {
1114 /* List of syncs to wait before running a job */
1115 uint32_t wait_count;
1116 struct vk_sync_wait *waits;
1117
1118 /* List of syncs to signal when all jobs complete */
1119 uint32_t signal_count;
1120 struct vk_sync_signal *signals;
1121 };
1122
1123 struct v3dv_csd_indirect_cpu_job_info {
1124 struct v3dv_buffer *buffer;
1125 uint32_t offset;
1126 struct v3dv_job *csd_job;
1127 uint32_t wg_size;
1128 uint32_t *wg_uniform_offsets[3];
1129 bool needs_wg_uniform_rewrite;
1130 };
1131
1132 struct v3dv_timestamp_query_cpu_job_info {
1133 struct v3dv_query_pool *pool;
1134 uint32_t query;
1135
1136 /* This is one unless multiview is used */
1137 uint32_t count;
1138 };
1139
1140 /* Number of perfmons required to handle all supported performance counters */
1141 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
1142 DRM_V3D_MAX_PERF_COUNTERS)
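/* For example, if DRM_V3D_MAX_PERF_COUNTERS were 32, covering all
 * V3D_MAX_PERFCNT (93) counters would take DIV_ROUND_UP(93, 32) = 3 perfmons.
 */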
1143
1144 struct v3dv_perf_query {
1145 uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1146
1147 /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1148 * performance data.
1149 */
1150 struct vk_sync *last_job_sync;
1151 };
1152
1153 struct v3dv_job {
1154 struct list_head list_link;
1155
1156 /* We only create job clones when executing secondary command buffers into
1157 * primaries. These clones don't make deep copies of the original object
1158 * so we want to flag them to avoid freeing resources they don't own.
1159 */
1160 bool is_clone;
1161
1162 /* If this is a cloned job, whether it has its own BCL resource. This happens
1163 * when we suspend jobs in command buffers with the
1164 * VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT flag.
1165 */
1166 bool clone_owns_bcl;
1167
1168 /* VK_KHR_dynamic_rendering */
1169 bool suspending;
1170 bool resuming;
1171 struct v3dv_cl_out *suspend_branch_inst_ptr;
1172 uint32_t suspended_bcl_end;
1173
1174 /* If the job executes on the transfer stage of the pipeline */
1175 bool is_transfer;
1176
1177 /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1178 * dereference memory in any buffer that has been flagged with
1179 * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. These buffers may not
1180 * be bound via descriptor sets, so we need to make sure that a job that
1181 * uses this functionality includes all these buffers in its kernel
1182 * submission.
1183 */
1184 bool uses_buffer_device_address;
1185
1186 /* True if we have not identified anything that would be incompatible
1187 * with double-buffer (like MSAA) or that would make double-buffer mode
1188 * not efficient (like tile loads or not having any stores).
1189 */
1190 bool can_use_double_buffer;
1191
1192 /* This structure keeps track of various scores to inform a heuristic
1193 * for double-buffer mode.
1194 */
1195 struct v3d_double_buffer_score double_buffer_score;
1196
1197 /* We only need to allocate tile state for all layers if the binner
1198 * writes primitives to layers other than the first. This can only be
1199 * done using layered rendering (writing gl_Layer from a geometry shader),
1200 * so for other cases of multilayered framebuffers (typically with
1201 * meta copy/clear operations) that won't use layered rendering, we only
1202 * need one layer's worth of tile state for the binner.
1203 */
1204 bool allocate_tile_state_for_all_layers;
1205
1206 /* A pointer to the location of the TILE_BINNING_MODE_CFG packet so we can
1207 * rewrite it to enable double-buffer mode by the time we have enough info
1208 * about the job to make that decision.
1209 */
1210 struct v3dv_cl_out *bcl_tile_binning_mode_ptr;
1211
1212 enum v3dv_job_type type;
1213
1214 struct v3dv_device *device;
1215
1216 struct v3dv_cmd_buffer *cmd_buffer;
1217
1218 struct v3dv_cl bcl;
1219 struct v3dv_cl rcl;
1220 struct v3dv_cl indirect;
1221
1222 /* Set of all BOs referenced by the job. This will be used for making
1223 * the list of BOs that the kernel will need to have paged in to
1224 * execute our job.
1225 */
1226 struct set *bos;
1227 uint32_t bo_count;
1228 uint64_t bo_handle_mask;
1229
1230 struct v3dv_bo *tile_alloc;
1231 struct v3dv_bo *tile_state;
1232
1233 bool tmu_dirty_rcl;
1234
1235 uint32_t first_subpass;
1236
1237 /* When the current subpass is split into multiple jobs, this flag is set
1238 * to true for any jobs after the first in the same subpass.
1239 */
1240 bool is_subpass_continue;
1241
1242 /* If this job is the last job emitted for a subpass. */
1243 bool is_subpass_finish;
1244
1245 struct v3dv_frame_tiling frame_tiling;
1246
1247 enum v3dv_ez_state ez_state;
1248 enum v3dv_ez_state first_ez_state;
1249
1250 /* If we have already decided if we need to disable Early Z/S completely
1251 * for this job.
1252 */
1253 bool decided_global_ez_enable;
1254
1255 /* If the job emitted any draw calls with Early Z/S enabled */
1256 bool has_ez_draws;
1257
1258 /* If this job has been configured to use early Z/S clear */
1259 bool early_zs_clear;
1260
1261 /* Number of draw calls recorded into the job */
1262 uint32_t draw_count;
1263
1264 /* A flag indicating whether we want to flush every draw separately. This
1265 * can be used for debugging, or for cases where special circumstances
1266 * require this behavior.
1267 */
1268 bool always_flush;
1269
1270 /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1271 * can use this to select the hw queues where we need to serialize the job.
1272 */
1273 uint8_t serialize;
1274
1275 /* If this is a CL job, whether we should sync before binning */
1276 bool needs_bcl_sync;
1277
1278 /* If we have emitted a (default) point size packet in this job */
1279 bool emitted_default_point_size;
1280
1281 /* Job specs for CPU jobs */
1282 union {
1283 struct v3dv_reset_query_cpu_job_info query_reset;
1284 struct v3dv_end_query_info query_end;
1285 struct v3dv_copy_query_results_cpu_job_info query_copy_results;
1286 struct v3dv_csd_indirect_cpu_job_info csd_indirect;
1287 struct v3dv_timestamp_query_cpu_job_info query_timestamp;
1288 } cpu;
1289
1290 /* Job specs for TFU jobs */
1291 struct drm_v3d_submit_tfu tfu;
1292
1293 /* Job specs for CSD jobs */
1294 struct {
1295 struct v3dv_bo *shared_memory;
1296 uint32_t wg_count[3];
1297 uint32_t wg_base[3];
1298 struct drm_v3d_submit_csd submit;
1299 } csd;
1300
1301 /* Perfmons with last job sync for CSD and CL jobs */
1302 struct v3dv_perf_query *perf;
1303 };
1304
1305 void v3dv_job_init(struct v3dv_job *job,
1306 enum v3dv_job_type type,
1307 struct v3dv_device *device,
1308 struct v3dv_cmd_buffer *cmd_buffer,
1309 int32_t subpass_idx);
1310 void v3dv_job_destroy(struct v3dv_job *job);
1311
1312 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1313 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1314
1315 void v3dv_job_start_frame(struct v3dv_job *job,
1316 uint32_t width,
1317 uint32_t height,
1318 uint32_t layers,
1319 bool allocate_tile_state_for_all_layers,
1320 bool allocate_tile_state_now,
1321 uint32_t render_target_count,
1322 uint8_t max_internal_bpp,
1323 uint8_t total_color_bpp,
1324 bool msaa);
1325
1326 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1327
1328 struct v3dv_job *
1329 v3dv_job_clone(struct v3dv_job *job, bool skip_bcl);
1330
1331 struct v3dv_job *
1332 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1333 struct v3dv_cmd_buffer *cmd_buffer);
1334
1335 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1336 enum v3dv_job_type type,
1337 struct v3dv_cmd_buffer *cmd_buffer,
1338 uint32_t subpass_idx);
1339
1340 void
1341 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1342 uint32_t slot_size,
1343 uint32_t used_count,
1344 uint32_t *alloc_count,
1345 void **ptr);
1346
1347 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1348 bool indexed, bool indirect,
1349 uint32_t vertex_count);
1350
1351 bool v3dv_job_allocate_tile_state(struct v3dv_job *job);
1352
1353 void
1354 v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer,
1355 const VkRenderingInfoKHR *pRenderingInfo);
1356
1357 void
1358 v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer);
1359
1360 void
1361 v3dv_setup_dynamic_render_pass(struct v3dv_cmd_buffer *cmd_buffer,
1362 const VkRenderingInfoKHR *pRenderingInfo);
1363
1364 void
1365 v3dv_setup_dynamic_render_pass_inheritance(struct v3dv_cmd_buffer *cmd_buffer,
1366 const VkCommandBufferInheritanceRenderingInfo *info);
1367
1368 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1369 * cmd_buffer specific header?
1370 */
1371 struct v3dv_draw_info {
1372 uint32_t vertex_count;
1373 uint32_t instance_count;
1374 uint32_t first_vertex;
1375 uint32_t first_instance;
1376 };
1377
1378 struct v3dv_vertex_binding {
1379 struct v3dv_buffer *buffer;
1380 VkDeviceSize offset;
1381 VkDeviceSize size;
1382 };
1383
1384 struct v3dv_descriptor_state {
1385 struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1386 uint32_t valid;
1387 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1388 };
1389
1390 struct v3dv_cmd_pipeline_state {
1391 struct v3dv_pipeline *pipeline;
1392
1393 struct v3dv_descriptor_state descriptor_state;
1394 };
1395
1396 enum {
1397 V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1398 V3DV_BARRIER_COMPUTE_BIT = (1 << 1),
1399 V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1400 V3DV_BARRIER_CPU_BIT = (1 << 3),
1401 };
1402 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1403 V3DV_BARRIER_TRANSFER_BIT | \
1404 V3DV_BARRIER_COMPUTE_BIT | \
1405 V3DV_BARRIER_CPU_BIT)
1406
1407 struct v3dv_barrier_state {
1408 /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1409 uint8_t dst_mask;
1410
1411 /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1412 * indicating the sources of the dependency.
1413 */
1414 uint8_t src_mask_graphics;
1415 uint8_t src_mask_transfer;
1416 uint8_t src_mask_compute;
1417
1418 /* For graphics barriers, access masks involved. Used to decide if we need
1419 * to execute a binning or render barrier.
1420 */
1421 VkAccessFlags2 bcl_buffer_access;
1422 VkAccessFlags2 bcl_image_access;
1423 };
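/* Illustrative sketch (simplified; the real code derives these masks from the
 * VkDependencyInfo stage and access masks): a barrier whose source work runs
 * on compute and whose destination is graphics would be recorded as:
 *
 *    state->barrier.dst_mask |= V3DV_BARRIER_GRAPHICS_BIT;
 *    state->barrier.src_mask_graphics |= V3DV_BARRIER_COMPUTE_BIT;
 */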
1424
1425 struct v3dv_cmd_buffer_state {
1426 struct v3dv_render_pass *pass;
1427 struct v3dv_framebuffer *framebuffer;
1428
1429 /* VK_KHR_dynamic_rendering */
1430 struct v3dv_render_pass dynamic_pass;
1431 struct v3dv_subpass dynamic_subpass;
1432 struct v3dv_render_pass_attachment dynamic_attachments[18 /* (8 color + D/S) x 2 (for resolves) */];
1433 struct v3dv_subpass_attachment dynamic_subpass_attachments[18];
1434 struct v3dv_framebuffer *dynamic_framebuffer;
1435
1436 VkRect2D render_area;
1437
1438 /* Current job being recorded */
1439 struct v3dv_job *job;
1440
1441 uint32_t subpass_idx;
1442
1443 struct v3dv_cmd_pipeline_state gfx;
1444 struct v3dv_cmd_pipeline_state compute;
1445
1446 /* For most state tracking we rely on vk_dynamic_graphics_state, but we
1447 * maintain a custom structure for some state-related data that we want to
1448 * cache.
1449 */
1450 struct v3dv_dynamic_state dynamic;
1451
1452 /* This dirty is for v3dv_cmd_dirty_bits (FIXME: perhaps we should be more
1453 * explicit about it). For dirty flags coming from Vulkan dynamic state,
1454 * use the vk_dynamic_graphics_state handled by the vk_cmd_buffer
1455 */
1456 uint32_t dirty;
1457 VkShaderStageFlagBits dirty_descriptor_stages;
1458 VkShaderStageFlagBits dirty_push_constants_stages;
1459
1460 /* Current clip window. We use this to check whether we have an active
1461 * scissor, since in that case we can't use TLB clears and need to fallback
1462 * to drawing rects.
1463 */
1464 VkRect2D clip_window;
1465
1466 /* Whether our render area is aligned to tile boundaries. If this is false
1467 * then we have tiles that are only partially covered by the render area,
1468 * and therefore, we need to be careful with our loads and stores so we don't
1469 * modify pixels for the tile area that is not covered by the render area.
1470 * This means, for example, that we can't use the TLB to clear, since that
1471 * always clears full tiles.
1472 */
1473 bool tile_aligned_render_area;
1474
1475 /* FIXME: we have just one client-side BO for the push constants,
1476 * independently of the stageFlags in vkCmdPushConstants, and the
1477 * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1478 * tuning in the future if it makes sense.
1479 */
1480 uint32_t push_constants_size;
1481 uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1482
1483 uint32_t attachment_alloc_count;
1484 struct v3dv_cmd_buffer_attachment_state *attachments;
1485
1486 struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1487
1488 struct {
1489 VkBuffer buffer;
1490 VkDeviceSize offset;
1491 VkDeviceSize size;
1492 uint8_t index_size;
1493 } index_buffer;
1494
1495 /* Current uniforms */
1496 struct {
1497 struct v3dv_cl_reloc vs_bin;
1498 struct v3dv_cl_reloc vs;
1499 struct v3dv_cl_reloc gs_bin;
1500 struct v3dv_cl_reloc gs;
1501 struct v3dv_cl_reloc fs;
1502 } uniforms;
1503
1504 /* Current view index for multiview rendering */
1505 uint32_t view_index;
1506
1507 /* Current draw ID for multidraw */
1508 uint32_t draw_id;
1509
1510 /* Used to flag OOM conditions during command buffer recording */
1511 bool oom;
1512
1513 /* If we are currently recording job(s) for a transfer operation */
1514 bool is_transfer;
1515
1516 /* VK_KHR_dynamic_rendering */
1517 bool suspending;
1518 bool resuming;
1519
1520 /* Barrier state tracking */
1521 struct v3dv_barrier_state barrier;
1522
1523 /* Secondary command buffer state */
1524 struct {
1525 bool occlusion_query_enable;
1526 } inheritance;
1527
1528 /* Command buffer state saved during a meta operation */
1529 struct {
1530 uint32_t subpass_idx;
1531 VkRenderPass pass;
1532 VkFramebuffer framebuffer;
1533
1534 uint32_t attachment_alloc_count;
1535 uint32_t attachment_count;
1536 struct v3dv_cmd_buffer_attachment_state *attachments;
1537
1538 bool tile_aligned_render_area;
1539 VkRect2D render_area;
1540
1541 struct vk_dynamic_graphics_state dynamic_graphics_state;
1542 struct v3dv_dynamic_state dynamic;
1543
1544 struct v3dv_cmd_pipeline_state gfx;
1545 bool has_descriptor_state;
1546
1547 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1548 uint32_t push_constants_size;
1549 } meta;
1550
1551 /* Command buffer state for queries */
1552 struct {
1553 /* A list of vkCmdEndQuery commands recorded in the command buffer during
1554 * a render pass. We queue these here and then schedule the corresponding
1555 * CPU jobs for them at the time we finish the GPU job in which they have
1556 * been recorded.
1557 */
1558 struct {
1559 uint32_t used_count;
1560 uint32_t alloc_count;
1561 struct v3dv_end_query_info *states;
1562 } end;
1563
1564 struct {
1565 /* This BO is not NULL if we have an active occlusion query, that is,
1566 * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1567 */
1568 struct v3dv_bo *bo;
1569 uint32_t offset;
1570 /* When the driver emits draw calls to implement other operations in
1571 * the middle of a render pass (such as an attachment clear), we need
1572 * to pause occlusion query recording and resume it later so that
1573 * these draw calls don't register in occlusion counters. We use
1574 * this to store the BO reference in which we should resume occlusion
1575 * query counters after the driver is done emitting its draw calls.
1576 */
1577 struct v3dv_bo *paused_bo;
1578
1579 /* This pointer is not NULL if we have an active performance query */
1580 struct v3dv_perf_query *perf;
1581 } active_query;
1582 } query;
1583
1584 /* This is dynamic state since VK_EXT_extended_dynamic_state. */
1585 bool z_updates_enable;
1586
1587 /* ez_state can be dynamic since VK_EXT_extended_dynamic_state so we need
1588 * to keep track of it in the cmd_buffer state
1589 */
1590 enum v3dv_ez_state ez_state;
1591
1592 /* incompatible_ez_test can be dynamic since VK_EXT_extended_dynamic_state
1593 * so we need to keep track of it in the cmd_buffer state
1594 */
1595 bool incompatible_ez_test;
1596 };
1597
1598 void
1599 v3dv_cmd_buffer_state_get_viewport_z_xform(struct v3dv_cmd_buffer *cmd_buffer,
1600 uint32_t vp_idx,
1601 float *translate_z, float *scale_z);
1602
1603 /* The following struct represents the info from a descriptor that we store in
1604 * host memory. It mostly holds links to other existing vulkan objects,
1605 * like the image_view in order to access swizzle info, or the buffer used
1606 * for a UBO/SSBO, for example.
1607 *
1608 * FIXME: revisit whether it makes sense to just move everything that would
1609 * be needed from a descriptor to the bo.
1610 */
1611 struct v3dv_descriptor {
1612 VkDescriptorType type;
1613
1614 union {
1615 struct {
1616 struct v3dv_image_view *image_view;
1617 struct v3dv_sampler *sampler;
1618 };
1619
1620 struct {
1621 struct v3dv_buffer *buffer;
1622 size_t offset;
1623 size_t range;
1624 };
1625
1626 struct v3dv_buffer_view *buffer_view;
1627 };
1628 };
1629
1630 struct v3dv_query {
1631 /* Used by queries where we implement result copying on the CPU so we can
1632 * tell if the relevant jobs have been submitted for execution. Currently
1633 * these are all but occlusion queries.
1634 */
1635 bool maybe_available;
1636
1637 union {
1638 /* Used by occlusion queries */
1639 struct {
1640 /* Offset of this query in the occlusion query counter BO */
1641 uint32_t offset;
1642 } occlusion;
1643
1644 /* Used by timestamp queries */
1645 struct {
1646 /* Offset of this query in the timestamp BO for its value */
1647 uint32_t offset;
1648
1649 /* Syncobj to signal timestamp query availability */
1650 struct vk_sync *sync;
1651 } timestamp;
1652
1653 /* Used by performance queries */
1654 struct v3dv_perf_query perf;
1655 };
1656 };
1657
1658 struct v3dv_query_pool {
1659 struct vk_object_base base;
1660
1661 /* Per-pool Vulkan resources required to implement GPU-side query
1662 * functions (only occlusion queries for now).
1663 */
1664 struct {
1665 /* Buffer to access the BO with the occlusion query results and
1666 * availability info.
1667 */
1668 VkBuffer buf;
1669 VkDeviceMemory mem;
1670
1671 /* Descriptor set for accessing the buffer from a pipeline. */
1672 VkDescriptorPool descriptor_pool;
1673 VkDescriptorSet descriptor_set;
1674 } meta;
1675
1676 /* Only used with occlusion queries */
1677 struct {
1678 /* BO with the occlusion counters and query availability */
1679 struct v3dv_bo *bo;
1680 /* Offset of the availability info in the BO */
1681 uint32_t avail_offset;
1682 } occlusion;
1683
1684 /* Only used with timestamp queries */
1685 struct {
1686 /* BO with the query timestamp values */
1687 struct v3dv_bo *bo;
1688 } timestamp;
1689
1690 /* Only used with performance queries */
1691 struct {
1692 uint32_t ncounters;
1693 uint8_t counters[V3D_MAX_PERFCNT];
1694
1695 /* V3D has a limit on the number of counters we can track in a
1696 * single performance monitor, so if too many counters are requested
1697 * we need to create multiple monitors to record all of them. This
1698 * field represents the number of monitors required for the number
1699 * of counters requested.
1700 */
1701 uint8_t nperfmons;
1702 } perfmon;
1703
1704 VkQueryType query_type;
1705 uint32_t query_count;
1706 struct v3dv_query *queries;
1707 };
1708
1709 VkResult
1710 v3dv_query_allocate_resources(struct v3dv_device *device);
1711
1712 void
1713 v3dv_query_free_resources(struct v3dv_device *device);
1714
1715 VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
1716 struct v3dv_query_pool *pool,
1717 uint32_t first,
1718 uint32_t count,
1719 void *data,
1720 VkDeviceSize stride,
1721 VkQueryResultFlags flags);
1722
1723 void v3dv_reset_query_pool_cpu(struct v3dv_device *device,
1724 struct v3dv_query_pool *query_pool,
1725 uint32_t first,
1726 uint32_t last);
1727
1728 void v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
1729 struct v3dv_query_pool *pool,
1730 uint32_t query, uint32_t count,
1731 uint8_t availability);
1732
1733 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1734 uint64_t pobj,
1735 VkAllocationCallbacks *alloc);
1736 struct v3dv_cmd_buffer_private_obj {
1737 struct list_head list_link;
1738 uint64_t obj;
1739 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1740 };
1741
1742 extern const struct vk_command_buffer_ops v3dv_cmd_buffer_ops;
1743
1744 struct v3dv_cmd_buffer {
1745 struct vk_command_buffer vk;
1746
1747 struct v3dv_device *device;
1748
1749 VkCommandBufferUsageFlags usage_flags;
1750
1751 enum v3dv_cmd_buffer_status status;
1752
1753 struct v3dv_cmd_buffer_state state;
1754
1755 /* Buffer where we upload push constant data to resolve indirect indexing */
1756 struct v3dv_cl_reloc push_constants_resource;
1757
1758 /* Collection of Vulkan objects created internally by the driver (typically
1759 * during recording of meta operations) that are part of the command buffer
1760 * and should be destroyed with it.
1761 */
1762 struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1763
1764 /* Per-command buffer resources for meta operations. */
1765 struct {
1766 struct {
1767 /* The current descriptor pool for blit sources */
1768 VkDescriptorPool dspool;
1769 } blit;
1770 struct {
1771 /* The current descriptor pool for texel buffer copy sources */
1772 VkDescriptorPool dspool;
1773 } texel_buffer_copy;
1774 struct {
1775 /* The current descriptor pool for the copy query results output buffer */
1776 VkDescriptorPool dspool;
1777 } query;
1778 } meta;
1779
1780 /* List of jobs in the command buffer. For primary command buffers it
1781 * represents the jobs we want to submit to the GPU. For secondary command
1782 * buffers it represents jobs that will be merged into a primary command
1783 * buffer via vkCmdExecuteCommands.
1784 */
1785 struct list_head jobs;
1786 };
1787
1788 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1789 int32_t subpass_idx,
1790 enum v3dv_job_type type);
1791 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1792
1793 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1794 uint32_t subpass_idx);
1795 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1796 uint32_t subpass_idx);
1797
1798 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1799
1800 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1801 bool push_descriptor_state);
1802 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1803 bool needs_subpass_resume);
1804
1805 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1806 struct v3dv_query_pool *pool,
1807 uint32_t query,
1808 VkQueryControlFlags flags);
1809
1810 void v3dv_cmd_buffer_pause_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1811 void v3dv_cmd_buffer_resume_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1812
1813 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1814 struct v3dv_query_pool *pool,
1815 uint32_t query);
1816
1817 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1818 struct v3dv_query_pool *pool,
1819 uint32_t first,
1820 uint32_t count,
1821 struct v3dv_buffer *dst,
1822 uint32_t offset,
1823 uint32_t stride,
1824 VkQueryResultFlags flags);
1825
1826 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1827 struct drm_v3d_submit_tfu *tfu);
1828
1829 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
1830 struct v3dv_csd_indirect_cpu_job_info *info,
1831 const uint32_t *wg_counts);
1832
1833 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1834 uint64_t obj,
1835 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
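/* A minimal sketch (hypothetical callback and objects) of how a meta
 * operation could tie a temporary Vulkan object's lifetime to the command
 * buffer using the private object mechanism above:
 *
 *    static void
 *    destroy_image_view_cb(VkDevice device, uint64_t obj,
 *                          VkAllocationCallbacks *alloc)
 *    {
 *       // The object was stored as a raw 64-bit handle, cast it back.
 *       vkDestroyImageView(device, (VkImageView)(uintptr_t)obj, alloc);
 *    }
 *
 *    v3dv_cmd_buffer_add_private_obj(cmd_buffer, (uintptr_t)image_view,
 *                                    destroy_image_view_cb);
 */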
1836
1837 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1838 struct v3dv_barrier_state *src);
1839
1840 void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
1841 struct v3dv_job *job);
1842
1843 bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
1844 VkImageAspectFlags aspect,
1845 uint32_t first_subpass_idx,
1846 VkAttachmentLoadOp load_op,
1847 uint32_t last_subpass_idx,
1848 VkAttachmentStoreOp store_op);
1849
1850 bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state,
1851 VkImageAspectFlags aspect,
1852 uint32_t last_subpass_idx,
1853 VkAttachmentStoreOp store_op);
1854
1855 void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
1856 const VkDependencyInfo *info);
1857
1858 bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1859 struct v3dv_image *dst,
1860 struct v3dv_image *src,
1861 const VkImageCopy2 *region);
1862
1863 struct v3dv_event {
1864 struct vk_object_base base;
1865
1866 /* Link in the device list of pre-allocated free events */
1867 struct list_head link;
1868
1869 /* Each event gets a different index, which we use to compute the offset in
1870 * the BO we use to track its state (signaled vs. reset); see the sketch
1871 * after this struct. */
1872 uint32_t index;
1873 };
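/* A minimal sketch of how the index above could be turned into an offset in
 * the shared state BO, assuming a hypothetical fixed per-event state size:
 *
 *    uint32_t offset = event->index * V3DV_EVENT_STATE_BYTES; // hypothetical constant
 */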
1874
1875 VkResult
1876 v3dv_event_allocate_resources(struct v3dv_device *device);
1877
1878 void
1879 v3dv_event_free_resources(struct v3dv_device *device);
1880
1881 struct v3dv_shader_variant {
1882 enum broadcom_shader_stage stage;
1883
1884 union {
1885 struct v3d_prog_data *base;
1886 struct v3d_vs_prog_data *vs;
1887 struct v3d_gs_prog_data *gs;
1888 struct v3d_fs_prog_data *fs;
1889 struct v3d_compute_prog_data *cs;
1890 } prog_data;
1891
1892 /* We explicitly store prog_data_size because it makes serialization
1893 * easier.
1894 */
1895 uint32_t prog_data_size;
1896
1897 /* The assembly for this variant will be uploaded to a BO shared with all
1898 * other shader stages in that pipeline. This is the offset in that BO.
1899 */
1900 uint32_t assembly_offset;
1901
1902 /* Note: don't assume qpu_insts is always NULL or always non-NULL. In
1903 * general we try to free it as soon as it is uploaded to the shared BO
1904 * while the different stages are compiled, but we may keep it around based
1905 * on some pipeline creation flags, like
1906 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT.
1907 */
1908 uint64_t *qpu_insts;
1909 uint32_t qpu_insts_size;
1910 };
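/* A minimal sketch of how assembly_offset above is meant to be used,
 * assuming the shared assembly BO exposes its GPU address as an offset field:
 *
 *    uint32_t code_addr = pipeline->shared_data->assembly_bo->offset +
 *                         variant->assembly_offset;
 */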
1911
1912 /*
1913 * Per-stage info, useful so shader_module_compile_to_nir and other methods
1914 * don't need so many parameters.
1915 *
1916 * FIXME: for the coordinate shader and the vertex shader, module,
1917 * entrypoint, spec_info and nir are the same. There is also info that is only
1918 * relevant to some stages, but it seemed too much of a hassle to create a new
1919 * struct just to handle that. Revisit if this kind of info starts to grow.
1920 */
1921 struct v3dv_pipeline_stage {
1922 struct v3dv_pipeline *pipeline;
1923
1924 enum broadcom_shader_stage stage;
1925
1926 const struct vk_shader_module *module;
1927 const char *entrypoint;
1928 const VkSpecializationInfo *spec_info;
1929 const VkShaderModuleCreateInfo *module_info;
1930
1931 nir_shader *nir;
1932
1933 /* The following is the combined hash of module+entrypoint+spec_info+nir */
1934 unsigned char shader_sha1[20];
1935
1936 /** An identifier for this program, so you can track it in shader-db output. */
1937 uint32_t program_id;
1938
1939 VkPipelineCreationFeedback feedback;
1940
1941 struct vk_pipeline_robustness_state robustness;
1942 };
1943
1944 /* We use the descriptor pool entry for two things:
1945 * * Track the allocated sets, so we can properly free them if needed.
1946 * * Track the suballocated pool BO regions, so if a descriptor set is
1947 *   freed, the gap can be reused later.
1948 *
1949 * These only make sense if the pool was not created with the flag
1950 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
1951 */
1952 struct v3dv_descriptor_pool_entry
1953 {
1954 struct v3dv_descriptor_set *set;
1955 /* Offset and size of the subregion allocated for this entry from the
1956 * pool->bo
1957 */
1958 uint32_t offset;
1959 uint32_t size;
1960 };
1961
1962 struct v3dv_descriptor_pool {
1963 struct vk_object_base base;
1964
1965 /* A list with all descriptor sets allocated from the pool. */
1966 struct list_head set_list;
1967
1968 /* Whether this descriptor pool has been allocated by the driver for
1969 * internal use, typically to implement meta operations.
1970 */
1971 bool is_driver_internal;
1972
1973 struct v3dv_bo *bo;
1974 /* Current offset into the descriptor BO. 0 means we haven't used it for
1975 * any descriptor yet. If the descriptor BO is NULL, the current offset is
1976 * meaningless.
1977 */
1978 uint32_t current_offset;
1979
1980 /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, host
1981 * memory for descriptor sets is bump-allocated from the pool through the
1982 * following pointers (see the sketch after this struct). If it is set,
1983 * these are not used and descriptor sets are allocated/freed individually.
1984 */
1985 uint8_t *host_memory_base;
1986 uint8_t *host_memory_ptr;
1987 uint8_t *host_memory_end;
1988
1989 uint32_t entry_count;
1990 uint32_t max_entry_count;
1991 struct v3dv_descriptor_pool_entry entries[0];
1992 };
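/* A minimal sketch (hypothetical helper) of the bump allocation described in
 * the host_memory_* comment above, used when individual set freeing is not
 * required:
 *
 *    static void *
 *    pool_suballoc_host_mem(struct v3dv_descriptor_pool *pool, uint32_t size)
 *    {
 *       if (pool->host_memory_base &&
 *           pool->host_memory_ptr + size <= pool->host_memory_end) {
 *          void *mem = pool->host_memory_ptr;
 *          pool->host_memory_ptr += size;
 *          return mem;
 *       }
 *       return NULL;
 *    }
 */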
1993
1994 struct v3dv_descriptor_set {
1995 struct vk_object_base base;
1996
1997 /* List link into the list of all sets allocated from the pool */
1998 struct list_head pool_link;
1999
2000 struct v3dv_descriptor_pool *pool;
2001
2002 struct v3dv_descriptor_set_layout *layout;
2003
2004 /* Offset relative to the descriptor pool bo for this set */
2005 uint32_t base_offset;
2006
2007 /* The descriptors below can be indexed (set/binding) using the set_layout. */
2009 struct v3dv_descriptor descriptors[0];
2010 };
2011
2012 struct v3dv_descriptor_set_binding_layout {
2013 VkDescriptorType type;
2014
2015 /* Number of array elements in this binding */
2016 uint32_t array_size;
2017
2018 /* Index into the flattened descriptor set */
2019 uint32_t descriptor_index;
2020
2021 uint32_t dynamic_offset_count;
2022 uint32_t dynamic_offset_index;
2023
2024 /* Offset into the descriptor set where this descriptor lives. The final
2025 * offset in the descriptor BO needs to take set->base_offset into account
2026 * (see the sketch after this struct). */
2027 uint32_t descriptor_offset;
2028
2029 /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
2030 * if there are no immutable samplers.
2031 */
2032 uint32_t immutable_samplers_offset;
2033
2034 /* Descriptors for multiplanar combined image samplers are larger.
2035 * For mutable descriptors, this is always 1.
2036 */
2037 uint8_t plane_stride;
2038 };
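/* A minimal sketch of how the final BO offset of a descriptor is derived
 * from the fields above, assuming a hypothetical per-type descriptor_size:
 *
 *    uint32_t bo_offset = set->base_offset +
 *                         binding_layout->descriptor_offset +
 *                         array_index * descriptor_size;
 */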
2039
2040 struct v3dv_descriptor_set_layout {
2041 struct vk_object_base base;
2042
2043 VkDescriptorSetLayoutCreateFlags flags;
2044
2045 /* Number of bindings in this descriptor set */
2046 uint32_t binding_count;
2047
2048 /* Total BO size needed for this descriptor set */
2050 uint32_t bo_size;
2051
2052 /* Shader stages affected by this descriptor set */
2053 uint16_t shader_stages;
2054
2055 /* Number of dynamic offsets used by this descriptor set */
2056 uint16_t dynamic_offset_count;
2057
2058 /* Number of descriptors in this descriptor set */
2059 uint32_t descriptor_count;
2060
2061 /* Descriptor set layouts can be destroyed while still in use, so we
2062 * reference count them.
2063 */
2064 uint32_t ref_cnt;
2065
2066 /* Bindings in this descriptor set */
2067 struct v3dv_descriptor_set_binding_layout binding[0];
2068 };
2069
2070 void
2071 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
2072 struct v3dv_descriptor_set_layout *set_layout);
2073
2074 static inline void
2075 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
2076 {
2077 assert(set_layout && set_layout->ref_cnt >= 1);
2078 p_atomic_inc(&set_layout->ref_cnt);
2079 }
2080
2081 static inline void
2082 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
2083 struct v3dv_descriptor_set_layout *set_layout)
2084 {
2085 assert(set_layout && set_layout->ref_cnt >= 1);
2086 if (p_atomic_dec_zero(&set_layout->ref_cnt))
2087 v3dv_descriptor_set_layout_destroy(device, set_layout);
2088 }
2089
2090 struct v3dv_pipeline_layout {
2091 struct vk_object_base base;
2092
2093 struct {
2094 struct v3dv_descriptor_set_layout *layout;
2095 uint32_t dynamic_offset_start;
2096 } set[MAX_SETS];
2097
2098 uint32_t num_sets;
2099
2100 /* Shader stages that are declared to use descriptors from this layout */
2101 uint32_t shader_stages;
2102
2103 uint32_t dynamic_offset_count;
2104 uint32_t push_constant_size;
2105
2106 /* Since maintenance4, pipeline layouts can be destroyed right after
2107 * creating pipelines with them, so we reference count them.
2108 */
2109 uint32_t ref_cnt;
2110
2111 unsigned char sha1[20];
2112 };
2113
2114 void
2115 v3dv_pipeline_layout_destroy(struct v3dv_device *device,
2116 struct v3dv_pipeline_layout *layout,
2117 const VkAllocationCallbacks *alloc);
2118
2119 static inline void
2120 v3dv_pipeline_layout_ref(struct v3dv_pipeline_layout *layout)
2121 {
2122 assert(layout && layout->ref_cnt >= 1);
2123 p_atomic_inc(&layout->ref_cnt);
2124 }
2125
2126 static inline void
2127 v3dv_pipeline_layout_unref(struct v3dv_device *device,
2128 struct v3dv_pipeline_layout *layout,
2129 const VkAllocationCallbacks *alloc)
2130 {
2131 assert(layout && layout->ref_cnt >= 1);
2132 if (p_atomic_dec_zero(&layout->ref_cnt))
2133 v3dv_pipeline_layout_destroy(device, layout, alloc);
2134 }
2135
2136 /*
2137 * We use descriptor maps for UBOs/SSBOs and textures/samplers, so the map
2138 * needs to be big enough to hold the maximum count for all of them.
2139 *
2140 * FIXME: one alternative would be to allocate each map only as big as its
2141 * descriptor type needs, but that would mean more individual allocations.
2142 */
2143 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
2144 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
2145 MAX_STORAGE_BUFFERS)
2146
2147
2148 struct v3dv_descriptor_map {
2149 /* FIXME: avoid fixed size array/justify the size */
2150 unsigned num_desc; /* Number of descriptors */
2151 int set[DESCRIPTOR_MAP_SIZE];
2152 int binding[DESCRIPTOR_MAP_SIZE];
2153 int array_index[DESCRIPTOR_MAP_SIZE];
2154 int array_size[DESCRIPTOR_MAP_SIZE];
2155 uint8_t plane[DESCRIPTOR_MAP_SIZE];
2156 bool used[DESCRIPTOR_MAP_SIZE];
2157
2158 /* NOTE: the following is only used for samplers, but this is the easiest
2159 * place to put it.
2160 */
2161 uint8_t return_size[DESCRIPTOR_MAP_SIZE];
2162 };
2163
2164 struct v3dv_sampler {
2165 struct vk_object_base base;
2166 struct vk_ycbcr_conversion *conversion;
2167
2168 bool compare_enable;
2169 bool unnormalized_coordinates;
2170
2171 /* Prepacked per-plane SAMPLER_STATE, which is referenced as part of the
2172 * TMU configuration. If needed, it is copied to the descriptor info during
2173 * vkUpdateDescriptorSets.
2174 */
2175 uint8_t plane_count;
2176 uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
2177 };
2178
2179 /* We keep two special sampler indices that represent the case where a
2180 * sampler is not needed/provided. The main reason is that even without a
2181 * sampler we still need to do the output unpacking (through
2182 * nir_lower_tex). The easiest way to do this is to add these special "no
2183 * sampler" entries to the sampler_map and then use the proper unpacking
2184 * for each case.
2185 *
2186 * We have one entry for a 16-bit output size and another for a 32-bit
2187 * output size. We use the info coming from the RelaxedPrecision decoration
2188 * to decide between the two.
2189 */
2190 #define V3DV_NO_SAMPLER_16BIT_IDX 0
2191 #define V3DV_NO_SAMPLER_32BIT_IDX 1
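/* A minimal sketch of how one of the special indices above could be chosen,
 * assuming a return_size value derived from the RelaxedPrecision decoration
 * (cf. return_size in struct v3dv_descriptor_map below):
 *
 *    int sampler_idx = (return_size == 16) ? V3DV_NO_SAMPLER_16BIT_IDX
 *                                          : V3DV_NO_SAMPLER_32BIT_IDX;
 */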
2192
2193 struct v3dv_descriptor_maps {
2194 struct v3dv_descriptor_map ubo_map;
2195 struct v3dv_descriptor_map ssbo_map;
2196 struct v3dv_descriptor_map sampler_map;
2197 struct v3dv_descriptor_map texture_map;
2198 };
2199
2200 /* This structure represents data shared between different objects, like a
2201 * pipeline and the pipeline cache, so we reference count it to know when it
2202 * should be freed.
2203 */
2204 struct v3dv_pipeline_shared_data {
2205 uint32_t ref_cnt;
2206
2207 unsigned char sha1_key[20];
2208
2209 struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
2210 struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
2211
2212 struct v3dv_bo *assembly_bo;
2213 };
2214
2215 struct v3dv_pipeline_executable_data {
2216 enum broadcom_shader_stage stage;
2217 char *nir_str;
2218 char *qpu_str;
2219 };
2220
2221 struct v3dv_pipeline {
2222 struct vk_object_base base;
2223
2224 struct v3dv_device *device;
2225
2226 VkShaderStageFlags active_stages;
2227 VkPipelineCreateFlagBits2KHR flags;
2228
2229 struct v3dv_render_pass *pass;
2230 struct v3dv_subpass *subpass;
2231
2232 struct v3dv_pipeline_stage *stages[BROADCOM_SHADER_STAGES];
2233
2234 /* For VK_KHR_dynamic_rendering */
2235 struct vk_render_pass_state rendering_info;
2236
2237 /* Flags for whether optional pipeline stages are present, for convenience */
2238 bool has_gs;
2239
2240 /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
2241 bool uses_buffer_device_address;
2242
2243 /* Spilling memory requirements */
2244 struct {
2245 struct v3dv_bo *bo;
2246 uint32_t size_per_thread;
2247 } spill;
2248
2249 struct vk_dynamic_graphics_state dynamic_graphics_state;
2250 struct v3dv_dynamic_state dynamic;
2251
2252 struct v3dv_pipeline_layout *layout;
2253
2254 enum v3dv_ez_state ez_state;
2255
2256 /* If ez_state is V3D_EZ_DISABLED, this tells whether the reason for
2257 * disabling is that the pipeline selects an incompatible depth test function.
2258 */
2259 bool incompatible_ez_test;
2260
2261 bool rasterization_enabled;
2262 bool msaa;
2263 bool sample_rate_shading;
2264 uint32_t sample_mask;
2265
2266 bool negative_one_to_one;
2267
2268 /* Indexed by vertex binding. */
2269 struct v3dv_pipeline_vertex_binding {
2270 uint32_t instance_divisor;
2271 } vb[MAX_VBS];
2272 uint32_t vb_count;
2273
2274 /* Note that a lot of the info from VkVertexInputAttributeDescription is
2275 * already prepacked, so here we only store the fields that need to be
2276 * rechecked later. The array must be indexed by driver location, since
2277 * that is the order in which we need to emit the attributes.
2278 */
2279 struct v3dv_pipeline_vertex_attrib {
2280 uint32_t binding;
2281 uint32_t offset;
2282 VkFormat vk_format;
2283 } va[MAX_VERTEX_ATTRIBS];
2284 uint32_t va_count;
2285
2286 enum mesa_prim topology;
2287
2288 bool line_smooth;
2289
2290 struct v3dv_pipeline_shared_data *shared_data;
2291
2292 /* Combined SHA1 of the shader stages, the pipeline layout and the pipeline key. */
2293 unsigned char sha1[20];
2294
2295 /* In general we can reuse v3dv_device->default_attribute_float, so note
2296 * that the following can be NULL. In 7.x this is not used, so it will
2297 * always be NULL.
2298 *
2299 * FIXME: the contents of this BO are small, so it could be uploaded to a
2300 * common BO instead, but since in most cases it will be NULL this is not
2301 * a priority.
2302 */
2303 struct v3dv_bo *default_attribute_values;
2304
2305 struct vpm_config vpm_cfg;
2306 struct vpm_config vpm_cfg_bin;
2307
2308 /* If the pipeline should emit any of the stencil configuration packets */
2309 bool emit_stencil_cfg[2];
2310
2311 /* Blend state */
2312 struct {
2313 /* Per-RT bit mask with blend enables */
2314 uint8_t enables;
2315 /* Per-RT prepacked blend config packets */
2316 uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2317 /* Flag indicating whether the blend factors in use require
2318 * color constants.
2319 */
2320 bool needs_color_constants;
2321 /* Mask with enabled color channels for each RT (4 bits per RT) */
2322 uint32_t color_write_masks;
2323 } blend;
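/* A minimal sketch of how the per-RT color write mask above can be
 * extracted, given that each render target uses 4 bits (rt is a hypothetical
 * render target index):
 *
 *    uint8_t rt_write_mask = (pipeline->blend.color_write_masks >> (rt * 4)) & 0xf;
 */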
2324
2325 struct {
2326 void *mem_ctx;
2327 struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2328 } executables;
2329
2330 /* Packets prepacked during pipeline creation. */
2332 uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2333 uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2334 uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2335 uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2336 MAX_VERTEX_ATTRIBS];
2337 uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2338 };
2339
2340 static inline bool
2341 v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
2342 {
2343 return device->devinfo.ver > 71 ||
2344 (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
2345 }
2346
2347 static inline VkPipelineBindPoint
2348 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2349 {
2350 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2351 !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2352 return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2353 VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2354 }
2355
2356 static inline struct v3dv_descriptor_state*
2357 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2358 struct v3dv_pipeline *pipeline)
2359 {
2360 if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2361 return &cmd_buffer->state.compute.descriptor_state;
2362 else
2363 return &cmd_buffer->state.gfx.descriptor_state;
2364 }
2365
2366 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo);
2367
2368 uint32_t v3dv_physical_device_vendor_id(const struct v3dv_physical_device *dev);
2369 uint32_t v3dv_physical_device_device_id(const struct v3dv_physical_device *dev);
2370
2371 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f,
2372 uint8_t plane);
2373 const struct v3dv_format *
2374 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2375 uint32_t bpp, VkFormat *out_vk_format);
2376 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2377 VkFormat vk_format,
2378 VkFormatFeatureFlags2 features);
2379
2380 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2381 struct v3dv_pipeline *pipeline,
2382 struct v3dv_shader_variant *variant);
2383
2384 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2385 struct v3dv_pipeline *pipeline,
2386 struct v3dv_shader_variant *variant,
2387 uint32_t **wg_count_offsets);
2388
2389 struct v3dv_shader_variant *
2390 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2391 struct v3dv_pipeline_cache *cache,
2392 struct v3d_key *key,
2393 size_t key_size,
2394 const VkAllocationCallbacks *pAllocator,
2395 VkResult *out_vk_result);
2396
2397 struct v3dv_shader_variant *
2398 v3dv_shader_variant_create(struct v3dv_device *device,
2399 enum broadcom_shader_stage stage,
2400 struct v3d_prog_data *prog_data,
2401 uint32_t prog_data_size,
2402 uint32_t assembly_offset,
2403 uint64_t *qpu_insts,
2404 uint32_t qpu_insts_size,
2405 VkResult *out_vk_result);
2406
2407 void
2408 v3dv_shader_variant_destroy(struct v3dv_device *device,
2409 struct v3dv_shader_variant *variant);
2410
2411 static inline void
2412 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2413 {
2414 assert(shared_data && shared_data->ref_cnt >= 1);
2415 p_atomic_inc(&shared_data->ref_cnt);
2416 }
2417
2418 void
2419 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2420 struct v3dv_pipeline_shared_data *shared_data);
2421
2422 static inline void
2423 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2424 struct v3dv_pipeline_shared_data *shared_data)
2425 {
2426 assert(shared_data && shared_data->ref_cnt >= 1);
2427 if (p_atomic_dec_zero(&shared_data->ref_cnt))
2428 v3dv_pipeline_shared_data_destroy(device, shared_data);
2429 }
2430
2431 struct v3dv_descriptor *
2432 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2433 struct v3dv_descriptor_map *map,
2434 struct v3dv_pipeline_layout *pipeline_layout,
2435 uint32_t index,
2436 uint32_t *dynamic_offset);
2437
2438 struct v3dv_cl_reloc
2439 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2440 struct v3dv_descriptor_state *descriptor_state,
2441 struct v3dv_descriptor_map *map,
2442 struct v3dv_pipeline_layout *pipeline_layout,
2443 uint32_t index,
2444 VkDescriptorType *out_type);
2445
2446 const struct v3dv_sampler *
2447 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2448 struct v3dv_descriptor_map *map,
2449 struct v3dv_pipeline_layout *pipeline_layout,
2450 uint32_t index);
2451
2452 struct v3dv_cl_reloc
2453 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2454 struct v3dv_descriptor_state *descriptor_state,
2455 struct v3dv_descriptor_map *map,
2456 struct v3dv_pipeline_layout *pipeline_layout,
2457 uint32_t index);
2458
2459 struct v3dv_cl_reloc
2460 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2461 struct v3dv_descriptor_state *descriptor_state,
2462 struct v3dv_descriptor_map *map,
2463 struct v3dv_pipeline_layout *pipeline_layout,
2464 uint32_t index);
2465
2466 struct v3dv_bo*
2467 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2468 struct v3dv_descriptor_map *map,
2469 struct v3dv_pipeline_layout *pipeline_layout,
2470 uint32_t index);
2471
2472 static inline const struct v3dv_sampler *
2473 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2474 const struct v3dv_descriptor_set_binding_layout *binding)
2475 {
2476 assert(binding->immutable_samplers_offset);
2477 return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2478 }
2479
2480 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2481 struct v3dv_device *device,
2482 VkPipelineCacheCreateFlags,
2483 bool cache_enabled);
2484
2485 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2486
2487 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2488 struct v3dv_pipeline_cache *cache,
2489 nir_shader *nir,
2490 unsigned char sha1_key[20]);
2491
2492 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2493 struct v3dv_pipeline_cache *cache,
2494 const nir_shader_compiler_options *nir_options,
2495 unsigned char sha1_key[20]);
2496
2497 struct v3dv_pipeline_shared_data *
2498 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2499 unsigned char sha1_key[20],
2500 bool *cache_hit);
2501
2502 void
2503 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2504 struct v3dv_pipeline_cache *cache);
2505
2506 VkResult
2507 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
2508 nir_shader *nir,
2509 VkPipelineLayout pipeline_layout,
2510 VkPipeline *pipeline);
2511
2512 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
2513 VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2514
2515 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2516 VK_OBJECT_TYPE_COMMAND_BUFFER)
2517 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2518 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2519 VK_OBJECT_TYPE_INSTANCE)
2520 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2521 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2522 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2523
2524 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2525 VK_OBJECT_TYPE_BUFFER)
2526 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2527 VK_OBJECT_TYPE_BUFFER_VIEW)
2528 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
2529 VK_OBJECT_TYPE_DEVICE_MEMORY)
2530 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2531 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2532 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2533 VK_OBJECT_TYPE_DESCRIPTOR_SET)
2534 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2535 VkDescriptorSetLayout,
2536 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2537 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2538 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2539 VK_OBJECT_TYPE_FRAMEBUFFER)
2540 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2541 VK_OBJECT_TYPE_IMAGE)
2542 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2543 VK_OBJECT_TYPE_IMAGE_VIEW)
2544 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2545 VK_OBJECT_TYPE_PIPELINE)
2546 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2547 VK_OBJECT_TYPE_PIPELINE_CACHE)
2548 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2549 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2550 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2551 VK_OBJECT_TYPE_QUERY_POOL)
2552 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2553 VK_OBJECT_TYPE_RENDER_PASS)
2554 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2555 VK_OBJECT_TYPE_SAMPLER)
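/* A minimal sketch of how the handle casts above are typically used inside a
 * driver entrypoint (the entrypoint shown is hypothetical):
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    v3dv_SomeEntrypoint(VkDevice _device, VkSampler _sampler)
 *    {
 *       V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *       V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
 *       ...
 *    }
 */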
2556
2557 /* Flags OOM conditions in command buffer state.
2558 *
2559 * Note that no-op jobs don't have a command buffer reference.
2560 */
2561 static inline void
2562 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2563 {
2564 if (cmd_buffer) {
2565 cmd_buffer->state.oom = true;
2566 } else {
2567 assert(job);
2568 if (job->cmd_buffer)
2569 job->cmd_buffer->state.oom = true;
2570 }
2571 }
2572
2573 #define v3dv_return_if_oom(_cmd_buffer, _job) do { \
2574 const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer; \
2575 if (__cmd_buffer && __cmd_buffer->state.oom) \
2576 return; \
2577 const struct v3dv_job *__job = _job; \
2578 if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom) \
2579 return; \
2580 } while(0) \
2581
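/* A minimal sketch of how the OOM helpers above are meant to be used in a
 * command recording path:
 *
 *    v3dv_return_if_oom(cmd_buffer, NULL);   // bail early if already OOM
 *    ...
 *    if (some_allocation_failed)             // hypothetical condition
 *       v3dv_flag_oom(cmd_buffer, NULL);
 */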
2582 static inline uint32_t
2583 u64_hash(const void *key)
2584 {
2585 return _mesa_hash_data(key, sizeof(uint64_t));
2586 }
2587
2588 static inline bool
2589 u64_compare(const void *key1, const void *key2)
2590 {
2591 return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2592 }
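/* A minimal sketch of how the helpers above can key a Mesa hash table by a
 * 64-bit value (the key must point to storage that outlives the entry):
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *    _mesa_hash_table_insert(ht, &some_u64_key, some_value);
 */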
2593
2594 /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2595 * define v3dX for each version supported, because when we compile code that
2596 * is not version-specific, all version-specific macros need to be already
2597 * defined.
2598 */
2599 #ifdef v3dX
2600 # include "v3dvx_private.h"
2601 #else
2602 # define v3dX(x) v3d42_##x
2603 # include "v3dvx_private.h"
2604 # undef v3dX
2605
2606 # define v3dX(x) v3d71_##x
2607 # include "v3dvx_private.h"
2608 # undef v3dX
2609 #endif
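/* A minimal sketch of what the per-version inclusion above means: inside a
 * compilation unit built with v3dX defined to v3d42_##x, a declaration such
 * as
 *
 *    void v3dX(emit_something)(struct v3dv_job *job);   // hypothetical function
 *
 * expands to v3d42_emit_something(), while version-agnostic code sees both
 * the v3d42_ and v3d71_ prototypes through the includes above.
 */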
2610
2611 VkResult
2612 v3dv_update_image_layout(struct v3dv_device *device,
2613 struct v3dv_image *image,
2614 uint64_t modifier,
2615 bool disjoint,
2616 const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
2617
2618 float
2619 v3dv_get_aa_line_width(struct v3dv_pipeline *pipeline,
2620 struct v3dv_cmd_buffer *buffer);
2621
2622
2623 void
2624 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2625 struct v3dv_pipeline *pipeline,
2626 enum v3dv_ez_state *ez_state,
2627 bool *incompatible_ez_test);
2628
2629 uint32_t v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim);
2630
2631 #endif /* V3DV_PRIVATE_H */
2632