/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_descriptor_update_template.h"
#include "vk_device.h"
#include "vk_device_memory.h"
#include "vk_format.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_sync_timeline.h"
#include "vk_util.h"
#include "vk_ycbcr_conversion.h"

#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_queue.h"
#include "vk_pipeline.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "util/detect_os.h"

#if DETECT_OS_ANDROID
#include <vndk/hardware_buffer.h>
#include "util/u_gralloc/u_gralloc.h"
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/sparse_array.h"
#include "util/xmlconfig.h"
#include "util/u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"
/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif

#define perf_debug(...) do {             \
   if (V3D_DBG(PERF))                    \
      fprintf(stderr, __VA_ARGS__);      \
} while (0)
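
/* Usage sketch (illustrative only, not part of the driver API): v3dv_assert()
 * logs but does not abort, and perf_debug() only prints when the "perf"
 * option is enabled in the V3D_DEBUG environment variable, e.g.:
 *
 *    v3dv_assert(bo != NULL);
 *    perf_debug("Flushing job early due to barrier\n");
 */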

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

/* Maximum number of performance counters */
#define V3D_MAX_PERFCNT 93

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;

   /* We need these because it is not clear how to detect
    * valid devids in a portable way
    */
   bool has_primary;
   bool has_render;

   dev_t primary_devid;
   dev_t render_devid;

#if using_v3d_simulator
   uint32_t device_id;
#endif

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct vk_sync_type drm_syncobj_type;
   struct vk_sync_timeline_type sync_timeline_type;
   const struct vk_sync_type *sync_types[3];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   alignas(8) uint64_t heap_used;

   /* This array holds all our 'struct v3dv_bo' allocations. We use this
    * so we can add a refcount to our BOs and check if a particular BO
    * was already allocated in this device using its GEM handle. This is
    * necessary to properly manage BO imports, because the kernel doesn't
    * refcount the underlying BO memory.
    *
    * Specifically, when self-importing (i.e. importing a BO into the same
    * device that created it), the kernel will give us the same BO handle
    * for both BOs and we must only free it once when both references are
    * freed. Otherwise, if we are not self-importing, we get two different BO
    * handles, and we want to free each one individually.
    *
    * The BOs in this map all use the refcnt field for reference counting,
    * and only self-imported BOs will ever have a refcnt > 1.
    */
   struct util_sparse_array bo_map;

   struct {
      bool merge_jobs;
   } options;

   struct {
      bool cpu_queue;
      bool multisync;
      bool perfmon;
   } caps;
};

static inline struct v3dv_bo *
v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
{
   return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
}
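
/* Illustrative sketch (not actual driver code) of how the bo_map is meant to
 * be used on import, assuming a hypothetical initialization helper and the
 * refcnt field described above:
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt == 0)
 *       initialize_bo(bo, gem_handle, size);   // hypothetical helper
 *    else
 *       p_atomic_inc(&bo->refcnt);             // self-import: reuse entry
 */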

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
                           uint8_t plane,
                           uint8_t miplevel,
                           const VkOffset3D *offset,
                           const VkExtent3D *extent,
                           VkFormat *compat_format);

struct v3dv_instance {
   struct vk_instance vk;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
 * tfu), we still need a syncobj to track the last overall job submitted
 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
 * start expecting multisync to be present and drop the legacy implementation
 * together with this V3DV_QUEUE_ANY tracker.
 */
enum v3dv_queue_type {
   V3DV_QUEUE_CL = 0,
   V3DV_QUEUE_CSD,
   V3DV_QUEUE_TFU,
   V3DV_QUEUE_CPU,
   V3DV_QUEUE_ANY,
   V3DV_QUEUE_COUNT,
};

/* For each GPU queue, we use a syncobj to track the last job submitted. We
 * set the flag `first` to determine when we are starting a new cmd buffer
 * batch and therefore a job submitted to a given queue will be the first in a
 * cmd buf batch.
 */
struct v3dv_last_job_sync {
   /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
    *
    * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
    */
   bool first[V3DV_QUEUE_COUNT];
   /* Array of syncobj to track the last job submitted to a GPU queue.
    *
    * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
    * queue, but without multisync we only track the last job submitted to any
    * queue in V3DV_QUEUE_ANY.
    */
   uint32_t syncs[V3DV_QUEUE_COUNT];
};
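
/* Illustrative sketch (hedged, not verbatim driver code): when a job is
 * submitted, the syncobj slot it updates depends on multisync support:
 *
 *    enum v3dv_queue_type slot = device->pdevice->caps.multisync ?
 *       job_queue : V3DV_QUEUE_ANY;    // job_queue: e.g. V3DV_QUEUE_CL
 *    queue->last_job_syncs.syncs[slot] = out_syncobj;
 */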

struct v3dv_queue {
   struct vk_queue vk;

   struct v3dv_device *device;

   struct v3dv_last_job_sync last_job_syncs;

   struct v3dv_job *noop_job;

   /* The last active perfmon ID to prevent mixing of counter results when a
    * job is submitted with a different perfmon id.
    */
   uint32_t last_perfmon_id;
};

VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
                                  struct vk_queue_submit *submit);

#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      uint8_t swizzle[4];
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
   bool has_multiview;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
   uint32_t on_disk_hit;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}
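
/* Example (sketch): a render vertex or geometry stage always has a binning
 * counterpart, so code that compiles shader variants typically does something
 * along these lines:
 *
 *    if (broadcom_shader_stage_is_render_with_binning(stage)) {
 *       enum broadcom_shader_stage bin_stage =
 *          broadcom_binning_shader_stage_for_render_stage(stage);
 *       // ...also compile a variant for bin_stage...
 *    }
 */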

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}

struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;

   /* For VK_EXT_pipeline_creation_cache_control. */
   bool externally_synchronized;
};

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* Guards query->maybe_available and value for timestamps */
   mtx_t query_mutex;

   /* Signaled whenever a query is ended */
   cnd_t query_ended;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3d_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3d_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   /* Event handling resources.
    *
    * Our implementation of events uses a BO to store event state (signaled vs
    * reset) and dispatches compute shaders to handle GPU event functions
    * (signal, reset, wait). This struct holds all the resources required
    * by the implementation.
    */
   struct {
      mtx_t lock;

      /* BO for the event states: signaled (1) or reset (0) */
      struct v3dv_bo *bo;

      /* We pre-allocate all the events we can fit for the size of the BO we
       * create to track their states, where each event has an index which is
       * basically the offset of its state in that BO. We keep a free list with
       * the pre-allocated events that are available.
       */
      uint32_t event_count;
      struct v3dv_event *events;
      struct list_head free_list;

      /* Vulkan resources to access the event BO from shaders. We have a
       * pipeline that sets the state of an event and another that waits on
       * a single event. Both pipelines require access to the event state BO,
       * for which we need to allocate a single descriptor set.
       */
      VkBuffer buffer;
      VkDeviceMemory mem;
      VkDescriptorSetLayout descriptor_set_layout;
      VkPipelineLayout pipeline_layout;
      VkDescriptorPool descriptor_pool;
      VkDescriptorSet descriptor_set;
      VkPipeline set_event_pipeline;
      VkPipeline wait_event_pipeline;
   } events;
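
   /* Illustrative sketch of how the event BO is addressed (assuming each
    * event state occupies one byte and the BO is CPU-mapped at bo->map):
    *
    *    uint8_t *states = (uint8_t *) device->events.bo->map;
    *    bool signaled = states[event->index] != 0;
    */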

   /* Query handling resources.
    *
    * Our implementation of occlusion queries uses a BO per pool to keep track
    * of the per-query availability state and dispatches compute shaders to
    * handle GPU query functions that read and write that state. This struct
    * holds Vulkan resources that can be shared across all query pools to
    * implement this. This framework may be extended in the future to handle
    * more query types.
    */
   struct {
      VkDescriptorSetLayout buf_descriptor_set_layout;

      /* Set query availability */
      VkPipelineLayout avail_pipeline_layout;
      VkPipeline avail_pipeline;

      /* Reset query availability and clear occlusion counters */
      VkPipelineLayout reset_occlusion_pipeline_layout;
      VkPipeline reset_occlusion_pipeline;

      /* Copy query results */
      VkPipelineLayout copy_pipeline_layout;
      VkPipeline copy_pipeline[8];
   } queries;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines matching
    * this requirement. Pipelines that need integer attributes will create
    * their own BO.
    *
    * Note that since v71 the default attribute values are not needed, so this
    * can be NULL.
    */
   struct v3dv_bo *default_attribute_float;

   void *device_address_mem_ctx;
   struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */

#if DETECT_OS_ANDROID
   struct u_gralloc *gralloc;
#endif
};

struct v3dv_device_memory {
   struct vk_device_memory vk;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool is_for_wsi;
   bool is_for_device_address;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO 255

#define V3DV_MAX_PLANE_COUNT 3
struct v3dv_format_plane {
   /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;
};

struct v3dv_format {
   /* A non-zero plane count implies the format is supported */
   uint8_t plane_count;

   struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT];

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

/* Note that although VkImageAspectFlags would allow combining more than one
 * PLANE bit, for all the use cases we implement that use VkImageAspectFlags,
 * only one plane is allowed, like for example vkCmdCopyImage:
 *
 *   "If srcImage has a VkFormat with two planes then for each element of
 *    pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT
 *    or VK_IMAGE_ASPECT_PLANE_1_BIT"
 */
static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect)
{
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case VK_IMAGE_ASPECT_STENCIL_BIT:
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT:
      return 0;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
      return 2;
   default:
      unreachable("invalid image aspect");
   }
}

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   uint32_t width;
   uint32_t height;
   /* Size of a single pane of the slice. For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);

struct v3dv_image {
   struct vk_image vk;

   const struct v3dv_format *format;
   bool tiled;

   uint8_t plane_count;

   /* If 0, this is a multi-plane image using disjoint memory, where each
    * plane binds a different device memory. Otherwise, all the planes share
    * the same device memory and this stores the total size of the image in
    * bytes.
    */
   uint32_t non_disjoint_size;

   struct {
      uint32_t cpp;

      struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
      /* Total size of the plane in bytes. */
      uint64_t size;
      uint32_t cube_map_stride;

      /* If not using disjoint memory, mem and mem_offset are the same for all
       * planes, in which case mem_offset is the offset of plane 0.
       */
      struct v3dv_device_memory *mem;
      VkDeviceSize mem_offset;
      uint32_t alignment;

      /* Pre-subsampled per plane width and height
       */
      uint32_t width;
      uint32_t height;

      /* Even if we can get it from the parent image format, we keep the
       * format here for convenience
       */
      VkFormat vk_format;
   } planes[V3DV_MAX_PLANE_COUNT];

   /* Used only when sampling a linear texture (which V3D doesn't support).
    * This holds a tiled copy of the image we can use for that purpose.
    */
   struct v3dv_image *shadow;

#if DETECT_OS_ANDROID
   /* Image is backed by VK_ANDROID_native_buffer */
   bool is_native_buffer_memory;
   /* Image is backed by VK_ANDROID_external_memory_android_hardware_buffer */
   bool is_ahb;
   VkImageDrmFormatModifierExplicitCreateInfoEXT *android_explicit_layout;
   VkSubresourceLayout *android_plane_layouts;
#endif
};

VkResult
v3dv_image_init(struct v3dv_device *device,
                const VkImageCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                struct v3dv_image *image);

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

static uint32_t
v3dv_image_aspect_to_plane(const struct v3dv_image *image,
                           VkImageAspectFlagBits aspect)
{
   assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects));

   /* Because we always put image and view planes in aspect-bit-order, the
    * plane index is the number of bits in the image aspect before aspect.
    */
   return util_bitcount(image->vk.aspects & (aspect - 1));
}
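
/* Worked example: for a 3-plane image whose aspects are
 * PLANE_0 | PLANE_1 | PLANE_2, passing VK_IMAGE_ASPECT_PLANE_2_BIT counts the
 * two lower plane bits and returns plane index 2.
 */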

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6
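
/* A per-version backend is expected to verify these at build time, roughly
 * like this (sketch; cl_packet_length() is the packet-size helper from the
 * generated CLE packet headers):
 *
 *    static_assert(cl_packet_length(TEXTURE_SHADER_STATE) <=
 *                  V3DV_TEXTURE_SHADER_STATE_LENGTH, "packet too large");
 */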

struct v3dv_image_view {
   struct vk_image_view vk;

   const struct v3dv_format *format;

   uint8_t view_swizzle[4];

   uint8_t plane_count;
   struct {
      uint8_t image_plane;

      bool swap_rb;
      bool channel_reverse;
      uint32_t internal_bpp;
      uint32_t internal_type;
      uint32_t offset;

      /* Precomputed swizzle (composed from the view swizzle and the format
       * swizzle).
       *
       * This could be also included on the descriptor bo, but the shader state
       * packet doesn't need it on a bo, so we can just avoid a memory copy
       */
      uint8_t swizzle[4];

      /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
       * during UpdateDescriptorSets.
       *
       * Empirical tests show that cube arrays need a different shader state
       * depending on whether they are used with a sampler or not, so for these
       * we generate two states and select the one to use based on the descriptor
       * type.
       */
      uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
   } planes[V3DV_MAX_PLANE_COUNT];

   /* Used only when sampling a linear texture (which V3D doesn't support).
    * This would represent a view over the tiled shadow image.
    */
   struct v3dv_image_view *shadow;
};

VkResult v3dv_create_image_view(struct v3dv_device *device,
                                const VkImageViewCreateInfo *pCreateInfo,
                                VkImageView *pView);

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer,
                           uint8_t plane);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

void
v3dv_buffer_init(struct v3dv_device *device,
                 const VkBufferCreateInfo *pCreateInfo,
                 struct v3dv_buffer *buffer,
                 uint32_t alignment);

void
v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info);

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;
   struct v3dv_subpass_attachment ds_resolve_attachment;
   bool resolve_depth, resolve_stencil;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;

   /* Multiview */
   uint32_t view_mask;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription2 desc;

   uint32_t first_subpass;
   uint32_t last_subpass;

   /* When multiview is enabled, we no longer care about when a particular
    * attachment is first or last used in a render pass, since not all views
    * in the attachment will meet that criteria. Instead, we need to track
    * each individual view (layer) in each attachment and emit our stores,
    * loads and clears accordingly.
    */
   struct {
      uint32_t first_subpass;
      uint32_t last_subpass;
   } views[MAX_MULTIVIEW_VIEW_COUNT];

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we may be able to use the TLB hardware resolve based on the
    * attachment format.
    */
   bool try_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   bool multiview_enabled;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;

   /* Notice that elements in 'attachments' will be NULL if the framebuffer
    * was created imageless. The driver is expected to access attachment info
    * from the command buffer state instead.
    */
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   uint32_t total_color_bpp;
   bool msaa;
   bool double_buffer;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

/* Checks if we need to emit 2 initial tile clears for double buffer mode.
 * This happens when we render at least 2 tiles, because in this mode each
 * tile uses a different half of the tile buffer memory so we can have 2 tiles
 * in flight (one being stored to memory and the next being rendered). In this
 * scenario, if we emit a single initial tile clear we would only clear the
 * first half of the tile buffer.
 */
static inline bool
v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
{
   return tiling->double_buffer &&
          (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
           tiling->layers > 1);
}

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;

   /* The underlying image view (from the framebuffer or, if an imageless
    * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
    */
   struct v3dv_image_view *image_view;

   /* If this is a multisampled attachment with a resolve operation. */
   bool has_resolve;

   /* If this is a multisampled attachment with a resolve operation,
    * whether we can use the TLB for the resolve.
    */
   bool use_tlb_resolve;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic.
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT = 1 << 0,
   V3DV_DYNAMIC_SCISSOR = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
   V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
   V3DV_DYNAMIC_DEPTH_BOUNDS = 1 << 9,
   V3DV_DYNAMIC_ALL = (1 << 10) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO = 1 << 12,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 13,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 14,
   V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 15,
   V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 16,
   V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 17,
   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 18,
   V3DV_CMD_DIRTY_DEPTH_BOUNDS = 1 << 19,
   V3DV_CMD_DIRTY_DRAW_ID = 1 << 20,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   struct {
      float min;
      float max;
   } depth_bounds;

   float line_width;

   uint32_t color_write_enable;
};

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_submit_sync_info {
   /* List of syncs to wait before running a job */
   uint32_t wait_count;
   struct vk_sync_wait *waits;

   /* List of syncs to signal when all jobs complete */
   uint32_t signal_count;
   struct vk_sync_signal *signals;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

/* Number of perfmons required to handle all supported performance counters */
#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
                                       DRM_V3D_MAX_PERF_COUNTERS)
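
/* Worked example (assuming DRM_V3D_MAX_PERF_COUNTERS is 32 in the DRM uAPI):
 * with V3D_MAX_PERFCNT = 93 this is DIV_ROUND_UP(93, 32) = 3 perfmons.
 */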

struct v3dv_perf_query {
   uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];

   /* A DRM syncobj to wait on the GPU jobs for which we are collecting
    * performance data.
    */
   struct vk_sync *last_job_sync;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   /* If the job executes on the transfer stage of the pipeline */
   bool is_transfer;

   /* VK_KHR_buffer_device_address allows shaders to use pointers that can
    * dereference memory in any buffer that has been flagged with
    * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. These buffers may not
    * be bound via descriptor sets, so we need to make sure that a job that
    * uses this functionality includes all these buffers in its kernel
    * submission.
    */
   bool uses_buffer_device_address;

   /* True if we have not identified anything that would be incompatible
    * with double-buffer (like MSAA) or that would make double-buffer mode
    * not efficient (like tile loads or not having any stores).
    */
   bool can_use_double_buffer;

   /* This structure keeps track of various scores to inform a heuristic
    * for double-buffer mode.
    */
   struct {
      /* Cost of geometry shading */
      uint32_t geom;
      /* Cost of shader rendering */
      uint32_t render;
   } double_buffer_score;

   /* We only need to allocate tile state for all layers if the binner
    * writes primitives to layers other than the first. This can only be
    * done using layered rendering (writing gl_Layer from a geometry shader),
    * so for other cases of multilayered framebuffers (typically with
    * meta copy/clear operations) that won't use layered rendering, we only
    * need one layer worth of tile state for the binner.
    */
   bool allocate_tile_state_for_all_layers;

   /* A pointer to the location of the TILE_BINNING_MODE_CFG packet so we can
    * rewrite it to enable double-buffer mode by the time we have enough info
    * about the job to make that decision.
    */
   struct v3dv_cl_out *bcl_tile_binning_mode_ptr;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* If we have already decided if we need to disable Early Z/S completely
    * for this job.
    */
   bool decided_global_ez_enable;

   /* If the job emitted any draw calls with Early Z/S enabled */
   bool has_ez_draws;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
    * can use this to select the hw queues where we need to serialize the job.
    */
   uint8_t serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info query_reset;
      struct v3dv_end_query_info query_end;
      struct v3dv_copy_query_results_cpu_job_info query_copy_results;
      struct v3dv_csd_indirect_cpu_job_info csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;

   /* Perfmons with last job sync for CSD and CL jobs */
   struct v3dv_perf_query *perf;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          bool allocate_tile_state_for_all_layers,
                          bool allocate_tile_state_now,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          uint8_t total_color_bpp,
                          bool msaa);

bool v3dv_job_type_is_gpu(struct v3dv_job *job);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
                                   bool indexed, bool indirect,
                                   uint32_t vertex_count);

bool v3dv_job_allocate_tile_state(struct v3dv_job *job);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

enum {
   V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
   V3DV_BARRIER_COMPUTE_BIT = (1 << 1),
   V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
   V3DV_BARRIER_CPU_BIT = (1 << 3),
};
#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
                          V3DV_BARRIER_TRANSFER_BIT | \
                          V3DV_BARRIER_COMPUTE_BIT | \
                          V3DV_BARRIER_CPU_BIT);

struct v3dv_barrier_state {
   /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
   uint8_t dst_mask;

   /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
    * indicating the sources of the dependency.
    */
   uint8_t src_mask_graphics;
   uint8_t src_mask_transfer;
   uint8_t src_mask_compute;

   /* For graphics barriers, access masks involved. Used to decide if we need
    * to execute a binning or render barrier.
    */
   VkAccessFlags2 bcl_buffer_access;
   VkAccessFlags2 bcl_image_access;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   /* FIXME: we have just one client-side BO for the push constants,
    * independently of the stageFlags in vkCmdPushConstants, and the
    * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
    * tuning in the future if it makes sense.
    */
   uint32_t push_constants_size;
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Current view index for multiview rendering */
   uint32_t view_index;

   /* Current draw ID for multidraw */
   uint32_t draw_id;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* If we are currently recording job(s) for a transfer operation */
   bool is_transfer;

   /* Barrier state tracking */
   struct v3dv_barrier_state barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
      uint32_t push_constants_size;
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdQueryEnd commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_info *states;
      } end;

      struct {
         /* This BO is not NULL if we have an active occlusion query, that is,
          * we have called vkCmdBeginQuery but not vkCmdEndQuery.
          */
         struct v3dv_bo *bo;
         uint32_t offset;
         /* When the driver emits draw calls to implement other operations in
          * the middle of a render pass (such as an attachment clear), we need
          * to pause occlusion query recording and resume it later so that
          * these draw calls don't register in occlusion counters. We use
          * this to store the BO reference in which we should resume occlusion
          * query counters after the driver is done emitting its draw calls.
          */
         struct v3dv_bo *paused_bo;

         /* This pointer is not NULL if we have an active performance query */
         struct v3dv_perf_query *perf;
      } active_query;
   } query;
};

void
v3dv_cmd_buffer_state_get_viewport_z_xform(struct v3dv_cmd_buffer_state *state,
                                           uint32_t vp_idx,
                                           float *translate_z, float *scale_z);

/* The following struct represents the info from a descriptor that we store on
 * the host memory. They are mostly links to other existing vulkan objects,
 * like the image_view in order to access to swizzle info, or the buffer used
 * for a UBO/SSBO, for example.
 *
 * FIXME: revisit whether it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         size_t offset;
         size_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   /* Used by queries where we implement result copying in the CPU so we can
    * tell if the relevant jobs have been submitted for execution. Currently
    * these are all but occlusion queries.
    */
   bool maybe_available;

   union {
      /* Used by occlusion queries */
      struct {
         /* Offset of this query in the occlusion query counter BO */
         uint32_t offset;
      } occlusion;

      /* Used by timestamp queries */
      struct {
         /* Offset of this query in the timestamp BO for its value */
         uint32_t offset;

         /* Syncobj to signal timestamp query availability */
         struct vk_sync *sync;
      } timestamp;

      /* Used by performance queries */
      struct v3dv_perf_query perf;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   /* Per-pool Vulkan resources required to implement GPU-side query
    * functions (only occlusion queries for now).
    */
   struct {
      /* Buffer to access the BO with the occlusion query results and
       * availability info.
       */
      VkBuffer buf;
      VkDeviceMemory mem;

      /* Descriptor set for accessing the buffer from a pipeline. */
      VkDescriptorPool descriptor_pool;
      VkDescriptorSet descriptor_set;
   } meta;

   /* Only used with occlusion queries */
   struct {
      /* BO with the occlusion counters and query availability */
      struct v3dv_bo *bo;
      /* Offset of the availability info in the BO */
      uint32_t avail_offset;
   } occlusion;

   /* Only used with timestamp queries */
   struct {
      /* BO with the query timestamp values */
      struct v3dv_bo *bo;
   } timestamp;

   /* Only used with performance queries */
   struct {
      uint32_t ncounters;
      uint8_t counters[V3D_MAX_PERFCNT];

      /* V3D has a limit on the number of counters we can track in a
       * single performance monitor, so if too many counters are requested
       * we need to create multiple monitors to record all of them. This
       * field represents the number of monitors required for the number
       * of counters requested.
       */
      uint8_t nperfmons;
   } perfmon;

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult
v3dv_query_allocate_resources(struct v3dv_device *device);

void
v3dv_query_free_resources(struct v3dv_device *device);

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

void v3dv_reset_query_pool_cpu(struct v3dv_device *device,
                               struct v3dv_query_pool *query_pool,
                               uint32_t first,
                               uint32_t last);

1739 void v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
1740 struct v3dv_query_pool *pool,
1741 uint32_t query, uint32_t count,
1742 uint8_t availability);
1743
1744 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1745 uint64_t pobj,
1746 VkAllocationCallbacks *alloc);
1747 struct v3dv_cmd_buffer_private_obj {
1748 struct list_head list_link;
1749 uint64_t obj;
1750 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1751 };
1752
1753 extern const struct vk_command_buffer_ops v3dv_cmd_buffer_ops;
1754
1755 struct v3dv_cmd_buffer {
1756 struct vk_command_buffer vk;
1757
1758 struct v3dv_device *device;
1759
1760 VkCommandBufferUsageFlags usage_flags;
1761
1762 enum v3dv_cmd_buffer_status status;
1763
1764 struct v3dv_cmd_buffer_state state;
1765
1766 /* Buffer where we upload push constant data to resolve indirect indexing */
1767 struct v3dv_cl_reloc push_constants_resource;
1768
1769 /* Collection of Vulkan objects created internally by the driver (typically
1770 * during recording of meta operations) that are part of the command buffer
1771 * and should be destroyed with it.
1772 */
1773 struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1774
1775 /* Per-command buffer resources for meta operations. */
1776 struct {
1777 struct {
1778 /* The current descriptor pool for blit sources */
1779 VkDescriptorPool dspool;
1780 } blit;
1781 struct {
1782 /* The current descriptor pool for texel buffer copy sources */
1783 VkDescriptorPool dspool;
1784 } texel_buffer_copy;
1785 struct {
1786 /* The current descriptor pool for the copy query results output buffer */
1787 VkDescriptorPool dspool;
1788 } query;
1789 } meta;
1790
1791 /* List of jobs in the command buffer. For primary command buffers it
1792 * represents the jobs we want to submit to the GPU. For secondary command
1793 * buffers it represents jobs that will be merged into a primary command
1794 * buffer via vkCmdExecuteCommands.
1795 */
1796 struct list_head jobs;
1797 };
1798
1799 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1800 int32_t subpass_idx,
1801 enum v3dv_job_type type);
1802 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1803
1804 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1805 uint32_t subpass_idx);
1806 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1807 uint32_t subpass_idx);
1808
1809 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1810
1811 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1812 bool push_descriptor_state);
1813 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1814 bool needs_subpass_resume);
1815
1816 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1817 struct v3dv_query_pool *pool,
1818 uint32_t query,
1819 VkQueryControlFlags flags);
1820
1821 void v3dv_cmd_buffer_pause_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1822 void v3dv_cmd_buffer_resume_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1823
1824 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1825 struct v3dv_query_pool *pool,
1826 uint32_t query);
1827
1828 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1829 struct v3dv_query_pool *pool,
1830 uint32_t first,
1831 uint32_t count,
1832 struct v3dv_buffer *dst,
1833 uint32_t offset,
1834 uint32_t stride,
1835 VkQueryResultFlags flags);
1836
1837 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1838 struct drm_v3d_submit_tfu *tfu);
1839
1840 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
1841 struct v3dv_csd_indirect_cpu_job_info *info,
1842 const uint32_t *wg_counts);
1843
1844 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1845 uint64_t obj,
1846 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1847
1848 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1849 struct v3dv_barrier_state *src);
1850
1851 void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
1852 struct v3dv_job *job);
1853
1854 bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
1855 VkImageAspectFlags aspect,
1856 uint32_t first_subpass_idx,
1857 VkAttachmentLoadOp load_op,
1858 uint32_t last_subpass_idx,
1859 VkAttachmentStoreOp store_op);
1860
1861 bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state,
1862 VkImageAspectFlags aspect,
1863 uint32_t last_subpass_idx,
1864 VkAttachmentStoreOp store_op);
1865
1866 void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
1867 const VkDependencyInfo *info);
1868
1869 bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1870 struct v3dv_image *dst,
1871 struct v3dv_image *src,
1872 const VkImageCopy2 *region);
1873
1874 struct v3dv_event {
1875 struct vk_object_base base;
1876
1877 /* Link in the device list of pre-allocated free events */
1878 struct list_head link;
1879
1880 /* Each event gets a different index, which we use to compute the offset
1881 * in the BO we use to track its state (signaled vs reset).
1882 */
1883 uint32_t index;
1884 };
1885
1886 VkResult
1887 v3dv_event_allocate_resources(struct v3dv_device *device);
1888
1889 void
1890 v3dv_event_free_resources(struct v3dv_device *device);
1891
1892 struct v3dv_shader_variant {
1893 enum broadcom_shader_stage stage;
1894
1895 union {
1896 struct v3d_prog_data *base;
1897 struct v3d_vs_prog_data *vs;
1898 struct v3d_gs_prog_data *gs;
1899 struct v3d_fs_prog_data *fs;
1900 struct v3d_compute_prog_data *cs;
1901 } prog_data;
1902
1903 /* We explicitly save the prog_data_size as it makes it easier to
1904 * serialize.
1905 */
1906 uint32_t prog_data_size;
1907
1908 /* The assembly for this variant will be uploaded to a BO shared with all
1909 * other shader stages in that pipeline. This is the offset in that BO.
1910 */
1911 uint32_t assembly_offset;
1912
1913 /* Note: don't assume qpu_insts is always NULL or always non-NULL. In
1914 * general we try to free it as soon as we upload it to the shared BO while
1915 * we compile the different stages, but we may decide to keep it around
1916 * based on pipeline creation flags such as
1917 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT.
1918 */
1919 uint64_t *qpu_insts;
1920 uint32_t qpu_insts_size;
1921 };
1922
1923 /*
1924 * Per-stage info, useful so shader_module_compile_to_nir and other methods
1925 * don't need so many parameters.
1926 *
1927 * FIXME: for the coordinate shader and the vertex shader, module,
1928 * entrypoint, spec_info and nir are the same. Some fields are also only
1929 * relevant to certain stages, but it seemed too much of a hassle to create a
1930 * new struct just to handle that. Revisit if this kind of info starts to grow.
1931 */
1932 struct v3dv_pipeline_stage {
1933 struct v3dv_pipeline *pipeline;
1934
1935 enum broadcom_shader_stage stage;
1936
1937 const struct vk_shader_module *module;
1938 const char *entrypoint;
1939 const VkSpecializationInfo *spec_info;
1940
1941 nir_shader *nir;
1942
1943 /* The following is the combined hash of module+entrypoint+spec_info+nir */
1944 unsigned char shader_sha1[20];
1945
1946 /** A name for this program, so you can track it in shader-db output. */
1947 uint32_t program_id;
1948
1949 VkPipelineCreationFeedback feedback;
1950
1951 struct vk_pipeline_robustness_state robustness;
1952 };
1953
1954 /* We use the descriptor pool entry for two things:
1955 * * Track the allocated sets, so we can properly free them if needed.
1956 * * Track the suballocated pool BO regions, so if a descriptor set is
1957 * freed, the gap can be reallocated later.
1958 *
1959 * These only make sense if the pool was not created with the flag
1960 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
1961 */
1962 struct v3dv_descriptor_pool_entry
1963 {
1964 struct v3dv_descriptor_set *set;
1965 /* Offset and size of the subregion allocated for this entry from the
1966 * pool->bo
1967 */
1968 uint32_t offset;
1969 uint32_t size;
1970 };
1971
1972 struct v3dv_descriptor_pool {
1973 struct vk_object_base base;
1974
1975 /* A list with all descriptor sets allocated from the pool. */
1976 struct list_head set_list;
1977
1978 /* Whether this descriptor pool was allocated by the driver for internal
1979 * use, typically to implement meta operations.
1980 */
1981 bool is_driver_internal;
1982
1983 struct v3dv_bo *bo;
1984 /* Current offset into the descriptor BO. 0 means that we haven't used it
1985 * for any descriptor yet. If the descriptor BO is NULL, the current offset
1986 * is meaningless.
1987 */
1988 uint32_t current_offset;
1989
1990 /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
1991 * descriptor sets are suballocated from the pool memory tracked by the
1992 * following pointers. If the flag is set, these pointers are not used and
1993 * descriptor sets are allocated/freed individually.
1994 */
1995 uint8_t *host_memory_base;
1996 uint8_t *host_memory_ptr;
1997 uint8_t *host_memory_end;
1998
1999 uint32_t entry_count;
2000 uint32_t max_entry_count;
2001 struct v3dv_descriptor_pool_entry entries[0];
2002 };
2003
2004 struct v3dv_descriptor_set {
2005 struct vk_object_base base;
2006
2007 /* List link into the list of all sets allocated from the pool */
2008 struct list_head pool_link;
2009
2010 struct v3dv_descriptor_pool *pool;
2011
2012 struct v3dv_descriptor_set_layout *layout;
2013
2014 /* Offset relative to the descriptor pool bo for this set */
2015 uint32_t base_offset;
2016
2017 /* The descriptors below can be indexed (set/binding) using the set_layout
2018 */
2019 struct v3dv_descriptor descriptors[0];
2020 };
2021
2022 struct v3dv_descriptor_set_binding_layout {
2023 VkDescriptorType type;
2024
2025 /* Number of array elements in this binding */
2026 uint32_t array_size;
2027
2028 /* Index into the flattened descriptor set */
2029 uint32_t descriptor_index;
2030
2031 uint32_t dynamic_offset_count;
2032 uint32_t dynamic_offset_index;
2033
2034 /* Offset into the descriptor set where this descriptor lives (the final
2035 * offset in the descriptor BO needs to take set->base_offset into account).
2036 */
2037 uint32_t descriptor_offset;
2038
2039 /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
2040 * if there are no immutable samplers.
2041 */
2042 uint32_t immutable_samplers_offset;
2043
2044 /* Descriptors for multiplanar combined image samplers are larger.
2045 * For mutable descriptors, this is always 1.
2046 */
2047 uint8_t plane_stride;
2048 };
2049
2050 struct v3dv_descriptor_set_layout {
2051 struct vk_object_base base;
2052
2053 VkDescriptorSetLayoutCreateFlags flags;
2054
2055 /* Number of bindings in this descriptor set */
2056 uint32_t binding_count;
2057
2058 /* Total bo size needed for this descriptor set
2059 */
2060 uint32_t bo_size;
2061
2062 /* Shader stages affected by this descriptor set */
2063 uint16_t shader_stages;
2064
2065 /* Number of descriptors in this descriptor set */
2066 uint32_t descriptor_count;
2067
2068 /* Number of dynamic offsets used by this descriptor set */
2069 uint16_t dynamic_offset_count;
2070
2071 /* Descriptor set layouts can be destroyed even if they are still being
2072 * used.
2073 */
2074 uint32_t ref_cnt;
2075
2076 /* Bindings in this descriptor set */
2077 struct v3dv_descriptor_set_binding_layout binding[0];
2078 };
2079
2080 void
2081 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
2082 struct v3dv_descriptor_set_layout *set_layout);
2083
2084 static inline void
2085 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
2086 {
2087 assert(set_layout && set_layout->ref_cnt >= 1);
2088 p_atomic_inc(&set_layout->ref_cnt);
2089 }
2090
2091 static inline void
2092 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
2093 struct v3dv_descriptor_set_layout *set_layout)
2094 {
2095 assert(set_layout && set_layout->ref_cnt >= 1);
2096 if (p_atomic_dec_zero(&set_layout->ref_cnt))
2097 v3dv_descriptor_set_layout_destroy(device, set_layout);
2098 }
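
/* Example sketch (illustrative, not driver code): the usual ownership pattern
 * for descriptor set layouts. An object that needs to keep a set layout alive
 * takes a reference and drops it on destruction; the last unref (ref_cnt
 * reaching 0) calls v3dv_descriptor_set_layout_destroy():
 *
 *    v3dv_descriptor_set_layout_ref(set_layout);
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, set_layout);
 */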
2099
2100 struct v3dv_pipeline_layout {
2101 struct vk_object_base base;
2102
2103 struct {
2104 struct v3dv_descriptor_set_layout *layout;
2105 uint32_t dynamic_offset_start;
2106 } set[MAX_SETS];
2107
2108 uint32_t num_sets;
2109
2110 /* Shader stages that are declared to use descriptors from this layout */
2111 uint32_t shader_stages;
2112
2113 uint32_t dynamic_offset_count;
2114 uint32_t push_constant_size;
2115
2116 /* Pipeline layouts can be destroyed after the pipelines created with them
2117 * (VK_KHR_maintenance4), so we reference count them.
2118 */
2119 uint32_t ref_cnt;
2120
2121 unsigned char sha1[20];
2122 };
2123
2124 void
2125 v3dv_pipeline_layout_destroy(struct v3dv_device *device,
2126 struct v3dv_pipeline_layout *layout,
2127 const VkAllocationCallbacks *alloc);
2128
2129 static inline void
2130 v3dv_pipeline_layout_ref(struct v3dv_pipeline_layout *layout)
2131 {
2132 assert(layout && layout->ref_cnt >= 1);
2133 p_atomic_inc(&layout->ref_cnt);
2134 }
2135
2136 static inline void
2137 v3dv_pipeline_layout_unref(struct v3dv_device *device,
2138 struct v3dv_pipeline_layout *layout,
2139 const VkAllocationCallbacks *alloc)
2140 {
2141 assert(layout && layout->ref_cnt >= 1);
2142 if (p_atomic_dec_zero(&layout->ref_cnt))
2143 v3dv_pipeline_layout_destroy(device, layout, alloc);
2144 }
2145
2146 /*
2147 * We use descriptor maps for UBOs/SSBOs and textures/samplers, so the map
2148 * needs to be big enough to hold the maximum count of any of them.
2149 *
2150 * FIXME: one alternative would be to allocate each map only as big as needed
2151 * for its descriptor type. That would mean more individual allocations.
2152 */
2153 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
2154 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
2155 MAX_STORAGE_BUFFERS)
2156
2157
2158 struct v3dv_descriptor_map {
2159 /* FIXME: avoid fixed size array/justify the size */
2160 unsigned num_desc; /* Number of descriptors */
2161 int set[DESCRIPTOR_MAP_SIZE];
2162 int binding[DESCRIPTOR_MAP_SIZE];
2163 int array_index[DESCRIPTOR_MAP_SIZE];
2164 int array_size[DESCRIPTOR_MAP_SIZE];
2165 uint8_t plane[DESCRIPTOR_MAP_SIZE];
2166 bool used[DESCRIPTOR_MAP_SIZE];
2167
2168 /* NOTE: the following is only used for samplers, but this is the easiest
2169 * place to put it.
2170 */
2171 uint8_t return_size[DESCRIPTOR_MAP_SIZE];
2172 };
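
/* Example sketch (hypothetical helper, not the driver's actual code): how a
 * new (set, binding) entry could be recorded in a v3dv_descriptor_map. The
 * real driver may do extra bookkeeping (e.g. reusing existing entries), which
 * is omitted here.
 *
 *    static int
 *    example_descriptor_map_add(struct v3dv_descriptor_map *map,
 *                               int set, int binding, int array_index,
 *                               int array_size, uint8_t return_size)
 *    {
 *       assert(map->num_desc < DESCRIPTOR_MAP_SIZE);
 *       int index = map->num_desc++;
 *       map->used[index] = true;
 *       map->set[index] = set;
 *       map->binding[index] = binding;
 *       map->array_index[index] = array_index;
 *       map->array_size[index] = array_size;
 *       map->return_size[index] = return_size;
 *       return index;
 *    }
 */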
2173
2174 struct v3dv_sampler {
2175 struct vk_object_base base;
2176 struct vk_ycbcr_conversion *conversion;
2177
2178 bool compare_enable;
2179 bool unnormalized_coordinates;
2180
2181 /* Prepacked per-plane SAMPLER_STATE, referenced as part of the TMU
2182 * configuration. If needed, it is copied into the descriptor info during
2183 * vkUpdateDescriptorSets.
2184 */
2185 uint8_t plane_count;
2186 uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
2187 };
2188
2189 /* We keep two special sampler indices that represent the case where a
2190 * sampler is not needed/provided. The main use is that even without a
2191 * sampler we still need to do the output unpacking (through
2192 * nir_lower_tex). The easiest way to do this is to add these special "no
2193 * sampler" entries to the sampler_map and then use the proper unpacking
2194 * for that case.
2195 *
2196 * We have one entry for a 16-bit output size and another for a 32-bit
2197 * output size. We use the info coming from the RelaxedPrecision decoration
2198 * to decide between the two.
2199 */
2200 #define V3DV_NO_SAMPLER_16BIT_IDX 0
2201 #define V3DV_NO_SAMPLER_32BIT_IDX 1
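
/* Example sketch (illustrative): picking between the two "no sampler" entries
 * based on the desired texture return size, which in turn comes from the
 * RelaxedPrecision decoration:
 *
 *    uint32_t sampler_idx = (return_size == 16) ?
 *       V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
 */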
2202
2203 struct v3dv_descriptor_maps {
2204 struct v3dv_descriptor_map ubo_map;
2205 struct v3dv_descriptor_map ssbo_map;
2206 struct v3dv_descriptor_map sampler_map;
2207 struct v3dv_descriptor_map texture_map;
2208 };
2209
2210 /* The structure represents data shared between different objects, like the
2211 * pipeline and the pipeline cache, so we ref count it to know when it should
2212 * be freed.
2213 */
2214 struct v3dv_pipeline_shared_data {
2215 uint32_t ref_cnt;
2216
2217 unsigned char sha1_key[20];
2218
2219 struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
2220 struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
2221
2222 struct v3dv_bo *assembly_bo;
2223 };
2224
2225 struct v3dv_pipeline_executable_data {
2226 enum broadcom_shader_stage stage;
2227 char *nir_str;
2228 char *qpu_str;
2229 };
2230
2231 struct v3dv_pipeline {
2232 struct vk_object_base base;
2233
2234 struct v3dv_device *device;
2235
2236 VkShaderStageFlags active_stages;
2237 VkPipelineCreateFlags flags;
2238
2239 struct v3dv_render_pass *pass;
2240 struct v3dv_subpass *subpass;
2241
2242 struct v3dv_pipeline_stage *stages[BROADCOM_SHADER_STAGES];
2243
2244 /* Flags for whether optional pipeline stages are present, for convenience */
2245 bool has_gs;
2246
2247 /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
2248 bool uses_buffer_device_address;
2249
2250 /* Spilling memory requirements */
2251 struct {
2252 struct v3dv_bo *bo;
2253 uint32_t size_per_thread;
2254 } spill;
2255
2256 struct v3dv_dynamic_state dynamic_state;
2257
2258 struct v3dv_pipeline_layout *layout;
2259
2260 /* Whether this pipeline enables depth writes */
2261 bool z_updates_enable;
2262
2263 enum v3dv_ez_state ez_state;
2264
2265 /* If ez_state is V3D_EZ_DISABLED, whether the reason for disabling is that
2266 * the pipeline selects an incompatible depth test function.
2267 */
2268 bool incompatible_ez_test;
2269
2270 bool msaa;
2271 bool sample_rate_shading;
2272 uint32_t sample_mask;
2273
2274 bool primitive_restart;
2275 bool negative_one_to_one;
2276
2277 /* Accessed by binding, so vb[binding].stride is the stride of the vertex
2278 * array with that binding.
2279 */
2280 struct v3dv_pipeline_vertex_binding {
2281 uint32_t stride;
2282 uint32_t instance_divisor;
2283 } vb[MAX_VBS];
2284 uint32_t vb_count;
2285
2286 /* Note that a lot of info from VkVertexInputAttributeDescription is
2287 * already prepacked, so here we only store the fields that need to be
2288 * rechecked later. The array must be indexed by driver location, since
2289 * that is the order in which we need to emit the attributes.
2290 */
2291 struct v3dv_pipeline_vertex_attrib {
2292 uint32_t binding;
2293 uint32_t offset;
2294 VkFormat vk_format;
2295 } va[MAX_VERTEX_ATTRIBS];
2296 uint32_t va_count;
2297
2298 enum mesa_prim topology;
2299
2300 struct v3dv_pipeline_shared_data *shared_data;
2301
2302 /* Combined hash of the stages sha1, the layout sha1 and the pipeline key sha1. */
2303 unsigned char sha1[20];
2304
2305 /* In general we can reuse v3dv_device->default_attribute_float, so note
2306 * that the following can be NULL. In 7.x this is not used, so it will
2307 * always be NULL.
2308 *
2309 * FIXME: the content of this BO will be small, so it could be improved by
2310 * uploading it to a common BO. But since in most cases it will be NULL,
2311 * this is not a priority.
2312 */
2313 struct v3dv_bo *default_attribute_values;
2314
2315 struct vpm_config vpm_cfg;
2316 struct vpm_config vpm_cfg_bin;
2317
2318 /* If the pipeline should emit any of the stencil configuration packets */
2319 bool emit_stencil_cfg[2];
2320
2321 /* Blend state */
2322 struct {
2323 /* Per-RT bit mask with blend enables */
2324 uint8_t enables;
2325 /* Per-RT prepacked blend config packets */
2326 uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2327 /* Flag indicating whether the blend factors in use require
2328 * color constants.
2329 */
2330 bool needs_color_constants;
2331 /* Mask with enabled color channels for each RT (4 bits per RT) */
2332 uint32_t color_write_masks;
2333 } blend;
2334
2335 /* Depth bias */
2336 struct {
2337 bool enabled;
2338 bool is_z16;
2339 } depth_bias;
2340
2341 /* Depth bounds */
2342 bool depth_bounds_test_enabled;
2343
2344 struct {
2345 void *mem_ctx;
2346 struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2347 } executables;
2348
2349 /* Packets prepacked during pipeline creation
2350 */
2351 uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2352 uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2353 uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2354 uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2355 MAX_VERTEX_ATTRIBS];
2356 uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2357 };
2358
2359 static inline bool
2360 v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
2361 {
2362 return device->devinfo.ver > 71 ||
2363 (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
2364 }
2365
2366 static inline VkPipelineBindPoint
2367 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2368 {
2369 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2370 !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2371 return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2372 VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2373 }
2374
2375 static inline struct v3dv_descriptor_state*
2376 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2377 struct v3dv_pipeline *pipeline)
2378 {
2379 if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2380 return &cmd_buffer->state.compute.descriptor_state;
2381 else
2382 return &cmd_buffer->state.gfx.descriptor_state;
2383 }
2384
2385 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
2386
2387 uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
2388 uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
2389
2390 #define v3dv_debug_ignored_stype(sType) \
2391 mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
2392
2393 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f,
2394 uint8_t plane);
2395 const struct v3dv_format *
2396 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2397 uint32_t bpp, VkFormat *out_vk_format);
2398 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2399 VkFormat vk_format,
2400 VkFormatFeatureFlags2 features);
2401
2402 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2403 struct v3dv_pipeline *pipeline,
2404 struct v3dv_shader_variant *variant);
2405
2406 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2407 struct v3dv_pipeline *pipeline,
2408 struct v3dv_shader_variant *variant,
2409 uint32_t **wg_count_offsets);
2410
2411 struct v3dv_shader_variant *
2412 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2413 struct v3dv_pipeline_cache *cache,
2414 struct v3d_key *key,
2415 size_t key_size,
2416 const VkAllocationCallbacks *pAllocator,
2417 VkResult *out_vk_result);
2418
2419 struct v3dv_shader_variant *
2420 v3dv_shader_variant_create(struct v3dv_device *device,
2421 enum broadcom_shader_stage stage,
2422 struct v3d_prog_data *prog_data,
2423 uint32_t prog_data_size,
2424 uint32_t assembly_offset,
2425 uint64_t *qpu_insts,
2426 uint32_t qpu_insts_size,
2427 VkResult *out_vk_result);
2428
2429 void
2430 v3dv_shader_variant_destroy(struct v3dv_device *device,
2431 struct v3dv_shader_variant *variant);
2432
2433 static inline void
2434 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2435 {
2436 assert(shared_data && shared_data->ref_cnt >= 1);
2437 p_atomic_inc(&shared_data->ref_cnt);
2438 }
2439
2440 void
2441 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2442 struct v3dv_pipeline_shared_data *shared_data);
2443
2444 static inline void
2445 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2446 struct v3dv_pipeline_shared_data *shared_data)
2447 {
2448 assert(shared_data && shared_data->ref_cnt >= 1);
2449 if (p_atomic_dec_zero(&shared_data->ref_cnt))
2450 v3dv_pipeline_shared_data_destroy(device, shared_data);
2451 }
2452
2453 struct v3dv_descriptor *
2454 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2455 struct v3dv_descriptor_map *map,
2456 struct v3dv_pipeline_layout *pipeline_layout,
2457 uint32_t index,
2458 uint32_t *dynamic_offset);
2459
2460 struct v3dv_cl_reloc
2461 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2462 struct v3dv_descriptor_state *descriptor_state,
2463 struct v3dv_descriptor_map *map,
2464 struct v3dv_pipeline_layout *pipeline_layout,
2465 uint32_t index,
2466 VkDescriptorType *out_type);
2467
2468 const struct v3dv_sampler *
2469 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2470 struct v3dv_descriptor_map *map,
2471 struct v3dv_pipeline_layout *pipeline_layout,
2472 uint32_t index);
2473
2474 struct v3dv_cl_reloc
2475 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2476 struct v3dv_descriptor_state *descriptor_state,
2477 struct v3dv_descriptor_map *map,
2478 struct v3dv_pipeline_layout *pipeline_layout,
2479 uint32_t index);
2480
2481 struct v3dv_cl_reloc
2482 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2483 struct v3dv_descriptor_state *descriptor_state,
2484 struct v3dv_descriptor_map *map,
2485 struct v3dv_pipeline_layout *pipeline_layout,
2486 uint32_t index);
2487
2488 struct v3dv_bo*
2489 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2490 struct v3dv_descriptor_map *map,
2491 struct v3dv_pipeline_layout *pipeline_layout,
2492 uint32_t index);
2493
2494 static inline const struct v3dv_sampler *
2495 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2496 const struct v3dv_descriptor_set_binding_layout *binding)
2497 {
2498 assert(binding->immutable_samplers_offset);
2499 return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2500 }
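
/* Example sketch (illustrative): fetching the immutable sampler for array
 * element 'i' of a binding, assuming the binding actually declares immutable
 * samplers:
 *
 *    if (binding_layout->immutable_samplers_offset) {
 *       const struct v3dv_sampler *samplers =
 *          v3dv_immutable_samplers(set_layout, binding_layout);
 *       const struct v3dv_sampler *sampler = &samplers[i];
 *       ...
 *    }
 */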
2501
2502 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2503 struct v3dv_device *device,
2504 VkPipelineCacheCreateFlags,
2505 bool cache_enabled);
2506
2507 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2508
2509 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2510 struct v3dv_pipeline_cache *cache,
2511 nir_shader *nir,
2512 unsigned char sha1_key[20]);
2513
2514 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2515 struct v3dv_pipeline_cache *cache,
2516 const nir_shader_compiler_options *nir_options,
2517 unsigned char sha1_key[20]);
2518
2519 struct v3dv_pipeline_shared_data *
2520 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2521 unsigned char sha1_key[20],
2522 bool *cache_hit);
2523
2524 void
2525 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2526 struct v3dv_pipeline_cache *cache);
2527
2528 VkResult
2529 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
2530 nir_shader *nir,
2531 VkPipelineLayout pipeline_layout,
2532 VkPipeline *pipeline);
2533
2534 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
2535 VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2536
2537 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2538 VK_OBJECT_TYPE_COMMAND_BUFFER)
2539 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2540 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2541 VK_OBJECT_TYPE_INSTANCE)
2542 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2543 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2544 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2545
2546 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2547 VK_OBJECT_TYPE_BUFFER)
2548 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2549 VK_OBJECT_TYPE_BUFFER_VIEW)
2550 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
2551 VK_OBJECT_TYPE_DEVICE_MEMORY)
2552 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2553 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2554 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2555 VK_OBJECT_TYPE_DESCRIPTOR_SET)
2556 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2557 VkDescriptorSetLayout,
2558 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2559 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2560 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2561 VK_OBJECT_TYPE_FRAMEBUFFER)
2562 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2563 VK_OBJECT_TYPE_IMAGE)
2564 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2565 VK_OBJECT_TYPE_IMAGE_VIEW)
2566 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2567 VK_OBJECT_TYPE_PIPELINE)
2568 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2569 VK_OBJECT_TYPE_PIPELINE_CACHE)
2570 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2571 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2572 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2573 VK_OBJECT_TYPE_QUERY_POOL)
2574 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2575 VK_OBJECT_TYPE_RENDER_PASS)
2576 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2577 VK_OBJECT_TYPE_SAMPLER)
2578
2579 static inline int
2580 v3dv_ioctl(int fd, unsigned long request, void *arg)
2581 {
2582 if (using_v3d_simulator)
2583 return v3d_simulator_ioctl(fd, request, arg);
2584 else
2585 return drmIoctl(fd, request, arg);
2586 }
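
/* Example sketch (illustrative): waiting on a BO through the ioctl wrapper,
 * which transparently routes to the simulator when it is in use. Assumes
 * 'render_fd' is the open render node fd and 'bo_handle' a valid GEM handle
 * (DRM_IOCTL_V3D_WAIT_BO comes from drm-uapi/v3d_drm.h):
 *
 *    struct drm_v3d_wait_bo wait = {
 *       .handle = bo_handle,
 *       .timeout_ns = ~0ull,
 *    };
 *    int ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 */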
2587
2588 /* Flags OOM conditions in command buffer state.
2589 *
2590 * Note that no-op jobs don't have a command buffer reference.
2591 */
2592 static inline void
2593 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2594 {
2595 if (cmd_buffer) {
2596 cmd_buffer->state.oom = true;
2597 } else {
2598 assert(job);
2599 if (job->cmd_buffer)
2600 job->cmd_buffer->state.oom = true;
2601 }
2602 }
2603
2604 #define v3dv_return_if_oom(_cmd_buffer, _job) do { \
2605 const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer; \
2606 if (__cmd_buffer && __cmd_buffer->state.oom) \
2607 return; \
2608 const struct v3dv_job *__job = _job; \
2609 if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom) \
2610 return; \
2611 } while(0) \
2612
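/* Example sketch (illustrative, hypothetical allocation): the typical pattern
 * in command recording paths. An allocation failure flags OOM on the command
 * buffer and later entry points bail out early:
 *
 *    v3dv_return_if_oom(cmd_buffer, NULL);
 *
 *    void *data = example_allocate();   // hypothetical allocation
 *    if (!data) {
 *       v3dv_flag_oom(cmd_buffer, NULL);
 *       return;
 *    }
 */
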
2613 static inline uint32_t
2614 u64_hash(const void *key)
2615 {
2616 return _mesa_hash_data(key, sizeof(uint64_t));
2617 }
2618
2619 static inline bool
2620 u64_compare(const void *key1, const void *key2)
2621 {
2622 return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2623 }
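
/* Example sketch (illustrative): these helpers allow keying a hash table from
 * util/hash_table.h by 64-bit values (e.g. Vulkan non-dispatchable handles).
 * The key must point to stable uint64_t storage:
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *    _mesa_hash_table_insert(ht, &some_uint64_key, data);
 */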
2624
2625 /* Helper to call hw ver specific functions */
2626 #define v3dv_X(device, thing) ({ \
2627 __typeof(&v3d42_##thing) v3d_X_thing; \
2628 switch (device->devinfo.ver) { \
2629 case 42: \
2630 v3d_X_thing = &v3d42_##thing; \
2631 break; \
2632 case 71: \
2633 v3d_X_thing = &v3d71_##thing; \
2634 break; \
2635 default: \
2636 unreachable("Unsupported hardware generation"); \
2637 } \
2638 v3d_X_thing; \
2639 })
2640
2641 /* Helper to get hw-specific macro values */
2642 #define V3DV_X(device, thing) ({ \
2643 __typeof(V3D42_##thing) V3D_X_THING; \
2644 switch (device->devinfo.ver) { \
2645 case 42: \
2646 V3D_X_THING = V3D42_##thing; \
2647 break; \
2648 case 71: \
2649 V3D_X_THING = V3D71_##thing; \
2650 break; \
2651 default: \
2652 unreachable("Unsupported hardware generation"); \
2653 } \
2654 V3D_X_THING; \
2655 })
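
/* Example sketch (illustrative, hypothetical names): calling a per-version
 * implementation and reading a per-version value. For a function declared as
 * v3d42_foo()/v3d71_foo() in v3dvx_private.h and a value defined as
 * V3D42_BAR/V3D71_BAR:
 *
 *    v3dv_X(device, foo)(cmd_buffer);
 *    uint32_t bar = V3DV_X(device, BAR);
 */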
2656
2657
2658
2659 /* v3d_macros from common requires v3dX and V3DX definitions. Below we define
2660 * v3dX for each supported version, because when we compile code that is not
2661 * version-specific, all version-specific macros need to already be
2662 * defined.
2663 */
2664 #ifdef v3dX
2665 # include "v3dvx_private.h"
2666 #else
2667 # define v3dX(x) v3d42_##x
2668 # include "v3dvx_private.h"
2669 # undef v3dX
2670
2671 # define v3dX(x) v3d71_##x
2672 # include "v3dvx_private.h"
2673 # undef v3dX
2674 #endif
2675
2676 VkResult
2677 v3dv_update_image_layout(struct v3dv_device *device,
2678 struct v3dv_image *image,
2679 uint64_t modifier,
2680 bool disjoint,
2681 const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
2682
2683 #if DETECT_OS_ANDROID
2684 VkResult
2685 v3dv_gralloc_to_drm_explicit_layout(struct u_gralloc *gralloc,
2686 struct u_gralloc_buffer_handle *in_hnd,
2687 VkImageDrmFormatModifierExplicitCreateInfoEXT *out,
2688 VkSubresourceLayout *out_layouts,
2689 int max_planes);
2690
2691 VkResult
2692 v3dv_import_native_buffer_fd(VkDevice device_h,
2693 int dma_buf,
2694 const VkAllocationCallbacks *alloc,
2695 VkImage image_h);
2696 #endif /* DETECT_OS_ANDROID */
2697
2698 #endif /* V3DV_PRIVATE_H */
2699