1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
6 *
7 * based in part on radv driver which is:
8 * Copyright © 2016 Red Hat.
9 * Copyright © 2016 Bas Nieuwenhuizen
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
20 * Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38
39 #include "vk_device.h"
40 #include "vk_format.h"
41 #include "vk_instance.h"
42 #include "vk_image.h"
43 #include "vk_log.h"
44 #include "vk_physical_device.h"
45 #include "vk_shader_module.h"
46 #include "vk_sync.h"
47 #include "vk_sync_timeline.h"
48 #include "vk_util.h"
49
50 #include "vk_command_buffer.h"
51 #include "vk_command_pool.h"
52 #include "vk_queue.h"
53
54 #include <xf86drm.h>
55
56 #ifdef HAVE_VALGRIND
57 #include <valgrind.h>
58 #include <memcheck.h>
59 #define VG(x) x
60 #else
61 #define VG(x) ((void)0)
62 #endif
63
64 #include "v3dv_limits.h"
65
66 #include "common/v3d_device_info.h"
67 #include "common/v3d_limits.h"
68 #include "common/v3d_tiling.h"
69 #include "common/v3d_util.h"
70
71 #include "compiler/shader_enums.h"
72 #include "compiler/spirv/nir_spirv.h"
73
74 #include "compiler/v3d_compiler.h"
75
76 #include "vk_debug_report.h"
77 #include "util/set.h"
78 #include "util/hash_table.h"
79 #include "util/sparse_array.h"
80 #include "util/xmlconfig.h"
81 #include "u_atomic.h"
82
83 #include "v3dv_entrypoints.h"
84 #include "v3dv_bo.h"
85
86 #include "drm-uapi/v3d_drm.h"
87
88 #include "vk_alloc.h"
89 #include "simulator/v3d_simulator.h"
90
91 #include "v3dv_cl.h"
92
93 #include "wsi_common.h"
94
95 /* A non-fatal assert. Useful for debugging. */
96 #ifdef DEBUG
97 #define v3dv_assert(x) ({ \
98 if (unlikely(!(x))) \
99 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
100 })
101 #else
102 #define v3dv_assert(x)
103 #endif
104
105 #define perf_debug(...) do { \
106 if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
107 fprintf(stderr, __VA_ARGS__); \
108 } while (0)
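
/* Illustrative use of perf_debug (hypothetical call site): when the
 * V3D_DEBUG_PERF flag is set this logs to stderr, otherwise it compiles
 * down to just the flag check.
 *
 *    perf_debug("Falling back to a draw-based clear for GFXH-1461\n");
 */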
109
110 struct v3dv_instance;
111
112 #ifdef USE_V3D_SIMULATOR
113 #define using_v3d_simulator true
114 #else
115 #define using_v3d_simulator false
116 #endif
117
118 struct v3d_simulator_file;
119
120 /* Minimum required by the Vulkan 1.1 spec */
121 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
122
123 struct v3dv_physical_device {
124 struct vk_physical_device vk;
125
126 char *name;
127 int32_t render_fd;
128 int32_t display_fd;
129 int32_t master_fd;
130
131 /* We need these because it is not clear how to detect
132 * valid devids in a portable way
133 */
134 bool has_primary;
135 bool has_render;
136
137 dev_t primary_devid;
138 dev_t render_devid;
139
140 #if using_v3d_simulator
141 uint32_t device_id;
142 #endif
143
144 uint8_t driver_build_sha1[20];
145 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
146 uint8_t device_uuid[VK_UUID_SIZE];
147 uint8_t driver_uuid[VK_UUID_SIZE];
148
149 struct vk_sync_type drm_syncobj_type;
150 struct vk_sync_timeline_type sync_timeline_type;
151 const struct vk_sync_type *sync_types[3];
152
153 struct disk_cache *disk_cache;
154
155 mtx_t mutex;
156
157 struct wsi_device wsi_device;
158
159 VkPhysicalDeviceMemoryProperties memory;
160
161 struct v3d_device_info devinfo;
162
163 struct v3d_simulator_file *sim_file;
164
165 const struct v3d_compiler *compiler;
166 uint32_t next_program_id;
167
168 /* This array holds all our 'struct v3dv_bo' allocations. We use this
169 * so we can add a refcount to our BOs and check if a particular BO
170 * was already allocated in this device using its GEM handle. This is
171 * necessary to properly manage BO imports, because the kernel doesn't
172 * refcount the underlying BO memory.
173 *
174 * Specifically, when self-importing (i.e. importing a BO into the same
175 * device that created it), the kernel will give us the same BO handle
176 * for both BOs and we must only free it once when both references are
177 * freed. Otherwise, if we are not self-importing, we get two different BO
178 * handles, and we want to free each one individually.
179 *
180 * The BOs in this map all have a refcnt tracking their reference count,
181 * and only self-imported BOs will ever have a refcnt > 1.
182 */
183 struct util_sparse_array bo_map;
184
185 struct {
186 bool merge_jobs;
187 } options;
188
189 struct {
190 bool multisync;
191 bool perfmon;
192 } caps;
193 };
194
195 VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
196 struct v3dv_physical_device *pdevice,
197 VkIcdSurfaceBase *surface);
198
199 static inline struct v3dv_bo *
200 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
201 {
202 return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
203 }
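
/* Illustrative sketch of how bo_map is meant to be used when importing a BO
 * (hypothetical and simplified; the helper name init_imported_bo is not the
 * real import path):
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt == 0)
 *       init_imported_bo(bo, gem_handle, size);   // first time we see handle
 *    else
 *       p_atomic_inc(&bo->refcnt);                // self-import: same handle
 */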
204
205 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
206 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
207 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
208 uint32_t index);
209
210 void v3dv_meta_clear_init(struct v3dv_device *device);
211 void v3dv_meta_clear_finish(struct v3dv_device *device);
212
213 void v3dv_meta_blit_init(struct v3dv_device *device);
214 void v3dv_meta_blit_finish(struct v3dv_device *device);
215
216 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
217 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
218
219 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
220 const VkOffset3D *offset,
221 VkFormat *compat_format);
222
223 struct v3dv_instance {
224 struct vk_instance vk;
225
226 int physicalDeviceCount;
227 struct v3dv_physical_device physicalDevice;
228
229 bool pipeline_cache_enabled;
230 bool default_pipeline_cache_enabled;
231 };
232
233 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
234 * tfu), we still need a syncobj to track the last overall job submitted
235 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
236 * start expecting multisync to be present and drop the legacy implementation
237 * together with this V3DV_QUEUE_ANY tracker.
238 */
239 enum v3dv_queue_type {
240 V3DV_QUEUE_CL = 0,
241 V3DV_QUEUE_CSD,
242 V3DV_QUEUE_TFU,
243 V3DV_QUEUE_ANY,
244 V3DV_QUEUE_COUNT,
245 };
246
247 /* For each GPU queue, we use a syncobj to track the last job submitted. We
248 * set the flag `first` to determine when we are starting a new cmd buffer
249 * batch, and therefore whether a job submitted to a given queue will be the
250 * first in that batch.
251 */
252 struct v3dv_last_job_sync {
253 /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
254 *
255 * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
256 */
257 bool first[V3DV_QUEUE_COUNT];
258 /* Array of syncobj to track the last job submitted to a GPU queue.
259 *
260 * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
261 * queue, but without multisync we only track the last job submitted to any
262 * queue in V3DV_QUEUE_ANY.
263 */
264 uint32_t syncs[V3DV_QUEUE_COUNT];
265 };
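
/* Illustrative sketch (hypothetical helper, not part of the driver): which
 * slot of last_job_syncs a submission should update, per the comments above.
 *
 *    static inline enum v3dv_queue_type
 *    last_job_sync_slot(bool has_multisync, enum v3dv_queue_type queue)
 *    {
 *       return has_multisync ? queue : V3DV_QUEUE_ANY;
 *    }
 */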
266
267 struct v3dv_queue {
268 struct vk_queue vk;
269
270 struct v3dv_device *device;
271
272 struct v3dv_last_job_sync last_job_syncs;
273
274 struct v3dv_job *noop_job;
275
276 /* The last active perfmon ID to prevent mixing of counter results when a
277 * job is submitted with a different perfmon id.
278 */
279 uint32_t last_perfmon_id;
280 };
281
282 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
283 struct vk_queue_submit *submit);
284
285 #define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
286 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
287 sizeof(VkComponentMapping))
288
289 struct v3dv_meta_color_clear_pipeline {
290 VkPipeline pipeline;
291 VkRenderPass pass;
292 bool cached;
293 uint64_t key;
294 };
295
296 struct v3dv_meta_depth_clear_pipeline {
297 VkPipeline pipeline;
298 uint64_t key;
299 };
300
301 struct v3dv_meta_blit_pipeline {
302 VkPipeline pipeline;
303 VkRenderPass pass;
304 VkRenderPass pass_no_load;
305 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
306 };
307
308 struct v3dv_meta_texel_buffer_copy_pipeline {
309 VkPipeline pipeline;
310 VkRenderPass pass;
311 VkRenderPass pass_no_load;
312 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
313 };
314
315 struct v3dv_pipeline_key {
316 bool robust_buffer_access;
317 uint8_t topology;
318 uint8_t logicop_func;
319 bool msaa;
320 bool sample_coverage;
321 bool sample_alpha_to_coverage;
322 bool sample_alpha_to_one;
323 uint8_t cbufs;
324 struct {
325 enum pipe_format format;
326 uint8_t swizzle[4];
327 } color_fmt[V3D_MAX_DRAW_BUFFERS];
328 uint8_t f32_color_rb;
329 uint32_t va_swap_rb_mask;
330 bool has_multiview;
331 };
332
333 struct v3dv_pipeline_cache_stats {
334 uint32_t miss;
335 uint32_t hit;
336 uint32_t count;
337 uint32_t on_disk_hit;
338 };
339
340 /* Equivalent to gl_shader_stage, but including the coordinate shaders
341 *
342 * FIXME: perhaps move to common
343 */
344 enum broadcom_shader_stage {
345 BROADCOM_SHADER_VERTEX,
346 BROADCOM_SHADER_VERTEX_BIN,
347 BROADCOM_SHADER_GEOMETRY,
348 BROADCOM_SHADER_GEOMETRY_BIN,
349 BROADCOM_SHADER_FRAGMENT,
350 BROADCOM_SHADER_COMPUTE,
351 };
352
353 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
354
355 /* Assumes that coordinate shaders will be custom-handled by the caller */
356 static inline enum broadcom_shader_stage
357 gl_shader_stage_to_broadcom(gl_shader_stage stage)
358 {
359 switch (stage) {
360 case MESA_SHADER_VERTEX:
361 return BROADCOM_SHADER_VERTEX;
362 case MESA_SHADER_GEOMETRY:
363 return BROADCOM_SHADER_GEOMETRY;
364 case MESA_SHADER_FRAGMENT:
365 return BROADCOM_SHADER_FRAGMENT;
366 case MESA_SHADER_COMPUTE:
367 return BROADCOM_SHADER_COMPUTE;
368 default:
369 unreachable("Unknown gl shader stage");
370 }
371 }
372
373 static inline gl_shader_stage
374 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
375 {
376 switch (stage) {
377 case BROADCOM_SHADER_VERTEX:
378 case BROADCOM_SHADER_VERTEX_BIN:
379 return MESA_SHADER_VERTEX;
380 case BROADCOM_SHADER_GEOMETRY:
381 case BROADCOM_SHADER_GEOMETRY_BIN:
382 return MESA_SHADER_GEOMETRY;
383 case BROADCOM_SHADER_FRAGMENT:
384 return MESA_SHADER_FRAGMENT;
385 case BROADCOM_SHADER_COMPUTE:
386 return MESA_SHADER_COMPUTE;
387 default:
388 unreachable("Unknown broadcom shader stage");
389 }
390 }
391
392 static inline bool
393 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
394 {
395 switch (stage) {
396 case BROADCOM_SHADER_VERTEX_BIN:
397 case BROADCOM_SHADER_GEOMETRY_BIN:
398 return true;
399 default:
400 return false;
401 }
402 }
403
404 static inline bool
405 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
406 {
407 switch (stage) {
408 case BROADCOM_SHADER_VERTEX:
409 case BROADCOM_SHADER_GEOMETRY:
410 return true;
411 default:
412 return false;
413 }
414 }
415
416 static inline enum broadcom_shader_stage
417 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
418 {
419 switch (stage) {
420 case BROADCOM_SHADER_VERTEX:
421 return BROADCOM_SHADER_VERTEX_BIN;
422 case BROADCOM_SHADER_GEOMETRY:
423 return BROADCOM_SHADER_GEOMETRY_BIN;
424 default:
425 unreachable("Invalid shader stage");
426 }
427 }
428
429 static inline const char *
430 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
431 {
432 switch(stage) {
433 case BROADCOM_SHADER_VERTEX_BIN:
434 return "MESA_SHADER_VERTEX_BIN";
435 case BROADCOM_SHADER_GEOMETRY_BIN:
436 return "MESA_SHADER_GEOMETRY_BIN";
437 default:
438 return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
439 }
440 }
441
442 struct v3dv_pipeline_cache {
443 struct vk_object_base base;
444
445 struct v3dv_device *device;
446 mtx_t mutex;
447
448 struct hash_table *nir_cache;
449 struct v3dv_pipeline_cache_stats nir_stats;
450
451 struct hash_table *cache;
452 struct v3dv_pipeline_cache_stats stats;
453
454 /* For VK_EXT_pipeline_creation_cache_control. */
455 bool externally_synchronized;
456 };
457
458 struct v3dv_device {
459 struct vk_device vk;
460
461 struct v3dv_instance *instance;
462 struct v3dv_physical_device *pdevice;
463
464 struct v3d_device_info devinfo;
465 struct v3dv_queue queue;
466
467 /* Guards query->maybe_available and value for timestamps */
468 mtx_t query_mutex;
469
470 /* Signaled whenever a query is ended */
471 cnd_t query_ended;
472
473 /* Resources used for meta operations */
474 struct {
475 mtx_t mtx;
476 struct {
477 VkPipelineLayout p_layout;
478 struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
479 } color_clear;
480 struct {
481 VkPipelineLayout p_layout;
482 struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
483 } depth_clear;
484 struct {
485 VkDescriptorSetLayout ds_layout;
486 VkPipelineLayout p_layout;
487 struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
488 } blit;
489 struct {
490 VkDescriptorSetLayout ds_layout;
491 VkPipelineLayout p_layout;
492 struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
493 } texel_buffer_copy;
494 } meta;
495
496 struct v3dv_bo_cache {
497 /** List of struct v3dv_bo freed, by age. */
498 struct list_head time_list;
499 /** List of struct v3dv_bo freed, per size, by age. */
500 struct list_head *size_list;
501 uint32_t size_list_size;
502
503 mtx_t lock;
504
505 uint32_t cache_size;
506 uint32_t cache_count;
507 uint32_t max_cache_size;
508 } bo_cache;
509
510 uint32_t bo_size;
511 uint32_t bo_count;
512
513 struct v3dv_pipeline_cache default_pipeline_cache;
514
515 /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
516 * following covers the most common case, that is, all attribute formats
517 * being float, allowing us to reuse the same BO for all
518 * pipelines matching this requirement. Pipelines that need integer
519 * attributes will create their own BO.
520 */
521 struct v3dv_bo *default_attribute_float;
522 VkPhysicalDeviceFeatures features;
523
524 void *device_address_mem_ctx;
525 struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
526
527 #ifdef ANDROID
528 const void *gralloc;
529 enum {
530 V3DV_GRALLOC_UNKNOWN,
531 V3DV_GRALLOC_CROS,
532 V3DV_GRALLOC_OTHER,
533 } gralloc_type;
534 #endif
535 };
536
537 struct v3dv_device_memory {
538 struct vk_object_base base;
539
540 struct v3dv_bo *bo;
541 const VkMemoryType *type;
542 bool is_for_wsi;
543 bool is_for_device_address;
544 };
545
546 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
547 #define TEXTURE_DATA_FORMAT_NO 255
548
549 struct v3dv_format {
550 bool supported;
551
552 /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
553 uint8_t rt_type;
554
555 /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
556 uint8_t tex_type;
557
558 /* Swizzle to apply to the RGBA shader output for storing to the tile
559 * buffer, to the RGBA tile buffer to produce shader input (for
560 * blending), and for turning the rgba8888 texture sampler return
561 * value into shader rgba values.
562 */
563 uint8_t swizzle[4];
564
565 /* Whether the return value is 16F/I/UI or 32F/I/UI. */
566 uint8_t return_size;
567
568 /* If the format supports (linear) filtering when texturing. */
569 bool supports_filtering;
570 };
571
572 struct v3d_resource_slice {
573 uint32_t offset;
574 uint32_t stride;
575 uint32_t padded_height;
576 /* Size of a single pane of the slice. For 3D textures, there will be
577 * a number of panes equal to the minified, power-of-two-aligned
578 * depth.
579 */
580 uint32_t size;
581 uint8_t ub_pad;
582 enum v3d_tiling_mode tiling;
583 uint32_t padded_height_of_output_image_in_uif_blocks;
584 };
585
586 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
587 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
588
589 struct v3dv_image {
590 struct vk_image vk;
591
592 const struct v3dv_format *format;
593 uint32_t cpp;
594 bool tiled;
595
596 struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
597 uint64_t size; /* Total size in bytes */
598 uint32_t cube_map_stride;
599
600 struct v3dv_device_memory *mem;
601 VkDeviceSize mem_offset;
602 uint32_t alignment;
603
604 #ifdef ANDROID
605 /* Image is backed by VK_ANDROID_native_buffer. */
606 bool is_native_buffer_memory;
607 #endif
608 };
609
610 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
611
612 /* Pre-generating packets needs to consider changes in packet sizes across hw
613 * versions. Keep things simple and allocate enough space for any supported
614 * version. We ensure the size is large enough through static asserts.
615 */
616 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
617 #define V3DV_SAMPLER_STATE_LENGTH 24
618 #define V3DV_BLEND_CFG_LENGTH 5
619 #define V3DV_CFG_BITS_LENGTH 4
620 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
621 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
622 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
623 #define V3DV_STENCIL_CFG_LENGTH 6
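
/* Illustrative example of the static asserts mentioned above (hypothetical;
 * the real checks live in the per-version v3dvx code, where the packet
 * definitions are available):
 *
 *    STATIC_ASSERT(V3DV_TEXTURE_SHADER_STATE_LENGTH >=
 *                  cl_packet_length(TEXTURE_SHADER_STATE));
 */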
624
625 struct v3dv_image_view {
626 struct vk_image_view vk;
627
628 const struct v3dv_format *format;
629 bool swap_rb;
630 bool channel_reverse;
631 uint32_t internal_bpp;
632 uint32_t internal_type;
633 uint32_t offset;
634
635 /* Precomputed (composed from createinfo->components and format swizzle)
636 * swizzles to pass in to the shader key.
637 *
638 * This could also be included in the descriptor bo, but the shader state
639 * packet doesn't need it in a bo, so we can just avoid a memory copy.
640 */
641 uint8_t swizzle[4];
642
643 /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
644 * during UpdateDescriptorSets.
645 *
646 * Empirical tests show that cube arrays need a different shader state
647 * depending on whether they are used with a sampler or not, so for these
648 * we generate two states and select the one to use based on the descriptor
649 * type.
650 */
651 uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
652 };
653
654 VkResult v3dv_create_image_view(struct v3dv_device *device,
655 const VkImageViewCreateInfo *pCreateInfo,
656 VkImageView *pView);
657
658 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
659
660 struct v3dv_buffer {
661 struct vk_object_base base;
662
663 VkDeviceSize size;
664 VkBufferUsageFlags usage;
665 uint32_t alignment;
666
667 struct v3dv_device_memory *mem;
668 VkDeviceSize mem_offset;
669 };
670
671 struct v3dv_buffer_view {
672 struct vk_object_base base;
673
674 struct v3dv_buffer *buffer;
675
676 VkFormat vk_format;
677 const struct v3dv_format *format;
678 uint32_t internal_bpp;
679 uint32_t internal_type;
680
681 uint32_t offset;
682 uint32_t size;
683 uint32_t num_elements;
684
685 /* Prepacked TEXTURE_SHADER_STATE. */
686 uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
687 };
688
689 struct v3dv_subpass_attachment {
690 uint32_t attachment;
691 VkImageLayout layout;
692 };
693
694 struct v3dv_subpass {
695 uint32_t input_count;
696 struct v3dv_subpass_attachment *input_attachments;
697
698 uint32_t color_count;
699 struct v3dv_subpass_attachment *color_attachments;
700 struct v3dv_subpass_attachment *resolve_attachments;
701
702 struct v3dv_subpass_attachment ds_attachment;
703 struct v3dv_subpass_attachment ds_resolve_attachment;
704 bool resolve_depth, resolve_stencil;
705
706 /* If we need to emit the clear of the depth/stencil attachment using a
707 * draw call instead of using the TLB (GFXH-1461).
708 */
709 bool do_depth_clear_with_draw;
710 bool do_stencil_clear_with_draw;
711
712 /* Multiview */
713 uint32_t view_mask;
714 };
715
716 struct v3dv_render_pass_attachment {
717 VkAttachmentDescription2 desc;
718
719 uint32_t first_subpass;
720 uint32_t last_subpass;
721
722 /* When multiview is enabled, we no longer care about when a particular
723 * attachment is first or last used in a render pass, since not all views
724 * in the attachment will meet that criteria. Instead, we need to track
725 * each individual view (layer) in each attachment and emit our stores,
726 * loads and clears accordingly.
727 */
728 struct {
729 uint32_t first_subpass;
730 uint32_t last_subpass;
731 } views[MAX_MULTIVIEW_VIEW_COUNT];
732
733 /* If this is a multisampled attachment that is going to be resolved,
734 * whether we may be able to use the TLB hardware resolve based on the
735 * attachment format.
736 */
737 bool try_tlb_resolve;
738 };
739
740 struct v3dv_render_pass {
741 struct vk_object_base base;
742
743 bool multiview_enabled;
744
745 uint32_t attachment_count;
746 struct v3dv_render_pass_attachment *attachments;
747
748 uint32_t subpass_count;
749 struct v3dv_subpass *subpasses;
750
751 struct v3dv_subpass_attachment *subpass_attachments;
752 };
753
754 struct v3dv_framebuffer {
755 struct vk_object_base base;
756
757 uint32_t width;
758 uint32_t height;
759 uint32_t layers;
760
761 /* Typically, edge tiles in the framebuffer have padding depending on the
762 * underlying tiling layout. One consequence of this is that when the
763 * framebuffer dimensions are not aligned to tile boundaries, tile stores
764 * would still write full tiles on the edges and write to the padded area.
765 * If the framebuffer is aliasing a smaller region of a larger image, then
766 * we need to be careful with this though, as we won't have padding on the
767 * edge tiles (which typically means that we need to load the tile buffer
768 * before we store).
769 */
770 bool has_edge_padding;
771
772 uint32_t attachment_count;
773 uint32_t color_attachment_count;
774
775 /* Notice that elements in 'attachments' will be NULL if the framebuffer
776 * was created imageless. The driver is expected to access attachment info
777 * from the command buffer state instead.
778 */
779 struct v3dv_image_view *attachments[0];
780 };
781
782 struct v3dv_frame_tiling {
783 uint32_t width;
784 uint32_t height;
785 uint32_t layers;
786 uint32_t render_target_count;
787 uint32_t internal_bpp;
788 bool msaa;
789 bool double_buffer;
790 uint32_t tile_width;
791 uint32_t tile_height;
792 uint32_t draw_tiles_x;
793 uint32_t draw_tiles_y;
794 uint32_t supertile_width;
795 uint32_t supertile_height;
796 uint32_t frame_width_in_supertiles;
797 uint32_t frame_height_in_supertiles;
798 };
799
800 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
801 const VkRect2D *area,
802 struct v3dv_framebuffer *fb,
803 struct v3dv_render_pass *pass,
804 uint32_t subpass_idx);
805
806 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
807 * This happens when we render at least 2 tiles, because in this mode each
808 * tile uses a different half of the tile buffer memory so we can have 2 tiles
809 * in flight (one being stored to memory and the next being rendered). In this
810 * scenario, if we emit a single initial tile clear we would only clear the
811 * first half of the tile buffer.
812 */
813 static inline bool
814 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
815 {
816 return tiling->double_buffer &&
817 (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
818 tiling->layers > 1);
819 }
820
821 enum v3dv_cmd_buffer_status {
822 V3DV_CMD_BUFFER_STATUS_NEW = 0,
823 V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
824 V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
825 V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
826 };
827
828 union v3dv_clear_value {
829 uint32_t color[4];
830 struct {
831 float z;
832 uint8_t s;
833 };
834 };
835
836 struct v3dv_cmd_buffer_attachment_state {
837 /* The original clear value as provided by the Vulkan API */
838 VkClearValue vk_clear_value;
839
840 /* The hardware clear value */
841 union v3dv_clear_value clear_value;
842
843 /* The underlying image view (from the framebuffer or, if imageless
844 * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
845 */
846 struct v3dv_image_view *image_view;
847
848 /* If this is a multisampled attachment with a resolve operation. */
849 bool has_resolve;
850
851 /* If this is a multisampled attachment with a resolve operation,
852 * whether we can use the TLB for the resolve.
853 */
854 bool use_tlb_resolve;
855 };
856
857 struct v3dv_viewport_state {
858 uint32_t count;
859 VkViewport viewports[MAX_VIEWPORTS];
860 float translate[MAX_VIEWPORTS][3];
861 float scale[MAX_VIEWPORTS][3];
862 };
863
864 struct v3dv_scissor_state {
865 uint32_t count;
866 VkRect2D scissors[MAX_SCISSORS];
867 };
868
869 /* Mostly a v3dv mapping of VkDynamicState, used to track which state is
870 * defined as dynamic
871 */
872 enum v3dv_dynamic_state_bits {
873 V3DV_DYNAMIC_VIEWPORT = 1 << 0,
874 V3DV_DYNAMIC_SCISSOR = 1 << 1,
875 V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
876 V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
877 V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
878 V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
879 V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
880 V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
881 V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
882 V3DV_DYNAMIC_ALL = (1 << 9) - 1,
883 };
884
885 /* Flags for dirty pipeline state.
886 */
887 enum v3dv_cmd_dirty_bits {
888 V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
889 V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
890 V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
891 V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
892 V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
893 V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
894 V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,
895 V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,
896 V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,
897 V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,
898 V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
899 V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,
900 V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO = 1 << 12,
901 V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 13,
902 V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 14,
903 V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 15,
904 V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 16,
905 V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 17,
906 V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 18,
907 };
908
909 struct v3dv_dynamic_state {
910 /**
911 * Bitmask of (1 << VK_DYNAMIC_STATE_*).
912 * Defines the set of saved dynamic state.
913 */
914 uint32_t mask;
915
916 struct v3dv_viewport_state viewport;
917
918 struct v3dv_scissor_state scissor;
919
920 struct {
921 uint32_t front;
922 uint32_t back;
923 } stencil_compare_mask;
924
925 struct {
926 uint32_t front;
927 uint32_t back;
928 } stencil_write_mask;
929
930 struct {
931 uint32_t front;
932 uint32_t back;
933 } stencil_reference;
934
935 float blend_constants[4];
936
937 struct {
938 float constant_factor;
939 float depth_bias_clamp;
940 float slope_factor;
941 } depth_bias;
942
943 float line_width;
944
945 uint32_t color_write_enable;
946 };
947
948 void v3dv_viewport_compute_xform(const VkViewport *viewport,
949 float scale[3],
950 float translate[3]);
951
952 enum v3dv_ez_state {
953 V3D_EZ_UNDECIDED = 0,
954 V3D_EZ_GT_GE,
955 V3D_EZ_LT_LE,
956 V3D_EZ_DISABLED,
957 };
958
959 enum v3dv_job_type {
960 V3DV_JOB_TYPE_GPU_CL = 0,
961 V3DV_JOB_TYPE_GPU_CL_SECONDARY,
962 V3DV_JOB_TYPE_GPU_TFU,
963 V3DV_JOB_TYPE_GPU_CSD,
964 V3DV_JOB_TYPE_CPU_RESET_QUERIES,
965 V3DV_JOB_TYPE_CPU_END_QUERY,
966 V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
967 V3DV_JOB_TYPE_CPU_SET_EVENT,
968 V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
969 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
970 V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
971 V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
972 };
973
974 struct v3dv_reset_query_cpu_job_info {
975 struct v3dv_query_pool *pool;
976 uint32_t first;
977 uint32_t count;
978 };
979
980 struct v3dv_end_query_cpu_job_info {
981 struct v3dv_query_pool *pool;
982 uint32_t query;
983
984 /* This is one unless multiview is used */
985 uint32_t count;
986 };
987
988 struct v3dv_copy_query_results_cpu_job_info {
989 struct v3dv_query_pool *pool;
990 uint32_t first;
991 uint32_t count;
992 struct v3dv_buffer *dst;
993 uint32_t offset;
994 uint32_t stride;
995 VkQueryResultFlags flags;
996 };
997
998 struct v3dv_submit_sync_info {
999 /* List of syncs to wait before running a job */
1000 uint32_t wait_count;
1001 struct vk_sync_wait *waits;
1002
1003 /* List of syncs to signal when all jobs complete */
1004 uint32_t signal_count;
1005 struct vk_sync_signal *signals;
1006 };
1007
1008 struct v3dv_event_set_cpu_job_info {
1009 struct v3dv_event *event;
1010 int state;
1011 };
1012
1013 struct v3dv_event_wait_cpu_job_info {
1014 /* List of events to wait on */
1015 uint32_t event_count;
1016 struct v3dv_event **events;
1017 };
1018
1019 struct v3dv_copy_buffer_to_image_cpu_job_info {
1020 struct v3dv_image *image;
1021 struct v3dv_buffer *buffer;
1022 uint32_t buffer_offset;
1023 uint32_t buffer_stride;
1024 uint32_t buffer_layer_stride;
1025 VkOffset3D image_offset;
1026 VkExtent3D image_extent;
1027 uint32_t mip_level;
1028 uint32_t base_layer;
1029 uint32_t layer_count;
1030 };
1031
1032 struct v3dv_csd_indirect_cpu_job_info {
1033 struct v3dv_buffer *buffer;
1034 uint32_t offset;
1035 struct v3dv_job *csd_job;
1036 uint32_t wg_size;
1037 uint32_t *wg_uniform_offsets[3];
1038 bool needs_wg_uniform_rewrite;
1039 };
1040
1041 struct v3dv_timestamp_query_cpu_job_info {
1042 struct v3dv_query_pool *pool;
1043 uint32_t query;
1044
1045 /* This is one unless multiview is used */
1046 uint32_t count;
1047 };
1048
1049 /* Number of perfmons required to handle all supported performance counters */
1050 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \
1051 DRM_V3D_MAX_PERF_COUNTERS)
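
/* Worked example (the figures here are illustrative; the real values come
 * from the v3d and drm-uapi headers): with V3D_PERFCNT_NUM = 87 counters and
 * DRM_V3D_MAX_PERF_COUNTERS = 32 counters per perfmon, this gives
 * DIV_ROUND_UP(87, 32) = 3 perfmons.
 */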
1052
1053 struct v3dv_perf_query {
1054 uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1055
1056 /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1057 * performance data.
1058 */
1059 struct vk_sync *last_job_sync;
1060 };
1061
1062 struct v3dv_job {
1063 struct list_head list_link;
1064
1065 /* We only create job clones when executing secondary command buffers into
1066 * primaries. These clones don't make deep copies of the original object
1067 * so we want to flag them to avoid freeing resources they don't own.
1068 */
1069 bool is_clone;
1070
1071 /* If the job executes on the transfer stage of the pipeline */
1072 bool is_transfer;
1073
1074 /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1075 * dereference memory in any buffer that has been flagged with
1076 * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR. These buffers may not
1077 * be bound via descriptor sets, so we need to make sure that a job that
1078 * uses this functionality includes all these buffers in its kernel
1079 * submission.
1080 */
1081 bool uses_buffer_device_address;
1082
1083 enum v3dv_job_type type;
1084
1085 struct v3dv_device *device;
1086
1087 struct v3dv_cmd_buffer *cmd_buffer;
1088
1089 struct v3dv_cl bcl;
1090 struct v3dv_cl rcl;
1091 struct v3dv_cl indirect;
1092
1093 /* Set of all BOs referenced by the job. This will be used for making
1094 * the list of BOs that the kernel will need to have paged in to
1095 * execute our job.
1096 */
1097 struct set *bos;
1098 uint32_t bo_count;
1099 uint64_t bo_handle_mask;
1100
1101 struct v3dv_bo *tile_alloc;
1102 struct v3dv_bo *tile_state;
1103
1104 bool tmu_dirty_rcl;
1105
1106 uint32_t first_subpass;
1107
1108 /* When the current subpass is split into multiple jobs, this flag is set
1109 * to true for any jobs after the first in the same subpass.
1110 */
1111 bool is_subpass_continue;
1112
1113 /* If this job is the last job emitted for a subpass. */
1114 bool is_subpass_finish;
1115
1116 struct v3dv_frame_tiling frame_tiling;
1117
1118 enum v3dv_ez_state ez_state;
1119 enum v3dv_ez_state first_ez_state;
1120
1121 /* If we have already decided if we need to disable Early Z/S completely
1122 * for this job.
1123 */
1124 bool decided_global_ez_enable;
1125
1126 /* If the job emitted any draw calls with Early Z/S enabled */
1127 bool has_ez_draws;
1128
1129 /* If this job has been configured to use early Z/S clear */
1130 bool early_zs_clear;
1131
1132 /* Number of draw calls recorded into the job */
1133 uint32_t draw_count;
1134
1135 /* A flag indicating whether we want to flush every draw separately. This
1136 * can be used for debugging, or for cases where special circumstances
1137 * require this behavior.
1138 */
1139 bool always_flush;
1140
1141 /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1142 * can use this to select the hw queues where we need to serialize the job.
1143 */
1144 uint8_t serialize;
1145
1146 /* If this is a CL job, whether we should sync before binning */
1147 bool needs_bcl_sync;
1148
1149 /* Job specs for CPU jobs */
1150 union {
1151 struct v3dv_reset_query_cpu_job_info query_reset;
1152 struct v3dv_end_query_cpu_job_info query_end;
1153 struct v3dv_copy_query_results_cpu_job_info query_copy_results;
1154 struct v3dv_event_set_cpu_job_info event_set;
1155 struct v3dv_event_wait_cpu_job_info event_wait;
1156 struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
1157 struct v3dv_csd_indirect_cpu_job_info csd_indirect;
1158 struct v3dv_timestamp_query_cpu_job_info query_timestamp;
1159 } cpu;
1160
1161 /* Job specs for TFU jobs */
1162 struct drm_v3d_submit_tfu tfu;
1163
1164 /* Job specs for CSD jobs */
1165 struct {
1166 struct v3dv_bo *shared_memory;
1167 uint32_t wg_count[3];
1168 uint32_t wg_base[3];
1169 struct drm_v3d_submit_csd submit;
1170 } csd;
1171
1172 /* Perfmons with last job sync for CSD and CL jobs */
1173 struct v3dv_perf_query *perf;
1174 };
1175
1176 void v3dv_job_init(struct v3dv_job *job,
1177 enum v3dv_job_type type,
1178 struct v3dv_device *device,
1179 struct v3dv_cmd_buffer *cmd_buffer,
1180 int32_t subpass_idx);
1181 void v3dv_job_destroy(struct v3dv_job *job);
1182
1183 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1184 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1185
1186 void v3dv_job_start_frame(struct v3dv_job *job,
1187 uint32_t width,
1188 uint32_t height,
1189 uint32_t layers,
1190 bool allocate_tile_state_for_all_layers,
1191 uint32_t render_target_count,
1192 uint8_t max_internal_bpp,
1193 bool msaa);
1194
1195 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1196
1197 struct v3dv_job *
1198 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1199 struct v3dv_cmd_buffer *cmd_buffer);
1200
1201 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1202 enum v3dv_job_type type,
1203 struct v3dv_cmd_buffer *cmd_buffer,
1204 uint32_t subpass_idx);
1205
1206 void
1207 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1208 uint32_t slot_size,
1209 uint32_t used_count,
1210 uint32_t *alloc_count,
1211 void **ptr);
1212
1213 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1214 bool indexed, bool indirect);
1215
1216 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1217 * cmd_buffer specific header?
1218 */
1219 struct v3dv_draw_info {
1220 uint32_t vertex_count;
1221 uint32_t instance_count;
1222 uint32_t first_vertex;
1223 uint32_t first_instance;
1224 };
1225
1226 struct v3dv_vertex_binding {
1227 struct v3dv_buffer *buffer;
1228 VkDeviceSize offset;
1229 };
1230
1231 struct v3dv_descriptor_state {
1232 struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1233 uint32_t valid;
1234 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1235 };
1236
1237 struct v3dv_cmd_pipeline_state {
1238 struct v3dv_pipeline *pipeline;
1239
1240 struct v3dv_descriptor_state descriptor_state;
1241 };
1242
1243 enum {
1244 V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1245 V3DV_BARRIER_COMPUTE_BIT = (1 << 1),
1246 V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1247 };
1248 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1249 V3DV_BARRIER_TRANSFER_BIT | \
1250 V3DV_BARRIER_COMPUTE_BIT)
1251
1252 struct v3dv_barrier_state {
1253 /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1254 uint8_t dst_mask;
1255
1256 /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1257 * indicating the sources of the dependency.
1258 */
1259 uint8_t src_mask_graphics;
1260 uint8_t src_mask_transfer;
1261 uint8_t src_mask_compute;
1262
1263 /* For graphics barriers, access masks involved. Used to decide if we need
1264 * to execute a binning or render barrier.
1265 */
1266 VkAccessFlags bcl_buffer_access;
1267 VkAccessFlags bcl_image_access;
1268 };
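
/* Illustrative sketch (hypothetical): a compute-to-graphics dependency would
 * be accumulated into this state as
 *
 *    barrier->dst_mask |= V3DV_BARRIER_GRAPHICS_BIT;
 *    barrier->src_mask_graphics |= V3DV_BARRIER_COMPUTE_BIT;
 */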
1269
1270 struct v3dv_cmd_buffer_state {
1271 struct v3dv_render_pass *pass;
1272 struct v3dv_framebuffer *framebuffer;
1273 VkRect2D render_area;
1274
1275 /* Current job being recorded */
1276 struct v3dv_job *job;
1277
1278 uint32_t subpass_idx;
1279
1280 struct v3dv_cmd_pipeline_state gfx;
1281 struct v3dv_cmd_pipeline_state compute;
1282
1283 struct v3dv_dynamic_state dynamic;
1284
1285 uint32_t dirty;
1286 VkShaderStageFlagBits dirty_descriptor_stages;
1287 VkShaderStageFlagBits dirty_push_constants_stages;
1288
1289 /* Current clip window. We use this to check whether we have an active
1290 * scissor, since in that case we can't use TLB clears and need to fall back
1291 * to drawing rects.
1292 */
1293 VkRect2D clip_window;
1294
1295 /* Whether our render area is aligned to tile boundaries. If this is false
1296 * then we have tiles that are only partially covered by the render area,
1297 * and therefore, we need to be careful with our loads and stores so we don't
1298 * modify pixels for the tile area that is not covered by the render area.
1299 * This means, for example, that we can't use the TLB to clear, since that
1300 * always clears full tiles.
1301 */
1302 bool tile_aligned_render_area;
1303
1304 /* FIXME: we have just one client-side BO for the push constants,
1305 * independently of the stageFlags in vkCmdPushConstants, and the
1306 * pipelineBindPoint in vkCmdBindPipeline. We could probably do more
1307 * per-stage tuning in the future if it makes sense.
1308 */
1309 uint32_t push_constants_size;
1310 uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1311
1312 uint32_t attachment_alloc_count;
1313 struct v3dv_cmd_buffer_attachment_state *attachments;
1314
1315 struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1316
1317 struct {
1318 VkBuffer buffer;
1319 VkDeviceSize offset;
1320 uint8_t index_size;
1321 } index_buffer;
1322
1323 /* Current uniforms */
1324 struct {
1325 struct v3dv_cl_reloc vs_bin;
1326 struct v3dv_cl_reloc vs;
1327 struct v3dv_cl_reloc gs_bin;
1328 struct v3dv_cl_reloc gs;
1329 struct v3dv_cl_reloc fs;
1330 } uniforms;
1331
1332 /* Current view index for multiview rendering */
1333 uint32_t view_index;
1334
1335 /* Used to flag OOM conditions during command buffer recording */
1336 bool oom;
1337
1338 /* If we are currently recording job(s) for a transfer operation */
1339 bool is_transfer;
1340
1341 /* Barrier state tracking */
1342 struct v3dv_barrier_state barrier;
1343
1344 /* Secondary command buffer state */
1345 struct {
1346 bool occlusion_query_enable;
1347 } inheritance;
1348
1349 /* Command buffer state saved during a meta operation */
1350 struct {
1351 uint32_t subpass_idx;
1352 VkRenderPass pass;
1353 VkFramebuffer framebuffer;
1354
1355 uint32_t attachment_alloc_count;
1356 uint32_t attachment_count;
1357 struct v3dv_cmd_buffer_attachment_state *attachments;
1358
1359 bool tile_aligned_render_area;
1360 VkRect2D render_area;
1361
1362 struct v3dv_dynamic_state dynamic;
1363
1364 struct v3dv_cmd_pipeline_state gfx;
1365 bool has_descriptor_state;
1366
1367 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1368 uint32_t push_constants_size;
1369 } meta;
1370
1371 /* Command buffer state for queries */
1372 struct {
1373 /* A list of vkCmdEndQuery commands recorded in the command buffer during
1374 * a render pass. We queue these here and then schedule the corresponding
1375 * CPU jobs for them at the time we finish the GPU job in which they have
1376 * been recorded.
1377 */
1378 struct {
1379 uint32_t used_count;
1380 uint32_t alloc_count;
1381 struct v3dv_end_query_cpu_job_info *states;
1382 } end;
1383
1384 struct {
1385 /* This BO is not NULL if we have an active occlusion query, that is,
1386 * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1387 */
1388 struct v3dv_bo *bo;
1389 uint32_t offset;
1390
1391 /* This pointer is not NULL if we have an active performance query */
1392 struct v3dv_perf_query *perf;
1393 } active_query;
1394 } query;
1395 };
1396
1397 /* The following struct represents the info from a descriptor that we store
1398 * in host memory. It mostly consists of links to other existing vulkan
1399 * objects, like the image_view needed to access swizzle info, or the buffer
1400 * used for a UBO/SSBO, for example.
1401 *
1402 * FIXME: revisit whether it makes sense to just move everything that would
1403 * be needed from a descriptor to the bo.
1404 */
1405 struct v3dv_descriptor {
1406 VkDescriptorType type;
1407
1408 union {
1409 struct {
1410 struct v3dv_image_view *image_view;
1411 struct v3dv_sampler *sampler;
1412 };
1413
1414 struct {
1415 struct v3dv_buffer *buffer;
1416 size_t offset;
1417 size_t range;
1418 };
1419
1420 struct v3dv_buffer_view *buffer_view;
1421 };
1422 };
1423
1424 struct v3dv_query {
1425 bool maybe_available;
1426 union {
1427 /* Used by GPU queries (occlusion) */
1428 struct {
1429 struct v3dv_bo *bo;
1430 uint32_t offset;
1431 };
1432 /* Used by CPU queries (timestamp) */
1433 uint64_t value;
1434
1435 /* Used by performance queries */
1436 struct v3dv_perf_query perf;
1437 };
1438 };
1439
1440 struct v3dv_query_pool {
1441 struct vk_object_base base;
1442
1443 struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */
1444
1445 /* Only used with performance queries */
1446 struct {
1447 uint32_t ncounters;
1448 uint8_t counters[V3D_PERFCNT_NUM];
1449
1450 /* V3D has a limit on the number of counters we can track in a
1451 * single performance monitor, so if too many counters are requested
1452 * we need to create multiple monitors to record all of them. This
1453 * field represents the number of monitors required for the number
1454 * of counters requested.
1455 */
1456 uint8_t nperfmons;
1457 } perfmon;
1458
1459 VkQueryType query_type;
1460 uint32_t query_count;
1461 struct v3dv_query *queries;
1462 };
1463
1464 VkResult v3dv_get_query_pool_results(struct v3dv_device *device,
1465 struct v3dv_query_pool *pool,
1466 uint32_t first,
1467 uint32_t count,
1468 void *data,
1469 VkDeviceSize stride,
1470 VkQueryResultFlags flags);
1471
1472 void v3dv_reset_query_pools(struct v3dv_device *device,
1473 struct v3dv_query_pool *query_pool,
1474 uint32_t first,
1475 uint32_t last);
1476
1477 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1478 uint64_t pobj,
1479 VkAllocationCallbacks *alloc);
1480 struct v3dv_cmd_buffer_private_obj {
1481 struct list_head list_link;
1482 uint64_t obj;
1483 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1484 };
1485
1486 struct v3dv_cmd_buffer {
1487 struct vk_command_buffer vk;
1488
1489 struct v3dv_device *device;
1490
1491 /* Used at submit time to link command buffers in the submission that have
1492 * spawned wait threads, so we can then wait on all of them to complete
1493 * before we process any signal semaphores or fences.
1494 */
1495 struct list_head list_link;
1496
1497 VkCommandBufferUsageFlags usage_flags;
1498
1499 enum v3dv_cmd_buffer_status status;
1500
1501 struct v3dv_cmd_buffer_state state;
1502
1503 /* Buffer where we upload push constant data to resolve indirect indexing */
1504 struct v3dv_cl_reloc push_constants_resource;
1505
1506 /* Collection of Vulkan objects created internally by the driver (typically
1507 * during recording of meta operations) that are part of the command buffer
1508 * and should be destroyed with it.
1509 */
1510 struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1511
1512 /* Per-command buffer resources for meta operations. */
1513 struct {
1514 struct {
1515 /* The current descriptor pool for blit sources */
1516 VkDescriptorPool dspool;
1517 } blit;
1518 struct {
1519 /* The current descriptor pool for texel buffer copy sources */
1520 VkDescriptorPool dspool;
1521 } texel_buffer_copy;
1522 } meta;
1523
1524 /* List of jobs in the command buffer. For primary command buffers it
1525 * represents the jobs we want to submit to the GPU. For secondary command
1526 * buffers it represents jobs that will be merged into a primary command
1527 * buffer via vkCmdExecuteCommands.
1528 */
1529 struct list_head jobs;
1530 };
1531
1532 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1533 int32_t subpass_idx,
1534 enum v3dv_job_type type);
1535 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1536
1537 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1538 uint32_t subpass_idx);
1539 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1540 uint32_t subpass_idx);
1541
1542 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1543
1544 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1545 bool push_descriptor_state);
1546 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1547 uint32_t dirty_dynamic_state,
1548 bool needs_subpass_resume);
1549
1550 void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
1551 struct v3dv_query_pool *pool,
1552 uint32_t first,
1553 uint32_t count);
1554
1555 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1556 struct v3dv_query_pool *pool,
1557 uint32_t query,
1558 VkQueryControlFlags flags);
1559
1560 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1561 struct v3dv_query_pool *pool,
1562 uint32_t query);
1563
1564 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1565 struct v3dv_query_pool *pool,
1566 uint32_t first,
1567 uint32_t count,
1568 struct v3dv_buffer *dst,
1569 uint32_t offset,
1570 uint32_t stride,
1571 VkQueryResultFlags flags);
1572
1573 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1574 struct drm_v3d_submit_tfu *tfu);
1575
1576 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
1577 const uint32_t *wg_counts);
1578
1579 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1580 uint64_t obj,
1581 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1582
1583 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1584 struct v3dv_barrier_state *src);
1585
1586 struct v3dv_event {
1587 struct vk_object_base base;
1588 int state;
1589 };
1590
1591 struct v3dv_shader_variant {
1592 enum broadcom_shader_stage stage;
1593
1594 union {
1595 struct v3d_prog_data *base;
1596 struct v3d_vs_prog_data *vs;
1597 struct v3d_gs_prog_data *gs;
1598 struct v3d_fs_prog_data *fs;
1599 struct v3d_compute_prog_data *cs;
1600 } prog_data;
1601
1602 /* We explicitly save the prog_data_size as it makes it easier to
1603 * serialize
1604 */
1605 uint32_t prog_data_size;
1606
1607 /* The assembly for this variant will be uploaded to a BO shared with all
1608 * other shader stages in that pipeline. This is the offset in that BO.
1609 */
1610 uint32_t assembly_offset;
1611
1612 /* Note: it is very likely that qpu_insts will be NULL, as it is only used
1613 * temporarily to upload the assembly to the shared bo, since we compile the
1614 * different stages individually.
1615 */
1616 uint64_t *qpu_insts;
1617 uint32_t qpu_insts_size;
1618 };
1619
1620 /*
1621 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
1622 * other methods don't need so many parameters.
1623 *
1624 * FIXME: for the case of the coordinate shader and the vertex shader, module,
1625 * entrypoint, spec_info and nir are the same. There is also info only
1626 * relevant to some stages, but it seemed too much of a hassle to create a new
1627 * struct just to handle that. Revisit if this kind of info starts to grow.
1628 */
1629 struct v3dv_pipeline_stage {
1630 struct v3dv_pipeline *pipeline;
1631
1632 enum broadcom_shader_stage stage;
1633
1634 const struct vk_shader_module *module;
1635 const char *entrypoint;
1636 const VkSpecializationInfo *spec_info;
1637
1638 nir_shader *nir;
1639
1640 /* The following is the combined hash of module+entrypoint+spec_info+nir */
1641 unsigned char shader_sha1[20];
1642
1643 /** A name for this program, so you can track it in shader-db output. */
1644 uint32_t program_id;
1645
1646 VkPipelineCreationFeedback feedback;
1647 };
1648
1649 /* We are using the descriptor pool entry for two things:
1650 * * Track the allocated sets, so we can properly free them if needed
1651 * * Track the suballocated pool bo regions, so if some descriptor set is
1652 * freed, the gap can be reallocated later.
1653 *
1654 * These only make sense if the pool was not created with the flag
1655 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
1656 */
1657 struct v3dv_descriptor_pool_entry
1658 {
1659 struct v3dv_descriptor_set *set;
1660 /* Offset and size of the subregion allocated for this entry from the
1661 * pool->bo
1662 */
1663 uint32_t offset;
1664 uint32_t size;
1665 };
1666
1667 struct v3dv_descriptor_pool {
1668 struct vk_object_base base;
1669
1670 /* A list with all descriptor sets allocated from the pool. */
1671 struct list_head set_list;
1672
1673 /* If this descriptor pool has been allocated by the driver for internal
1674 * use, typically to implement meta operations.
1675 */
1676 bool is_driver_internal;
1677
1678 struct v3dv_bo *bo;
1679 /* Current offset into the descriptor bo. 0 means that we haven't used it
1680 * for any descriptor. If the descriptor bo is NULL, the current offset is
1681 * meaningless.
1682 */
1683 uint32_t current_offset;
1684
1685 /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
1686 * descriptor sets are suballocated from the pool memory as a whole and
1687 * managed through the following pointers. If it is set, these are not used
1688 * and descriptor sets are allocated/freed individually.
1689 */
1690 uint8_t *host_memory_base;
1691 uint8_t *host_memory_ptr;
1692 uint8_t *host_memory_end;
1693
1694 uint32_t entry_count;
1695 uint32_t max_entry_count;
1696 struct v3dv_descriptor_pool_entry entries[0];
1697 };
1698
1699 struct v3dv_descriptor_set {
1700 struct vk_object_base base;
1701
1702 /* List link into the list of all sets allocated from the pool */
1703 struct list_head pool_link;
1704
1705 struct v3dv_descriptor_pool *pool;
1706
1707 struct v3dv_descriptor_set_layout *layout;
1708
1709 /* Offset relative to the descriptor pool bo for this set */
1710 uint32_t base_offset;
1711
1712 /* The descriptors below can be indexed (set/binding) using the set_layout
1713 */
1714 struct v3dv_descriptor descriptors[0];
1715 };
1716
1717 struct v3dv_descriptor_set_binding_layout {
1718 VkDescriptorType type;
1719
1720 /* Number of array elements in this binding */
1721 uint32_t array_size;
1722
1723 /* Index into the flattened descriptor set */
1724 uint32_t descriptor_index;
1725
1726 uint32_t dynamic_offset_count;
1727 uint32_t dynamic_offset_index;
1728
1729 /* Offset into the descriptor set where this descriptor lives (final offset
1730 * on the descriptor bo needs to take into account set->base_offset)
1731 */
1732 uint32_t descriptor_offset;
1733
1734 /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
1735 * if there are no immutable samplers.
1736 */
1737 uint32_t immutable_samplers_offset;
1738 };
1739
1740 struct v3dv_descriptor_set_layout {
1741 struct vk_object_base base;
1742
1743 VkDescriptorSetLayoutCreateFlags flags;
1744
1745 /* Number of bindings in this descriptor set */
1746 uint32_t binding_count;
1747
1748 /* Total bo size needed for this descriptor set
1749 */
1750 uint32_t bo_size;
1751
1752 /* Shader stages affected by this descriptor set */
1753 uint16_t shader_stages;
1754
1755 /* Number of descriptors in this descriptor set */
1756 uint32_t descriptor_count;
1757
1758 /* Number of dynamic offsets used by this descriptor set */
1759 uint16_t dynamic_offset_count;
1760
1761 /* Descriptor set layouts can be destroyed even if they are still being
1762 * used.
1763 */
1764 uint32_t ref_cnt;
1765
1766 /* Bindings in this descriptor set */
1767 struct v3dv_descriptor_set_binding_layout binding[0];
1768 };
1769
1770 void
1771 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
1772 struct v3dv_descriptor_set_layout *set_layout);
1773
1774 static inline void
1775 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
1776 {
1777 assert(set_layout && set_layout->ref_cnt >= 1);
1778 p_atomic_inc(&set_layout->ref_cnt);
1779 }
1780
1781 static inline void
1782 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
1783 struct v3dv_descriptor_set_layout *set_layout)
1784 {
1785 assert(set_layout && set_layout->ref_cnt >= 1);
1786 if (p_atomic_dec_zero(&set_layout->ref_cnt))
1787 v3dv_descriptor_set_layout_destroy(device, set_layout);
1788 }
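
/* Illustrative usage (hypothetical call sites): objects that keep a pointer
 * to a set layout take a reference and drop it when they are destroyed, so
 * the layout survives vkDestroyDescriptorSetLayout while still in use.
 *
 *    v3dv_descriptor_set_layout_ref(set_layout);
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, set_layout);
 */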
1789
1790 struct v3dv_pipeline_layout {
1791 struct vk_object_base base;
1792
1793 struct {
1794 struct v3dv_descriptor_set_layout *layout;
1795 uint32_t dynamic_offset_start;
1796 } set[MAX_SETS];
1797
1798 uint32_t num_sets;
1799
1800 /* Shader stages that are declared to use descriptors from this layout */
1801 uint32_t shader_stages;
1802
1803 uint32_t dynamic_offset_count;
1804 uint32_t push_constant_size;
1805
1806 unsigned char sha1[20];
1807 };
1808
1809 /*
1810 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
1811 * it to be big enough to include the max value for all of them.
1812 *
1813 * FIXME: one alternative would be to allocate the map as big as you need for
1814 * each descriptor type. That would mean more individual allocations.
1815 */
1816 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
1817 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
1818 MAX_STORAGE_BUFFERS)
1819
1820
1821 struct v3dv_descriptor_map {
1822 /* TODO: avoid fixed size array/justify the size */
1823 unsigned num_desc; /* Number of descriptors */
1824 int set[DESCRIPTOR_MAP_SIZE];
1825 int binding[DESCRIPTOR_MAP_SIZE];
1826 int array_index[DESCRIPTOR_MAP_SIZE];
1827 int array_size[DESCRIPTOR_MAP_SIZE];
1828 bool used[DESCRIPTOR_MAP_SIZE];
1829
1830 /* NOTE: the following is only for samplers, but this is the easiest place
1831 * to put it.
1832 */
1833 uint8_t return_size[DESCRIPTOR_MAP_SIZE];
1834 };
1835
1836 struct v3dv_sampler {
1837 struct vk_object_base base;
1838
1839 bool compare_enable;
1840 bool unnormalized_coordinates;
1841 bool clamp_to_transparent_black_border;
1842
1843 /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
1844 * configuration. If needed it will be copied to the descriptor info during
1845 * UpdateDescriptorSets
1846 */
1847 uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
1848 };
1849
1850 struct v3dv_descriptor_template_entry {
1851 /* The type of descriptor in this entry */
1852 VkDescriptorType type;
1853
1854 /* Binding in the descriptor set */
1855 uint32_t binding;
1856
1857 /* Offset at which to write into the descriptor set binding */
1858 uint32_t array_element;
1859
1860 /* Number of elements to write into the descriptor set binding */
1861 uint32_t array_count;
1862
1863 /* Offset into the user provided data */
1864 size_t offset;
1865
1866 /* Stride between elements into the user provided data */
1867 size_t stride;
1868 };
1869
1870 struct v3dv_descriptor_update_template {
1871 struct vk_object_base base;
1872
1873 VkPipelineBindPoint bind_point;
1874
1875 /* The descriptor set this template corresponds to. This value is only
1876 * valid if the template was created with the templateType
1877 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
1878 */
1879 uint8_t set;
1880
1881 /* Number of entries in this template */
1882 uint32_t entry_count;
1883
1884 /* Entries of the template */
1885 struct v3dv_descriptor_template_entry entries[0];
1886 };
1887
1888
1889 /* We keep two special values for the sampler index that represent the case
1890 * where a sampler is not needed/provided. The main reason is that even if we
1891 * don't have a sampler, we still need to do the output unpacking (through
1892 * nir_lower_tex). The easiest way to do this is to add these special "no
1893 * sampler" entries to the sampler_map and then use the proper unpacking for
1894 * that case.
1895 *
1896 * We have one for when we want a 16-bit output size and another for when we
1897 * want a 32-bit output size. We use the info coming from the RelaxedPrecision
1898 * decoration to decide between the two.
1899 */
1900 #define V3DV_NO_SAMPLER_16BIT_IDX 0
1901 #define V3DV_NO_SAMPLER_32BIT_IDX 1
1902
1903 /*
1904 * The following two helpers operate on the combined texture/sampler index
1905 * maps used by v3dv_pipeline.
1906 */
1907 static inline uint32_t
1908 v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
1909 uint32_t sampler_index)
1910 {
1911 return texture_index << 24 | sampler_index;
1912 }
1913
1914 static inline void
1915 v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
1916 uint32_t *texture_index,
1917 uint32_t *sampler_index)
1918 {
1919 uint32_t texture = combined_index_key >> 24;
1920 uint32_t sampler = combined_index_key & 0xffffff;
1921
1922 if (texture_index)
1923 *texture_index = texture;
1924
1925 if (sampler_index)
1926 *sampler_index = sampler;
1927 }
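
/* Illustrative example (a sketch, not driver code): texture index 3 combined
 * with sampler index 5 packs to 0x03000005, and unpacking recovers both
 * values. The packing implies the texture index must fit in 8 bits and the
 * sampler index in 24 bits.
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(3, 5);
 *    uint32_t tex, samp;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex, &samp);
 *    assert(tex == 3 && samp == 5);
 */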
1928
1929 struct v3dv_descriptor_maps {
1930 struct v3dv_descriptor_map ubo_map;
1931 struct v3dv_descriptor_map ssbo_map;
1932 struct v3dv_descriptor_map sampler_map;
1933 struct v3dv_descriptor_map texture_map;
1934 };
1935
1936 /* The structure represents data shared between different objects, like the
1937 * pipeline and the pipeline cache, so we ref count it to know when it should
1938 * be freed.
1939 */
1940 struct v3dv_pipeline_shared_data {
1941 uint32_t ref_cnt;
1942
1943 unsigned char sha1_key[20];
1944
1945 struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
1946 struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
1947
1948 struct v3dv_bo *assembly_bo;
1949 };
1950
1951 struct v3dv_pipeline_executable_data {
1952 enum broadcom_shader_stage stage;
1953 char *nir_str;
1954 char *qpu_str;
1955 };
1956
1957 struct v3dv_pipeline {
1958 struct vk_object_base base;
1959
1960 struct v3dv_device *device;
1961
1962 VkShaderStageFlags active_stages;
1963
1964 struct v3dv_render_pass *pass;
1965 struct v3dv_subpass *subpass;
1966
1967 /* Note: we can't use just a MESA_SHADER_STAGES array because we also need
1968 * to track binning shaders. These stages will be freed once the pipeline
1969 * has been compiled.
1970 */
1971 struct v3dv_pipeline_stage *vs;
1972 struct v3dv_pipeline_stage *vs_bin;
1973 struct v3dv_pipeline_stage *gs;
1974 struct v3dv_pipeline_stage *gs_bin;
1975 struct v3dv_pipeline_stage *fs;
1976 struct v3dv_pipeline_stage *cs;
1977
1978 /* Flags for whether optional pipeline stages are present, for convenience */
1979 bool has_gs;
1980
1981 /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
1982 bool uses_buffer_device_address;
1983
1984 /* Spilling memory requirements */
1985 struct {
1986 struct v3dv_bo *bo;
1987 uint32_t size_per_thread;
1988 } spill;
1989
1990 struct v3dv_dynamic_state dynamic_state;
1991
1992 struct v3dv_pipeline_layout *layout;
1993
1994 /* Whether this pipeline enables depth writes */
1995 bool z_updates_enable;
1996
1997 enum v3dv_ez_state ez_state;
1998
1999 /* If ez_state is V3D_EZ_DISABLED, whether the reason for disabling is that
2000 * the pipeline selects an incompatible depth test function.
2001 */
2002 bool incompatible_ez_test;
2003
2004 bool msaa;
2005 bool sample_rate_shading;
2006 uint32_t sample_mask;
2007
2008 bool primitive_restart;
2009
2010 /* Indexed by binding, so vb[binding].stride is the stride of the vertex
2011 * array with that binding.
2012 */
2013 struct v3dv_pipeline_vertex_binding {
2014 uint32_t stride;
2015 uint32_t instance_divisor;
2016 } vb[MAX_VBS];
2017 uint32_t vb_count;
2018
2019 /* Note that a lot of the info from VkVertexInputAttributeDescription is
2020 * already prepacked, so here we only store the fields that need to be
2021 * rechecked later. The array must be indexed by driver location, since that
2022 * is the order in which we need to emit the attributes.
2023 */
2024 struct v3dv_pipeline_vertex_attrib {
2025 uint32_t binding;
2026 uint32_t offset;
2027 VkFormat vk_format;
2028 } va[MAX_VERTEX_ATTRIBS];
2029 uint32_t va_count;
2030
2031 enum pipe_prim_type topology;
2032
2033 struct v3dv_pipeline_shared_data *shared_data;
2034
2035 /* Combined SHA1 of the shader stages, the layout and the pipeline key. */
2036 unsigned char sha1[20];
2037
2038 /* In general we can reuse v3dv_device->default_attribute_float, so note
2039 * that the following can be NULL.
2040 *
2041 * FIXME: the content of this BO is small, so it could be uploaded into a
2042 * common BO instead. But since in most cases it will be NULL, this is not
2043 * a priority.
2044 */
2045 struct v3dv_bo *default_attribute_values;
2046
2047 struct vpm_config vpm_cfg;
2048 struct vpm_config vpm_cfg_bin;
2049
2050 /* Whether the pipeline should emit any of the stencil configuration packets */
2051 bool emit_stencil_cfg[2];
2052
2053 /* Blend state */
2054 struct {
2055 /* Per-RT bit mask with blend enables */
2056 uint8_t enables;
2057 /* Per-RT prepacked blend config packets */
2058 uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2059 /* Flag indicating whether the blend factors in use require
2060 * color constants.
2061 */
2062 bool needs_color_constants;
2063 /* Mask with enabled color channels for each RT (4 bits per RT) */
2064 uint32_t color_write_masks;
2065 } blend;
2066
2067 /* Depth bias */
2068 struct {
2069 bool enabled;
2070 bool is_z16;
2071 } depth_bias;
2072
2073 struct {
2074 void *mem_ctx;
2075 bool has_data;
2076 struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2077 } executables;
2078
2079 /* Packets prepacked during pipeline creation
2080 */
2081 uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2082 uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2083 uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2084 uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2085 MAX_VERTEX_ATTRIBS];
2086 uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2087 };
2088
2089 static inline VkPipelineBindPoint
2090 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2091 {
2092 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2093 !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2094 return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2095 VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2096 }
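
/* Illustrative reading of the helper above (a sketch, not driver code): a
 * compute-only pipeline resolves to VK_PIPELINE_BIND_POINT_COMPUTE and any
 * graphics pipeline to VK_PIPELINE_BIND_POINT_GRAPHICS; the assert encodes
 * that compute is never combined with graphics stages in the same pipeline.
 */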
2097
2098 static inline struct v3dv_descriptor_state*
2099 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2100 struct v3dv_pipeline *pipeline)
2101 {
2102 if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2103 return &cmd_buffer->state.compute.descriptor_state;
2104 else
2105 return &cmd_buffer->state.gfx.descriptor_state;
2106 }
2107
2108 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
2109
2110 uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
2111 uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
2112
2113 #define v3dv_debug_ignored_stype(sType) \
2114 mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
2115
2116 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
2117 uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
2118 const struct v3dv_format *
2119 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2120 uint32_t bpp, VkFormat *out_vk_format);
2121 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2122 VkFormat vk_format,
2123 VkFormatFeatureFlags2 features);
2124
2125 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2126 struct v3dv_pipeline *pipeline,
2127 struct v3dv_shader_variant *variant);
2128
2129 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2130 struct v3dv_pipeline *pipeline,
2131 struct v3dv_shader_variant *variant,
2132 uint32_t **wg_count_offsets);
2133
2134 struct v3dv_shader_variant *
2135 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2136 struct v3dv_pipeline_cache *cache,
2137 struct v3d_key *key,
2138 size_t key_size,
2139 const VkAllocationCallbacks *pAllocator,
2140 VkResult *out_vk_result);
2141
2142 struct v3dv_shader_variant *
2143 v3dv_shader_variant_create(struct v3dv_device *device,
2144 enum broadcom_shader_stage stage,
2145 struct v3d_prog_data *prog_data,
2146 uint32_t prog_data_size,
2147 uint32_t assembly_offset,
2148 uint64_t *qpu_insts,
2149 uint32_t qpu_insts_size,
2150 VkResult *out_vk_result);
2151
2152 void
2153 v3dv_shader_variant_destroy(struct v3dv_device *device,
2154 struct v3dv_shader_variant *variant);
2155
2156 static inline void
2157 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2158 {
2159 assert(shared_data && shared_data->ref_cnt >= 1);
2160 p_atomic_inc(&shared_data->ref_cnt);
2161 }
2162
2163 void
2164 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2165 struct v3dv_pipeline_shared_data *shared_data);
2166
2167 static inline void
2168 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2169 struct v3dv_pipeline_shared_data *shared_data)
2170 {
2171 assert(shared_data && shared_data->ref_cnt >= 1);
2172 if (p_atomic_dec_zero(&shared_data->ref_cnt))
2173 v3dv_pipeline_shared_data_destroy(device, shared_data);
2174 }
2175
2176 struct v3dv_descriptor *
2177 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2178 struct v3dv_descriptor_map *map,
2179 struct v3dv_pipeline_layout *pipeline_layout,
2180 uint32_t index,
2181 uint32_t *dynamic_offset);
2182
2183 struct v3dv_cl_reloc
2184 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2185 struct v3dv_descriptor_state *descriptor_state,
2186 struct v3dv_descriptor_map *map,
2187 struct v3dv_pipeline_layout *pipeline_layout,
2188 uint32_t index,
2189 VkDescriptorType *out_type);
2190
2191 const struct v3dv_sampler *
2192 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2193 struct v3dv_descriptor_map *map,
2194 struct v3dv_pipeline_layout *pipeline_layout,
2195 uint32_t index);
2196
2197 struct v3dv_cl_reloc
2198 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2199 struct v3dv_descriptor_state *descriptor_state,
2200 struct v3dv_descriptor_map *map,
2201 struct v3dv_pipeline_layout *pipeline_layout,
2202 uint32_t index);
2203
2204 struct v3dv_cl_reloc
2205 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2206 struct v3dv_descriptor_state *descriptor_state,
2207 struct v3dv_descriptor_map *map,
2208 struct v3dv_pipeline_layout *pipeline_layout,
2209 uint32_t index);
2210
2211 struct v3dv_bo*
2212 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2213 struct v3dv_descriptor_map *map,
2214 struct v3dv_pipeline_layout *pipeline_layout,
2215 uint32_t index);
2216
2217 static inline const struct v3dv_sampler *
2218 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2219 const struct v3dv_descriptor_set_binding_layout *binding)
2220 {
2221 assert(binding->immutable_samplers_offset);
2222 return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2223 }
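
/* Illustrative reading of the helper above (a sketch, not taken from the
 * driver): immutable samplers are addressed relative to the start of the set
 * layout allocation, with immutable_samplers_offset giving the byte offset
 * to the first sampler for that binding and 0 meaning "no immutable
 * samplers".
 *
 *    const struct v3dv_sampler *samplers =
 *       v3dv_immutable_samplers(set_layout, &set_layout->binding[b]);
 */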
2224
2225 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2226 struct v3dv_device *device,
2227 VkPipelineCacheCreateFlags,
2228 bool cache_enabled);
2229
2230 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2231
2232 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2233 struct v3dv_pipeline_cache *cache,
2234 nir_shader *nir,
2235 unsigned char sha1_key[20]);
2236
2237 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2238 struct v3dv_pipeline_cache *cache,
2239 const nir_shader_compiler_options *nir_options,
2240 unsigned char sha1_key[20]);
2241
2242 struct v3dv_pipeline_shared_data *
2243 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2244 unsigned char sha1_key[20],
2245 bool *cache_hit);
2246
2247 void
2248 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2249 struct v3dv_pipeline_cache *cache);
2250
2251 struct v3dv_bo *
2252 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2253 struct v3dv_pipeline *pipeline);
2254
2255 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
2256 VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2257
2258 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2259 VK_OBJECT_TYPE_COMMAND_BUFFER)
2260 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2261 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2262 VK_OBJECT_TYPE_INSTANCE)
2263 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2264 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2265 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2266
2267 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2268 VK_OBJECT_TYPE_BUFFER)
2269 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2270 VK_OBJECT_TYPE_BUFFER_VIEW)
2271 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
2272 VK_OBJECT_TYPE_DEVICE_MEMORY)
2273 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2274 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2275 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2276 VK_OBJECT_TYPE_DESCRIPTOR_SET)
2277 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2278 VkDescriptorSetLayout,
2279 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2280 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
2281 VkDescriptorUpdateTemplate,
2282 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
2283 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2284 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2285 VK_OBJECT_TYPE_FRAMEBUFFER)
2286 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2287 VK_OBJECT_TYPE_IMAGE)
2288 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2289 VK_OBJECT_TYPE_IMAGE_VIEW)
2290 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2291 VK_OBJECT_TYPE_PIPELINE)
2292 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2293 VK_OBJECT_TYPE_PIPELINE_CACHE)
2294 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2295 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2296 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2297 VK_OBJECT_TYPE_QUERY_POOL)
2298 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2299 VK_OBJECT_TYPE_RENDER_PASS)
2300 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2301 VK_OBJECT_TYPE_SAMPLER)
2302
2303 static inline int
2304 v3dv_ioctl(int fd, unsigned long request, void *arg)
2305 {
2306 if (using_v3d_simulator)
2307 return v3d_simulator_ioctl(fd, request, arg);
2308 else
2309 return drmIoctl(fd, request, arg);
2310 }
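
/* Illustrative use (a sketch; the render fd and BO handle are assumed to be
 * obtained elsewhere): the wrapper lets the same call work both against real
 * hardware and under the simulator.
 *
 *    struct drm_v3d_wait_bo wait = {
 *       .handle = bo_handle,
 *       .timeout_ns = INT64_MAX,
 *    };
 *    int ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 */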
2311
2312 /* Flags OOM conditions in command buffer state.
2313 *
2314 * Note that no-op jobs don't have a command buffer reference.
2315 */
2316 static inline void
2317 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2318 {
2319 if (cmd_buffer) {
2320 cmd_buffer->state.oom = true;
2321 } else {
2322 assert(job);
2323 if (job->cmd_buffer)
2324 job->cmd_buffer->state.oom = true;
2325 }
2326 }
2327
2328 #define v3dv_return_if_oom(_cmd_buffer, _job) do { \
2329 const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer; \
2330 if (__cmd_buffer && __cmd_buffer->state.oom) \
2331 return; \
2332 const struct v3dv_job *__job = _job; \
2333 if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom) \
2334 return; \
2335 } while(0) \
2336
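
/* Illustrative pattern (a sketch; emit_something() and alloc_something() are
 * made-up names, not functions from the driver): recording helpers flag OOM
 * when an allocation fails and bail out early on subsequent calls.
 *
 *    void emit_something(struct v3dv_cmd_buffer *cmd_buffer)
 *    {
 *       v3dv_return_if_oom(cmd_buffer, NULL);
 *       void *ptr = alloc_something(cmd_buffer);
 *       if (!ptr) {
 *          v3dv_flag_oom(cmd_buffer, NULL);
 *          return;
 *       }
 *       ...
 *    }
 */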
2337 static inline uint32_t
2338 u64_hash(const void *key)
2339 {
2340 return _mesa_hash_data(key, sizeof(uint64_t));
2341 }
2342
2343 static inline bool
2344 u64_compare(const void *key1, const void *key2)
2345 {
2346 return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2347 }
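
/* Illustrative use (a sketch; assumes the Mesa hash table API from
 * util/hash_table.h and a hypothetical object holding a 64-bit address):
 * these helpers let a hash table use 64-bit values stored behind pointers
 * as keys.
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *    // The storage holding the 64-bit key must outlive the table entry.
 *    _mesa_hash_table_insert(ht, &obj->address, obj);
 */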
2348
2349 /* Helper to call hardware-version-specific functions */
2350 #define v3dv_X(device, thing) ({ \
2351 __typeof(&v3d42_##thing) v3d_X_thing; \
2352 switch (device->devinfo.ver) { \
2353 case 42: \
2354 v3d_X_thing = &v3d42_##thing; \
2355 break; \
2356 default: \
2357 unreachable("Unsupported hardware generation"); \
2358 } \
2359 v3d_X_thing; \
2360 })
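
/* Illustrative use (a sketch; "pack_something" is a made-up name, not a real
 * per-version entry point): v3dv_X resolves the per-generation prefix at run
 * time from devinfo.ver and yields a pointer to the matching function, which
 * is then called like any other.
 *
 *    v3dv_X(device, pack_something)(args);   // calls v3d42_pack_something()
 */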
2361
2362
2363 /* The v3d_macros header from common requires the v3dX and V3DX definitions.
2364 * Below we need to define v3dX for each supported version, because when we
2365 * compile code that is not version-specific, all version-specific macros
2366 * need to be already defined.
2367 */
2368 #ifdef v3dX
2369 # include "v3dvx_private.h"
2370 #else
2371 # define v3dX(x) v3d42_##x
2372 # include "v3dvx_private.h"
2373 # undef v3dX
2374 #endif
2375
2376 #ifdef ANDROID
2377 VkResult
2378 v3dv_gralloc_info(struct v3dv_device *device,
2379 const VkNativeBufferANDROID *gralloc_info,
2380 int *out_dmabuf,
2381 int *out_stride,
2382 int *out_size,
2383 uint64_t *out_modifier);
2384
2385 VkResult
2386 v3dv_import_native_buffer_fd(VkDevice device_h,
2387 int dma_buf,
2388 const VkAllocationCallbacks *alloc,
2389 VkImage image_h);
2390 #endif /* ANDROID */
2391
2392 #endif /* V3DV_PRIVATE_H */
2393