/*
 * Copyright © 2019 Raspberry Pi
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_device.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

#include "vk_command_buffer.h"
#include "vk_queue.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"

/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif
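
/* Illustrative use of v3dv_assert (the condition below is hypothetical):
 * unlike a regular assert, a failure only logs a message and execution
 * continues, so a debug run is not aborted mid-recording:
 *
 *    v3dv_assert(set_index < MAX_SETS);
 */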

#define perf_debug(...) do { \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
      fprintf(stderr, __VA_ARGS__); \
} while (0)
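
/* Illustrative use of perf_debug (the message is hypothetical): output is
 * only emitted when the V3D_DEBUG_PERF debug option is enabled, typically
 * to flag performance fallbacks:
 *
 *    perf_debug("Copy fell back to a blit shader\n");
 */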

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;
   int32_t master_fd;

   /* We need these because it is not clear how to detect
    * valid devids in a portable way
    */
   bool has_primary;
   bool has_render;

   dev_t primary_devid;
   dev_t render_devid;

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   struct {
      bool merge_jobs;
   } options;
};

VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
                                              struct v3dv_physical_device *pdevice,
                                              VkIcdSurfaceBase *surface);

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
                           const VkOffset3D *offset,
                           VkFormat *compat_format);

struct v3dv_instance {
   struct vk_instance vk;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   /* struct vk_object_base base; ?*/
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads have
    * completed and all command buffer jobs in the submission have been sent
    * to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};

struct v3dv_queue {
   struct vk_queue vk;

   struct v3dv_device *device;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   bool robust_buffer_access;
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_coverage;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      const uint8_t *swizzle;
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
   bool has_multiview;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}

struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;

   /* For VK_EXT_pipeline_creation_cache_control. */
   bool externally_synchronized;
};

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* A sync object to track the last job submitted to the GPU. */
   uint32_t last_job_sync;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3d_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3d_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines
    * matching this requirement. Pipelines that need integer attributes
    * will create their own BO.
    */
   struct v3dv_bo *default_attribute_float;
   VkPhysicalDeviceFeatures features;
};

struct v3dv_device_memory {
   struct vk_object_base base;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool has_bo_ownership;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO 255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice. For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

struct v3dv_image {
   struct vk_image vk;

   const struct v3dv_format *format;
   uint32_t cpp;
   bool tiled;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
   uint32_t alignment;
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6

struct v3dv_image_view {
   struct vk_image_view vk;

   const struct v3dv_format *format;
   bool swap_rb;
   uint32_t internal_bpp;
   uint32_t internal_type;
   uint32_t offset;

   /* Precomputed (composed from createinfo->components and format swizzle)
    * swizzles to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;

   /* Multiview */
   uint32_t view_mask;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription desc;

   uint32_t first_subpass;
   uint32_t last_subpass;

   /* When multiview is enabled, we no longer care about when a particular
    * attachment is first or last used in a render pass, since not all views
    * in the attachment will meet that criteria. Instead, we need to track
    * each individual view (layer) in each attachment and emit our stores,
    * loads and clears accordingly.
    */
   struct {
      uint32_t first_subpass;
      uint32_t last_subpass;
   } views[MAX_MULTIVIEW_VIEW_COUNT];

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
    */
   bool use_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   bool multiview_enabled;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool msaa;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,
                                                const struct v3dv_subpass *subpass,
                                                uint8_t *max_bpp, bool *msaa);

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

struct v3dv_cmd_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
};

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT             = 1 << 0,
   V3DV_DYNAMIC_SCISSOR              = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE    = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS      = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS           = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH           = 1 << 7,
   V3DV_DYNAMIC_COLOR_WRITE_ENABLE   = 1 << 8,
   V3DV_DYNAMIC_ALL                  = (1 << 9) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT                = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR                 = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK    = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK      = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE       = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE                = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE        = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER           = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER            = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS         = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS          = 1 << 11,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS         = 1 << 12,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY         = 1 << 13,
   V3DV_CMD_DIRTY_DEPTH_BIAS              = 1 << 14,
   V3DV_CMD_DIRTY_LINE_WIDTH              = 1 << 15,
   V3DV_CMD_DIRTY_VIEW_INDEX              = 1 << 16,
   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE      = 1 << 17,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   float line_width;

   uint32_t color_write_enable;
};

extern const struct v3dv_dynamic_state default_dynamic_state;

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;

   /* Whether any postponed jobs after the wait should wait on semaphores */
   bool sem_wait;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* Whether we have already decided if we need to disable Early Z/S
    * completely for this job.
    */
   bool decided_global_ez_enable;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info query_reset;
      struct v3dv_end_query_cpu_job_info query_end;
      struct v3dv_copy_query_results_cpu_job_info query_copy_results;
      struct v3dv_event_set_cpu_job_info event_set;
      struct v3dv_event_wait_cpu_job_info event_wait;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          bool allocate_tile_state_for_all_layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Current view index for multiview rendering */
   uint32_t view_index;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdEndQuery commands recorded in the command buffer
       * during a render pass. We queue these here and then schedule the
       * corresponding CPU jobs for them at the time we finish the GPU job
       * in which they have been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This BO is not NULL if we have an active query, that is, we have
       * called vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      } active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store in
 * host memory. These are mostly links to other existing vulkan objects, like
 * the image_view in order to access swizzle info, or the buffer used for a
 * UBO/SSBO, for example.
 *
 * FIXME: revisit if it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   bool maybe_available;
   union {
      /* Used by GPU queries (occlusion) */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      };
      /* Used by CPU queries (timestamp) */
      uint64_t value;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   struct vk_command_buffer vk;

   struct v3dv_device *device;

   struct v3dv_cmd_pool *pool;
   struct list_head pool_link;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side buffer and one BO for the push
    * constants, independently of the stageFlags in vkCmdPushConstants, and
    * the pipelineBindPoint in vkCmdBindPipeline. We could probably do more
    * stage tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
      struct {
         /* The current descriptor pool for texel buffer copy sources */
         VkDescriptorPool dspool;
      } texel_buffer_copy;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);

struct v3dv_semaphore {
   struct vk_object_base base;

   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
   uint32_t temp_sync;
};

struct v3dv_fence {
   struct vk_object_base base;

   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportFenceFd. */
   uint32_t temp_sync;
};

struct v3dv_event {
   struct vk_object_base base;
   int state;
};

struct v3dv_shader_variant {
   enum broadcom_shader_stage stage;

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_gs_prog_data *gs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes it easier to
    * serialize
    */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: it is really likely that qpu_insts will be NULL, as it is only
    * used temporarily, to upload it to the shared bo, since we compile the
    * different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't need so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedbackEXT feedback;
};

/* We are using the descriptor pool entry for two things:
 *   * Track the allocated sets, so we can properly free them if needed
 *   * Track the suballocated pool bo regions, so if some descriptor set is
 *     freed, the gap could be reallocated later.
 *
 * These only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* If this descriptor pool has been allocated by the driver for internal
    * use, typically to implement meta operations.
    */
   bool is_driver_internal;

   struct v3dv_bo *bo;
   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, current offset is
    * meaningless
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
    * descriptor sets are handled as a whole as pool memory, managed by the
    * following pointers. If set, they are not used, and descriptor sets are
    * allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};

struct v3dv_descriptor_set {
   struct vk_object_base base;

   struct v3dv_descriptor_pool *pool;

   const struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor bo needs to take into account
    * set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set
    */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
};

/*
 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
 * the map to be big enough to hold the max value for all of them.
 *
 * FIXME: one alternative would be to allocate the map as big as you need for
 * each descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
                                 MAX_UNIFORM_BUFFERS, \
                                 MAX_STORAGE_BUFFERS)


struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only used for samplers, but this is the easiest
    * place to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed it will be copied to the descriptor info during
    * UpdateDescriptorSets
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};


/* We keep two special values for the sampler idx that represent exactly when
 * a sampler is not needed/provided. The main use is that even if we don't
 * have a sampler, we still need to do the output unpacking (through
 * nir_lower_tex). The easiest way to do this is to add those special "no
 * sampler" entries to the sampler_map, and then use the proper unpacking for
 * that case.
 *
 * We have one for when we want a 16-bit output size, and another for when we
 * want a 32-bit output size. We use the info coming from the RelaxedPrecision
 * decoration to decide between one and the other.
 */
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1

/*
 * The following two methods are used with the combined texture/sampler index
 * maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
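
/* Illustrative round trip of the packing above (the indices are
 * hypothetical): the texture index lives in the top 8 bits of the key and
 * the sampler index in the low 24 bits, so texture indices must fit in
 * 8 bits:
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(2, 5);
 *    uint32_t tex_idx, samp_idx;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex_idx, &samp_idx);
 *    assert(tex_idx == 2 && samp_idx == 5);
 */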

struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* The structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note these will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding. So vb[binding].stride is the stride of the vertex
    * array with that binding
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need to be
    * rechecked later. The array must be indexed by driver location, since
    * that is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But as in most cases it will be NULL, it is
    * not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation
    */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}

const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#ifdef DEBUG
#define v3dv_debug_ignored_stype(sType) \
   fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
#else
#define v3dv_debug_ignored_stype(sType)
#endif

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}
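
/* Typical ownership pattern, as a sketch: the object that adopts the shared
 * data takes a reference and drops it when done, with the last unref freeing
 * the data via v3dv_pipeline_shared_data_destroy():
 *
 *    pipeline->shared_data = shared_data;
 *    v3dv_pipeline_shared_data_ref(shared_data);
 *    ...
 *    v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
 */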

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

const struct v3dv_format *
v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_state,
                                       struct v3dv_descriptor_map *map,
                                       struct v3dv_pipeline_layout *pipeline_layout,
                                       uint32_t index,
                                       VkFormat *out_vk_format);

struct v3dv_bo *
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

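/* Immutable samplers are stored inline, trailing the descriptor set layout
 * allocation: immutable_samplers_offset on the binding layout is the byte
 * offset from the start of the set layout to the first immutable sampler of
 * that binding.
 */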
static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *)
          ((const char *) set + binding->immutable_samplers_offset);
}

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags flags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader *v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

void v3dv_shader_module_internal_init(struct v3dv_device *device,
                                      struct vk_shader_module *module,
                                      nir_shader *nir);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}
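
/* For example (a sketch; render_fd is assumed to be an open render node fd):
 * a GET_PARAM query that is transparently routed to the simulator when the
 * driver is built against it:
 *
 *    struct drm_v3d_get_param param = {
 *       .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 *    };
 *    int ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_GET_PARAM, &param);
 */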

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while (0)

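/* A sketch of the intended usage (record_something() and the allocation
 * details are illustrative, not real driver code): the path that fails to
 * allocate flags the OOM condition, and recording entry points bail out
 * early once the command buffer is in an OOM state:
 *
 *    static void
 *    record_something(struct v3dv_cmd_buffer *cmd_buffer)
 *    {
 *       v3dv_return_if_oom(cmd_buffer, NULL);
 *
 *       void *data = vk_alloc(&cmd_buffer->device->vk.alloc, 128, 8,
 *                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
 *       if (!data) {
 *          v3dv_flag_oom(cmd_buffer, NULL);
 *          return;
 *       }
 *    }
 */
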
static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
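
/* These are intended as callbacks for a util hash table keyed on 64-bit
 * values, e.g. (sketch):
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *
 * Keys are passed by pointer, so they must remain valid and stable for the
 * lifetime of their table entries.
 */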

/* Helper to call hardware-version-specific functions */
#define v3dv_X(device, thing) ({                         \
   __typeof(&v3d42_##thing) v3d_X_thing;                 \
   switch (device->devinfo.ver) {                        \
   case 42:                                              \
      v3d_X_thing = &v3d42_##thing;                      \
      break;                                             \
   default:                                              \
      unreachable("Unsupported hardware generation");    \
   }                                                     \
   v3d_X_thing;                                          \
})
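
/* For example, version-independent code would dispatch to the per-version
 * implementation like this (a sketch; job_emit_binning_flush is assumed to
 * be one of the per-version v3dX() entry points):
 *
 *    v3dv_X(job->device, job_emit_binning_flush)(job);
 */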

/* v3d_macros from common requires v3dX and V3DX definitions. Below we need
 * to define v3dX for each supported version, because when we compile code
 * that is not version-specific, all version-specific macros need to be
 * already defined.
 */
#ifdef v3dX
# include "v3dvx_private.h"
#else
# define v3dX(x) v3d42_##x
# include "v3dvx_private.h"
# undef v3dX
#endif

#endif /* V3DV_PRIVATE_H */