1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef TU_PRIVATE_H
29 #define TU_PRIVATE_H
30
31 #include <assert.h>
32 #include <pthread.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #ifdef HAVE_VALGRIND
39 #include <memcheck.h>
40 #include <valgrind.h>
41 #define VG(x) x
42 #else
43 #define VG(x) ((void)0)
44 #endif
45
46 #define MESA_LOG_TAG "TU"
47
48 #include "c11/threads.h"
49 #include "main/macros.h"
50 #include "util/list.h"
51 #include "util/log.h"
52 #include "util/macros.h"
53 #include "util/u_atomic.h"
54 #include "vk_alloc.h"
55 #include "vk_object.h"
56 #include "vk_debug_report.h"
57 #include "wsi_common.h"
58
59 #include "ir3/ir3_compiler.h"
60 #include "ir3/ir3_shader.h"
61
62 #include "adreno_common.xml.h"
63 #include "adreno_pm4.xml.h"
64 #include "a6xx.xml.h"
65 #include "fdl/freedreno_layout.h"
66 #include "common/freedreno_dev_info.h"
67
68 #include "tu_descriptor_set.h"
69 #include "tu_extensions.h"
70 #include "tu_util.h"
71
72 /* Pre-declarations needed for WSI entrypoints */
73 struct wl_surface;
74 struct wl_display;
75 typedef struct xcb_connection_t xcb_connection_t;
76 typedef uint32_t xcb_visualid_t;
77 typedef uint32_t xcb_window_t;
78
79 #include <vulkan/vk_android_native_buffer.h>
80 #include <vulkan/vk_icd.h>
81 #include <vulkan/vulkan.h>
82 #include <vulkan/vulkan_intel.h>
83
84 #include "tu_entrypoints.h"
85
86 #include "vk_format.h"
87
88 #define MAX_VBS 32
89 #define MAX_VERTEX_ATTRIBS 32
90 #define MAX_RTS 8
91 #define MAX_VSC_PIPES 32
92 #define MAX_VIEWPORTS 16
93 #define MAX_SCISSORS 16
94 #define MAX_DISCARD_RECTANGLES 4
95 #define MAX_PUSH_CONSTANTS_SIZE 128
96 #define MAX_PUSH_DESCRIPTORS 32
97 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
98 #define MAX_DYNAMIC_STORAGE_BUFFERS 8
99 #define MAX_DYNAMIC_BUFFERS \
100 (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
101 #define TU_MAX_DRM_DEVICES 8
102 #define MAX_VIEWS 16
103 #define MAX_BIND_POINTS 2 /* compute + graphics */
104 /* The Qualcomm driver exposes 0x20000058 */
105 #define MAX_STORAGE_BUFFER_RANGE 0x20000000
106 /* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
107 * expose the same maximum range.
108 * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
109 * range might be higher.
110 */
111 #define MAX_UNIFORM_BUFFER_RANGE 0x10000
112
113 #define A6XX_TEX_CONST_DWORDS 16
114 #define A6XX_TEX_SAMP_DWORDS 4
115
116 #define for_each_bit(b, dword) \
117 for (uint32_t __dword = (dword); \
118 (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b)))
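/* Usage sketch (illustrative; 'dirty' and handle_dirty_bit() are
 * hypothetical): iterates the set bits of a 32-bit mask, lowest first,
 * assigning each bit index to 'b'.
 *
 *    uint32_t b;
 *    for_each_bit(b, dirty) {
 *       handle_dirty_bit(b);
 *    }
 */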
119
120 #define COND(bool, val) ((bool) ? (val) : 0)
121 #define BIT(bit) (1u << (bit))
122
123 /* Whenever we generate an error, pass it through this function. Useful for
124 * debugging, where we can break on it. Only call at error site, not when
125 * propagating errors. Might be useful to plug in a stack trace here.
126 */
127
128 struct tu_instance;
129
130 VkResult
131 __vk_errorf(struct tu_instance *instance,
132 VkResult error,
133 bool force_print,
134 const char *file,
135 int line,
136 const char *format,
137 ...) PRINTFLIKE(6, 7);
138
139 #define vk_error(instance, error) \
140 __vk_errorf(instance, error, false, __FILE__, __LINE__, NULL);
141 #define vk_errorf(instance, error, format, ...) \
142 __vk_errorf(instance, error, false, __FILE__, __LINE__, format, ##__VA_ARGS__);
143
144 /* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
145 * build.
146 */
147 #define vk_startup_errorf(instance, error, format, ...) \
148 __vk_errorf(instance, error, instance->debug_flags & TU_DEBUG_STARTUP, \
149 __FILE__, __LINE__, format, ##__VA_ARGS__)
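/* Example (illustrative only): errors are reported where they are first
 * detected, not when propagated, e.g.
 *
 *    if (!mem)
 *       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 *
 *    return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
 *                     "mmap failed: %s", strerror(errno));
 */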
150
151 void
152 __tu_finishme(const char *file, int line, const char *format, ...)
153 PRINTFLIKE(3, 4);
154
155 /**
156 * Print a FINISHME message, including its source location.
157 */
158 #define tu_finishme(format, ...) \
159 do { \
160 static bool reported = false; \
161 if (!reported) { \
162 __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
163 reported = true; \
164 } \
165 } while (0)
166
167 #define tu_stub() \
168 do { \
169 tu_finishme("stub %s", __func__); \
170 } while (0)
171
172 void *
173 tu_lookup_entrypoint_unchecked(const char *name);
174 void *
175 tu_lookup_entrypoint_checked(
176 const char *name,
177 uint32_t core_version,
178 const struct tu_instance_extension_table *instance,
179 const struct tu_device_extension_table *device);
180
181 struct tu_physical_device
182 {
183 struct vk_object_base base;
184
185 struct tu_instance *instance;
186
187 char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
188 uint8_t driver_uuid[VK_UUID_SIZE];
189 uint8_t device_uuid[VK_UUID_SIZE];
190 uint8_t cache_uuid[VK_UUID_SIZE];
191
192 struct wsi_device wsi_device;
193
194 int local_fd;
195 int master_fd;
196
197 unsigned gpu_id;
198 uint32_t gmem_size;
199 uint64_t gmem_base;
200
201 struct freedreno_dev_info info;
202
203 int msm_major_version;
204 int msm_minor_version;
205
206 bool limited_z24s8;
207
   /* This is the driver's on-disk cache, used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
211 struct disk_cache *disk_cache;
212
213 struct tu_device_extension_table supported_extensions;
214 };
215
216 enum tu_debug_flags
217 {
218 TU_DEBUG_STARTUP = 1 << 0,
219 TU_DEBUG_NIR = 1 << 1,
220 TU_DEBUG_IR3 = 1 << 2,
221 TU_DEBUG_NOBIN = 1 << 3,
222 TU_DEBUG_SYSMEM = 1 << 4,
223 TU_DEBUG_FORCEBIN = 1 << 5,
224 TU_DEBUG_NOUBWC = 1 << 6,
225 TU_DEBUG_NOMULTIPOS = 1 << 7,
226 TU_DEBUG_NOLRZ = 1 << 8,
227 };
228
229 struct tu_instance
230 {
231 struct vk_object_base base;
232
233 VkAllocationCallbacks alloc;
234
235 uint32_t api_version;
236 int physical_device_count;
237 struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES];
238
239 enum tu_debug_flags debug_flags;
240
241 struct vk_debug_report_instance debug_report_callbacks;
242
243 struct tu_instance_extension_table enabled_extensions;
244 };
245
246 VkResult
247 tu_wsi_init(struct tu_physical_device *physical_device);
248 void
249 tu_wsi_finish(struct tu_physical_device *physical_device);
250
251 bool
252 tu_instance_extension_supported(const char *name);
253 uint32_t
254 tu_physical_device_api_version(struct tu_physical_device *dev);
255 bool
256 tu_physical_device_extension_supported(struct tu_physical_device *dev,
257 const char *name);
258
259 struct cache_entry;
260
261 struct tu_pipeline_cache
262 {
263 struct vk_object_base base;
264
265 struct tu_device *device;
266 pthread_mutex_t mutex;
267
268 uint32_t total_size;
269 uint32_t table_size;
270 uint32_t kernel_count;
271 struct cache_entry **hash_table;
272 bool modified;
273
274 VkAllocationCallbacks alloc;
275 };
276
277 struct tu_pipeline_key
278 {
279 };
280
281
282 /* queue types */
283 #define TU_QUEUE_GENERAL 0
284
285 #define TU_MAX_QUEUE_FAMILIES 1
286
287 struct tu_syncobj;
288
289 struct tu_queue
290 {
291 struct vk_object_base base;
292
293 struct tu_device *device;
294 uint32_t queue_family_index;
295 int queue_idx;
296 VkDeviceQueueCreateFlags flags;
297
298 uint32_t msm_queue_id;
299 int fence;
300 };
301
302 struct tu_bo
303 {
304 uint32_t gem_handle;
305 uint64_t size;
306 uint64_t iova;
307 void *map;
308 };
309
310 enum global_shader {
311 GLOBAL_SH_VS,
312 GLOBAL_SH_FS_BLIT,
313 GLOBAL_SH_FS_CLEAR0,
314 GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS,
315 GLOBAL_SH_COUNT,
316 };
317
318 #define TU_BORDER_COLOR_COUNT 4096
319 #define TU_BORDER_COLOR_BUILTIN 6
320
321 /* This struct defines the layout of the global_bo */
322 struct tu6_global
323 {
324 /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */
325 instr_t shaders[GLOBAL_SH_COUNT][16];
326
327 uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
328 uint32_t _pad0;
329 volatile uint32_t vsc_draw_overflow;
330 uint32_t _pad1;
331 volatile uint32_t vsc_prim_overflow;
332 uint32_t _pad2;
333 uint64_t predicate;
334
335 /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
336 struct {
337 uint32_t offset;
338 uint32_t pad[7];
339 } flush_base[4];
340
341 /* note: larger global bo will be used for customBorderColors */
342 struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
343 };
344 #define gb_offset(member) offsetof(struct tu6_global, member)
345 #define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
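/* Example (hedged sketch): the device address of the dummy seqno slot used
 * by CP_EVENT_WRITE, given a command buffer 'cmd':
 *
 *    uint64_t seqno_iova = global_iova(cmd, seqno_dummy);
 *
 * which expands to
 *
 *    cmd->device->global_bo.iova + offsetof(struct tu6_global, seqno_dummy)
 */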
346
347 void tu_init_clear_blit_shaders(struct tu6_global *global);
348
349 /* extra space in vsc draw/prim streams */
350 #define VSC_PAD 0x40
351
352 struct tu_device
353 {
354 struct vk_device vk;
355 struct tu_instance *instance;
356
357 struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES];
358 int queue_count[TU_MAX_QUEUE_FAMILIES];
359
360 struct tu_physical_device *physical_device;
361 int fd;
362 int _lost;
363
364 struct ir3_compiler *compiler;
365
366 /* Backup in-memory cache to be used if the app doesn't provide one */
367 struct tu_pipeline_cache *mem_cache;
368
369 #define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
370
371 /* Currently the kernel driver uses a 32-bit GPU address space, but it
372 * should be impossible to go beyond 48 bits.
373 */
374 struct {
375 struct tu_bo bo;
376 mtx_t construct_mtx;
377 bool initialized;
378 } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
379
380 struct tu_bo global_bo;
381
382 struct tu_device_extension_table enabled_extensions;
383
384 uint32_t vsc_draw_strm_pitch;
385 uint32_t vsc_prim_strm_pitch;
386 BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT);
387 mtx_t mutex;
388
389 /* bo list for submits: */
390 struct drm_msm_gem_submit_bo *bo_list;
391 /* map bo handles to bo list index: */
392 uint32_t *bo_idx;
393 uint32_t bo_count, bo_list_size, bo_idx_size;
394 mtx_t bo_mutex;
395 };
396
397 VkResult _tu_device_set_lost(struct tu_device *device,
398 const char *msg, ...) PRINTFLIKE(2, 3);
399 #define tu_device_set_lost(dev, ...) \
400 _tu_device_set_lost(dev, __VA_ARGS__)
401
402 static inline bool
tu_device_is_lost(struct tu_device *device)
404 {
405 return unlikely(p_atomic_read(&device->_lost));
406 }
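/* Typical pattern (illustrative; the error message is hypothetical): bail
 * out early if the device is already lost, and mark it lost when a kernel
 * submission fails:
 *
 *    if (tu_device_is_lost(queue->device))
 *       return VK_ERROR_DEVICE_LOST;
 *    ...
 *    if (ret)
 *       return tu_device_set_lost(queue->device,
 *                                 "submit failed: %s", strerror(errno));
 */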
407
408 VkResult
409 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size, bool dump);
410 VkResult
411 tu_bo_init_dmabuf(struct tu_device *dev,
412 struct tu_bo *bo,
413 uint64_t size,
414 int fd);
415 int
416 tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo);
417 void
418 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
419 VkResult
420 tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
421
422 /* Get a scratch bo for use inside a command buffer. This will always return
423 * the same bo given the same size or similar sizes, so only one scratch bo
424 * can be used at the same time. It's meant for short-lived things where we
425 * need to write to some piece of memory, read from it, and then immediately
426 * discard it.
427 */
428 VkResult
429 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
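/* Rough usage sketch (size and error handling are illustrative):
 *
 *    struct tu_bo *scratch_bo;
 *    VkResult result = tu_get_scratch_bo(cmd->device, size, &scratch_bo);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    ... emit packets that write to and read back from scratch_bo->iova ...
 *
 * The bo stays owned by the device and is reused by later calls of similar
 * size, so its contents must not be relied on across calls.
 */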
430
431 struct tu_cs_entry
432 {
433 /* No ownership */
434 const struct tu_bo *bo;
435
436 uint32_t size;
437 uint32_t offset;
438 };
439
440 struct tu_cs_memory {
441 uint32_t *map;
442 uint64_t iova;
443 };
444
445 struct tu_draw_state {
446 uint64_t iova : 48;
447 uint32_t size : 16;
448 };
449
450 enum tu_dynamic_state
451 {
452 /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
453 TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
454 TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
455 TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
456 TU_DYNAMIC_STATE_VB_STRIDE,
457 TU_DYNAMIC_STATE_COUNT,
458 /* no associated draw state: */
459 TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
460 /* re-use the line width enum as it uses GRAS_SU_CNTL: */
461 TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
462 };
463
464 enum tu_draw_state_group_id
465 {
466 TU_DRAW_STATE_PROGRAM,
467 TU_DRAW_STATE_PROGRAM_BINNING,
468 TU_DRAW_STATE_TESS,
469 TU_DRAW_STATE_VB,
470 TU_DRAW_STATE_VI,
471 TU_DRAW_STATE_VI_BINNING,
472 TU_DRAW_STATE_RAST,
473 TU_DRAW_STATE_BLEND,
474 TU_DRAW_STATE_VS_CONST,
475 TU_DRAW_STATE_HS_CONST,
476 TU_DRAW_STATE_DS_CONST,
477 TU_DRAW_STATE_GS_CONST,
478 TU_DRAW_STATE_FS_CONST,
479 TU_DRAW_STATE_DESC_SETS,
480 TU_DRAW_STATE_DESC_SETS_LOAD,
481 TU_DRAW_STATE_VS_PARAMS,
482 TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
483 TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
484 TU_DRAW_STATE_LRZ,
485
486 /* dynamic state related draw states */
487 TU_DRAW_STATE_DYNAMIC,
488 TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
489 };
490
491 enum tu_cs_mode
492 {
493
494 /*
495 * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
496 * is full. tu_cs_begin must be called before command packet emission and
497 * tu_cs_end must be called after.
498 *
499 * This mode may create multiple entries internally. The entries must be
500 * submitted together.
501 */
502 TU_CS_MODE_GROW,
503
504 /*
505 * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
506 * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no
507 * effect on it.
508 *
509 * This mode does not create any entry or any BO.
510 */
511 TU_CS_MODE_EXTERNAL,
512
513 /*
514 * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
515 * command packet emission. tu_cs_begin_sub_stream must be called to get a
 * sub-stream to emit command packets to. When done with the sub-stream,
517 * tu_cs_end_sub_stream must be called.
518 *
519 * This mode does not create any entry internally.
520 */
521 TU_CS_MODE_SUB_STREAM,
522 };
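/* Rough usage sketches (the emit helpers are declared in tu_cs.h; the exact
 * signatures shown here are illustrative):
 *
 *    // TU_CS_MODE_GROW: bracket packet emission with begin/end
 *    tu_cs_begin(cs);
 *    ... emit packets; the cs grows as needed ...
 *    tu_cs_end(cs);
 *
 *    // TU_CS_MODE_SUB_STREAM: carve out a sub-stream, emit to it, then
 *    // turn it into an IB entry
 *    struct tu_cs sub_cs;
 *    tu_cs_begin_sub_stream(cs, dword_count, &sub_cs);
 *    ... emit packets to &sub_cs ...
 *    struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, &sub_cs);
 */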
523
524 struct tu_cs
525 {
526 uint32_t *start;
527 uint32_t *cur;
528 uint32_t *reserved_end;
529 uint32_t *end;
530
531 struct tu_device *device;
532 enum tu_cs_mode mode;
533 uint32_t next_bo_size;
534
535 struct tu_cs_entry *entries;
536 uint32_t entry_count;
537 uint32_t entry_capacity;
538
539 struct tu_bo **bos;
540 uint32_t bo_count;
541 uint32_t bo_capacity;
542
543 /* state for cond_exec_start/cond_exec_end */
544 uint32_t cond_flags;
545 uint32_t *cond_dwords;
546 };
547
548 struct tu_device_memory
549 {
550 struct vk_object_base base;
551
552 struct tu_bo bo;
553 };
554
555 struct tu_descriptor_range
556 {
557 uint64_t va;
558 uint32_t size;
559 };
560
561 struct tu_descriptor_set
562 {
563 struct vk_object_base base;
564
565 const struct tu_descriptor_set_layout *layout;
566 struct tu_descriptor_pool *pool;
567 uint32_t size;
568
569 uint64_t va;
570 uint32_t *mapped_ptr;
571
572 uint32_t *dynamic_descriptors;
573 };
574
575 struct tu_descriptor_pool_entry
576 {
577 uint32_t offset;
578 uint32_t size;
579 struct tu_descriptor_set *set;
580 };
581
582 struct tu_descriptor_pool
583 {
584 struct vk_object_base base;
585
586 struct tu_bo bo;
587 uint64_t current_offset;
588 uint64_t size;
589
590 uint8_t *host_memory_base;
591 uint8_t *host_memory_ptr;
592 uint8_t *host_memory_end;
593
594 uint32_t entry_count;
595 uint32_t max_entry_count;
596 struct tu_descriptor_pool_entry entries[0];
597 };
598
599 struct tu_descriptor_update_template_entry
600 {
601 VkDescriptorType descriptor_type;
602
603 /* The number of descriptors to update */
604 uint32_t descriptor_count;
605
606 /* Into mapped_ptr or dynamic_descriptors, in units of the respective array
607 */
608 uint32_t dst_offset;
609
610 /* In dwords. Not valid/used for dynamic descriptors */
611 uint32_t dst_stride;
612
613 uint32_t buffer_offset;
614
615 /* Only valid for combined image samplers and samplers */
616 uint16_t has_sampler;
617
618 /* In bytes */
619 size_t src_offset;
620 size_t src_stride;
621
622 /* For push descriptors */
623 const struct tu_sampler *immutable_samplers;
624 };
625
626 struct tu_descriptor_update_template
627 {
628 struct vk_object_base base;
629
630 uint32_t entry_count;
631 VkPipelineBindPoint bind_point;
632 struct tu_descriptor_update_template_entry entry[0];
633 };
634
635 struct tu_buffer
636 {
637 struct vk_object_base base;
638
639 VkDeviceSize size;
640
641 VkBufferUsageFlags usage;
642 VkBufferCreateFlags flags;
643
644 struct tu_bo *bo;
645 VkDeviceSize bo_offset;
646 };
647
648 static inline uint64_t
tu_buffer_iova(struct tu_buffer *buffer)
650 {
651 return buffer->bo->iova + buffer->bo_offset;
652 }
653
654 const char *
655 tu_get_debug_option_name(int id);
656
657 const char *
658 tu_get_perftest_option_name(int id);
659
660 struct tu_descriptor_state
661 {
662 struct tu_descriptor_set *sets[MAX_SETS];
663 struct tu_descriptor_set push_set;
664 uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
665 };
666
667 enum tu_cmd_dirty_bits
668 {
669 TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
670 TU_CMD_DIRTY_VB_STRIDE = BIT(1),
671 TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
672 TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
673 TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
674 TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
675 TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
676 TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
677 TU_CMD_DIRTY_LRZ = BIT(8),
678 /* all draw states were disabled and need to be re-enabled: */
679 TU_CMD_DIRTY_DRAW_STATE = BIT(9)
680 };
681
/* There are only three cache domains we have to care about: the CCU, or
 * color cache unit (used for color and depth/stencil attachments and
 * copy/blit destinations, and split conceptually into color and depth),
 * and the universal cache, or UCHE, which is used for pretty much
 * everything else, except for the CP (uncached) and the host. We need to
 * flush whenever data crosses these boundaries.
 */
689
690 enum tu_cmd_access_mask {
691 TU_ACCESS_UCHE_READ = 1 << 0,
692 TU_ACCESS_UCHE_WRITE = 1 << 1,
693 TU_ACCESS_CCU_COLOR_READ = 1 << 2,
694 TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
695 TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
696 TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,
697
698 /* Experiments have shown that while it's safe to avoid flushing the CCU
699 * after each blit/renderpass, it's not safe to assume that subsequent
700 * lookups with a different attachment state will hit unflushed cache
701 * entries. That is, the CCU needs to be flushed and possibly invalidated
702 * when accessing memory with a different attachment state. Writing to an
703 * attachment under the following conditions after clearing using the
704 * normal 2d engine path is known to have issues:
705 *
706 * - It isn't the 0'th layer.
 * - There is more than one attachment, and this isn't the 0'th attachment
708 * (this seems to also depend on the cpp of the attachments).
709 *
710 * Our best guess is that the layer/MRT state is used when computing
711 * the location of a cache entry in CCU, to avoid conflicts. We assume that
712 * any access in a renderpass after or before an access by a transfer needs
713 * a flush/invalidate, and use the _INCOHERENT variants to represent access
714 * by a transfer.
715 */
716 TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
717 TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
718 TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
719 TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,
720
721 /* Accesses by the host */
722 TU_ACCESS_HOST_READ = 1 << 10,
723 TU_ACCESS_HOST_WRITE = 1 << 11,
724
725 /* Accesses by a GPU engine which bypasses any cache. e.g. writes via
726 * CP_EVENT_WRITE::BLIT and the CP are SYSMEM_WRITE.
727 */
728 TU_ACCESS_SYSMEM_READ = 1 << 12,
729 TU_ACCESS_SYSMEM_WRITE = 1 << 13,
730
731 /* Set if a WFI is required. This can be required for:
732 * - 2D engine which (on some models) doesn't wait for flushes to complete
733 * before starting
734 * - CP draw indirect opcodes, where we need to wait for any flushes to
735 * complete but the CP implicitly waits for WFI's to complete and
736 * therefore we only need a WFI after the flushes.
737 */
738 TU_ACCESS_WFI_READ = 1 << 14,
739
740 /* Set if a CP_WAIT_FOR_ME is required due to the data being read by the CP
741 * without it waiting for any WFI.
742 */
743 TU_ACCESS_WFM_READ = 1 << 15,
744
745 /* Memory writes from the CP start in-order with draws and event writes,
746 * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
747 */
748 TU_ACCESS_CP_WRITE = 1 << 16,
749
750 TU_ACCESS_READ =
751 TU_ACCESS_UCHE_READ |
752 TU_ACCESS_CCU_COLOR_READ |
753 TU_ACCESS_CCU_DEPTH_READ |
754 TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
755 TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
756 TU_ACCESS_HOST_READ |
757 TU_ACCESS_SYSMEM_READ |
758 TU_ACCESS_WFI_READ |
759 TU_ACCESS_WFM_READ,
760
761 TU_ACCESS_WRITE =
762 TU_ACCESS_UCHE_WRITE |
763 TU_ACCESS_CCU_COLOR_WRITE |
764 TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
765 TU_ACCESS_CCU_DEPTH_WRITE |
766 TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
767 TU_ACCESS_HOST_WRITE |
768 TU_ACCESS_SYSMEM_WRITE |
769 TU_ACCESS_CP_WRITE,
770
771 TU_ACCESS_ALL =
772 TU_ACCESS_READ |
773 TU_ACCESS_WRITE,
774 };
775
776 enum tu_cmd_flush_bits {
777 TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
778 TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
779 TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
780 TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
781 TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
782 TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
783 TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
784 TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
785 TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,
786
787 TU_CMD_FLAG_ALL_FLUSH =
788 TU_CMD_FLAG_CCU_FLUSH_DEPTH |
789 TU_CMD_FLAG_CCU_FLUSH_COLOR |
790 TU_CMD_FLAG_CACHE_FLUSH |
791 /* Treat the CP as a sort of "cache" which may need to be "flushed" via
792 * waiting for writes to land with WAIT_FOR_MEM_WRITES.
793 */
794 TU_CMD_FLAG_WAIT_MEM_WRITES,
795
796 TU_CMD_FLAG_GPU_INVALIDATE =
797 TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
798 TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
799 TU_CMD_FLAG_CACHE_INVALIDATE,
800
801 TU_CMD_FLAG_ALL_INVALIDATE =
802 TU_CMD_FLAG_GPU_INVALIDATE |
803 /* Treat the CP as a sort of "cache" which may need to be "invalidated"
804 * via waiting for UCHE/CCU flushes to land with WFI/WFM.
805 */
806 TU_CMD_FLAG_WAIT_FOR_IDLE |
807 TU_CMD_FLAG_WAIT_FOR_ME,
808 };
809
810 /* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
811 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
 * which part of the gmem is used by the CCU. Here we keep track of the
 * current state of the CCU.
814 */
815 enum tu_cmd_ccu_state {
816 TU_CMD_CCU_SYSMEM,
817 TU_CMD_CCU_GMEM,
818 TU_CMD_CCU_UNKNOWN,
819 };
820
821 struct tu_cache_state {
822 /* Caches which must be made available (flushed) eventually if there are
823 * any users outside that cache domain, and caches which must be
824 * invalidated eventually if there are any reads.
825 */
826 enum tu_cmd_flush_bits pending_flush_bits;
827 /* Pending flushes */
828 enum tu_cmd_flush_bits flush_bits;
829 };
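/* Conceptual sketch only (the real barrier logic lives in tu_cmd_buffer.c):
 * a CCU color write leaves a flush pending; a later read through a
 * different cache domain forces the pending flushes out and invalidates the
 * reader's cache:
 *
 *    // after the write
 *    cache->pending_flush_bits |= TU_CMD_FLAG_CCU_FLUSH_COLOR;
 *
 *    // before a UCHE read of the same data
 *    cache->flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH;
 *    cache->flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
 *    cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_FLUSH;
 */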
830
831 struct tu_lrz_pipeline
832 {
833 bool write : 1;
834 bool invalidate : 1;
835
836 bool enable : 1;
837 bool greater : 1;
838 bool z_test_enable : 1;
839 bool blend_disable_write : 1;
840 };
841
842 struct tu_lrz_state
843 {
   /* Depth/stencil image currently in use for LRZ */
845 struct tu_image *image;
846 bool valid : 1;
847 struct tu_draw_state state;
848 };
849
850 struct tu_cmd_state
851 {
852 uint32_t dirty;
853
854 struct tu_pipeline *pipeline;
855 struct tu_pipeline *compute_pipeline;
856
   /* Vertex buffers, viewports, and scissors:
    * these states can be updated partially, so we need to save them here to
    * be able to emit a complete draw state.
    */
861 struct {
862 uint64_t base;
863 uint32_t size;
864 uint32_t stride;
865 } vb[MAX_VBS];
866 VkViewport viewport[MAX_VIEWPORTS];
867 VkRect2D scissor[MAX_SCISSORS];
868 uint32_t max_viewport, max_scissor;
869
870 /* for dynamic states that can't be emitted directly */
871 uint32_t dynamic_stencil_mask;
872 uint32_t dynamic_stencil_wrmask;
873 uint32_t dynamic_stencil_ref;
874
875 uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
876 enum pc_di_primtype primtype;
877
878 /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
879 struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
880 struct tu_draw_state vertex_buffers;
881 struct tu_draw_state shader_const[MESA_SHADER_STAGES];
882 struct tu_draw_state desc_sets;
883
884 struct tu_draw_state vs_params;
885
886 /* Index buffer */
887 uint64_t index_va;
888 uint32_t max_index_count;
889 uint8_t index_size;
890
891 /* because streamout base has to be 32-byte aligned
892 * there is an extra offset to deal with when it is
893 * unaligned
894 */
895 uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];
896
897 /* Renderpasses are tricky, because we may need to flush differently if
898 * using sysmem vs. gmem and therefore we have to delay any flushing that
899 * happens before a renderpass. So we have to have two copies of the flush
900 * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
901 * and one for outside a renderpass.
902 */
903 struct tu_cache_state cache;
904 struct tu_cache_state renderpass_cache;
905
906 enum tu_cmd_ccu_state ccu_state;
907
908 const struct tu_render_pass *pass;
909 const struct tu_subpass *subpass;
910 const struct tu_framebuffer *framebuffer;
911 VkRect2D render_area;
912
913 struct tu_cs_entry tile_store_ib;
914
915 bool xfb_used;
916 bool has_tess;
917 bool has_subpass_predication;
918 bool predication_active;
919
920 struct tu_lrz_state lrz;
921 };
922
923 struct tu_cmd_pool
924 {
925 struct vk_object_base base;
926
927 VkAllocationCallbacks alloc;
928 struct list_head cmd_buffers;
929 struct list_head free_cmd_buffers;
930 uint32_t queue_family_index;
931 };
932
933 enum tu_cmd_buffer_status
934 {
935 TU_CMD_BUFFER_STATUS_INVALID,
936 TU_CMD_BUFFER_STATUS_INITIAL,
937 TU_CMD_BUFFER_STATUS_RECORDING,
938 TU_CMD_BUFFER_STATUS_EXECUTABLE,
939 TU_CMD_BUFFER_STATUS_PENDING,
940 };
941
942 struct tu_cmd_buffer
943 {
944 struct vk_object_base base;
945
946 struct tu_device *device;
947
948 struct tu_cmd_pool *pool;
949 struct list_head pool_link;
950
951 VkCommandBufferUsageFlags usage_flags;
952 VkCommandBufferLevel level;
953 enum tu_cmd_buffer_status status;
954
955 struct tu_cmd_state state;
956 uint32_t queue_family_index;
957
958 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
959 VkShaderStageFlags push_constant_stages;
960 struct tu_descriptor_set meta_push_descriptors;
961
962 struct tu_descriptor_state descriptors[MAX_BIND_POINTS];
963
964 VkResult record_result;
965
966 struct tu_cs cs;
967 struct tu_cs draw_cs;
968 struct tu_cs draw_epilogue_cs;
969 struct tu_cs sub_cs;
970
971 uint32_t vsc_draw_strm_pitch;
972 uint32_t vsc_prim_strm_pitch;
973 };
974
975 /* Temporary struct for tracking a register state to be written, used by
976 * a6xx-pack.h and tu_cs_emit_regs()
977 */
978 struct tu_reg_value {
979 uint32_t reg;
980 uint64_t value;
981 bool is_address;
982 struct tu_bo *bo;
983 bool bo_write;
984 uint32_t bo_offset;
985 uint32_t bo_shift;
986 };
987
988
989 void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
990 struct tu_cs *cs);
991
992 void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
993 struct tu_cs *cs,
994 enum tu_cmd_ccu_state ccu_state);
995
996 void
997 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
998 struct tu_cs *cs,
999 enum vgt_event_type event);
1000
1001 static inline struct tu_descriptor_state *
tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
                         VkPipelineBindPoint bind_point)
1004 {
1005 return &cmd_buffer->descriptors[bind_point];
1006 }
1007
1008 struct tu_event
1009 {
1010 struct vk_object_base base;
1011 struct tu_bo bo;
1012 };
1013
1014 struct tu_shader_module
1015 {
1016 struct vk_object_base base;
1017
1018 uint32_t code_size;
1019 uint32_t code[];
1020 };
1021
1022 struct tu_push_constant_range
1023 {
1024 uint32_t lo;
1025 uint32_t count;
1026 };
1027
1028 struct tu_shader
1029 {
1030 struct ir3_shader *ir3_shader;
1031
1032 struct tu_push_constant_range push_consts;
1033 uint8_t active_desc_sets;
1034 bool multi_pos_output;
1035 };
1036
1037 bool
1038 tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
1039 struct tu_device *dev);
1040
1041 nir_shader *
1042 tu_spirv_to_nir(struct tu_device *dev,
1043 const VkPipelineShaderStageCreateInfo *stage_info,
1044 gl_shader_stage stage);
1045
1046 struct tu_shader *
1047 tu_shader_create(struct tu_device *dev,
1048 nir_shader *nir,
1049 unsigned multiview_mask,
1050 struct tu_pipeline_layout *layout,
1051 const VkAllocationCallbacks *alloc);
1052
1053 void
1054 tu_shader_destroy(struct tu_device *dev,
1055 struct tu_shader *shader,
1056 const VkAllocationCallbacks *alloc);
1057
1058 struct tu_program_descriptor_linkage
1059 {
1060 struct ir3_const_state const_state;
1061
1062 uint32_t constlen;
1063
1064 struct tu_push_constant_range push_consts;
1065 };
1066
1067 struct tu_pipeline
1068 {
1069 struct vk_object_base base;
1070
1071 struct tu_cs cs;
1072
1073 struct tu_pipeline_layout *layout;
1074
1075 bool need_indirect_descriptor_sets;
1076 VkShaderStageFlags active_stages;
1077 uint32_t active_desc_sets;
1078
1079 /* mask of enabled dynamic states
1080 * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
1081 */
1082 uint32_t dynamic_state_mask;
1083 struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
1084
1085 /* for dynamic states which use the same register: */
1086 uint32_t gras_su_cntl, gras_su_cntl_mask;
1087 uint32_t rb_depth_cntl, rb_depth_cntl_mask;
1088 uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
1089
1090 bool rb_depth_cntl_disable;
1091
1092 /* draw states for the pipeline */
1093 struct tu_draw_state load_state, rast_state, blend_state;
1094
1095 /* for vertex buffers state */
1096 uint32_t num_vbs;
1097
1098 struct
1099 {
1100 struct tu_draw_state state;
1101 struct tu_draw_state binning_state;
1102
1103 struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
1104 } program;
1105
1106 struct
1107 {
1108 struct tu_draw_state state;
1109 struct tu_draw_state binning_state;
1110 } vi;
1111
1112 struct
1113 {
1114 enum pc_di_primtype primtype;
1115 bool primitive_restart;
1116 } ia;
1117
1118 struct
1119 {
1120 uint32_t patch_type;
1121 uint32_t param_stride;
1122 uint32_t hs_bo_regid;
1123 uint32_t ds_bo_regid;
1124 bool upper_left_domain_origin;
1125 } tess;
1126
1127 struct
1128 {
1129 uint32_t local_size[3];
1130 } compute;
1131
1132 struct tu_lrz_pipeline lrz;
1133 };
1134
1135 void
1136 tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport);
1137
1138 void
1139 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);
1140
1141 void
1142 tu6_clear_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_image* image, const VkClearValue *value);
1143
1144 void
1145 tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
1146
1147 void
1148 tu6_emit_depth_bias(struct tu_cs *cs,
1149 float constant_factor,
1150 float clamp,
1151 float slope_factor);
1152
1153 void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
1154
1155 void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
1156
1157 void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
1158
1159 void
1160 tu6_emit_xs_config(struct tu_cs *cs,
1161 gl_shader_stage stage,
1162 const struct ir3_shader_variant *xs,
1163 uint64_t binary_iova);
1164
1165 void
1166 tu6_emit_vpc(struct tu_cs *cs,
1167 const struct ir3_shader_variant *vs,
1168 const struct ir3_shader_variant *hs,
1169 const struct ir3_shader_variant *ds,
1170 const struct ir3_shader_variant *gs,
1171 const struct ir3_shader_variant *fs,
1172 uint32_t patch_control_points,
1173 bool vshs_workgroup);
1174
1175 void
1176 tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
1177
1178 struct tu_image_view;
1179
1180 void
1181 tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
1182 struct tu_cs *cs,
1183 struct tu_image_view *src,
1184 struct tu_image_view *dst,
1185 uint32_t layer_mask,
1186 uint32_t layers,
1187 const VkRect2D *rect);
1188
1189 void
1190 tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
1191 struct tu_cs *cs,
1192 uint32_t a,
1193 const VkRenderPassBeginInfo *info);
1194
1195 void
1196 tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
1197 struct tu_cs *cs,
1198 uint32_t a,
1199 const VkRenderPassBeginInfo *info);
1200
1201 void
1202 tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
1203 struct tu_cs *cs,
1204 uint32_t a,
1205 bool force_load);
1206
1207 /* expose this function to be able to emit load without checking LOAD_OP */
1208 void
1209 tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
1210
1211 /* note: gmem store can also resolve */
1212 void
1213 tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
1214 struct tu_cs *cs,
1215 uint32_t a,
1216 uint32_t gmem_a);
1217
1218 enum tu_supported_formats {
1219 FMT_VERTEX = 1,
1220 FMT_TEXTURE = 2,
1221 FMT_COLOR = 4,
1222 };
1223
1224 struct tu_native_format
1225 {
1226 enum a6xx_format fmt : 8;
1227 enum a3xx_color_swap swap : 8;
1228 enum a6xx_tile_mode tile_mode : 8;
1229 enum tu_supported_formats supported : 8;
1230 };
1231
1232 struct tu_native_format tu6_format_vtx(VkFormat format);
1233 struct tu_native_format tu6_format_color(VkFormat format, enum a6xx_tile_mode tile_mode);
1234 struct tu_native_format tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode);
1235
1236 static inline enum a6xx_format
tu6_base_format(VkFormat format)
1238 {
1239 /* note: tu6_format_color doesn't care about tiling for .fmt field */
1240 return tu6_format_color(format, TILE6_LINEAR).fmt;
1241 }
1242
1243 struct tu_image
1244 {
1245 struct vk_object_base base;
1246
1247 /* The original VkFormat provided by the client. This may not match any
1248 * of the actual surface formats.
1249 */
1250 VkFormat vk_format;
1251 uint32_t level_count;
1252 uint32_t layer_count;
1253
1254 struct fdl_layout layout[3];
1255 uint32_t total_size;
1256
1257 #ifdef ANDROID
   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
1259 VkDeviceMemory owned_memory;
1260 #endif
1261
1262 /* Set when bound */
1263 struct tu_bo *bo;
1264 VkDeviceSize bo_offset;
1265
1266 uint32_t lrz_height;
1267 uint32_t lrz_pitch;
1268 uint32_t lrz_offset;
1269 };
1270
1271 static inline uint32_t
tu_get_layerCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
1274 {
1275 return range->layerCount == VK_REMAINING_ARRAY_LAYERS
1276 ? image->layer_count - range->baseArrayLayer
1277 : range->layerCount;
1278 }
1279
1280 static inline uint32_t
tu_get_levelCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
1283 {
1284 return range->levelCount == VK_REMAINING_MIP_LEVELS
1285 ? image->level_count - range->baseMipLevel
1286 : range->levelCount;
1287 }
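/* Example (illustrative): walking the mip levels selected by a
 * VkImageSubresourceRange:
 *
 *    for (uint32_t i = 0; i < tu_get_levelCount(image, range); i++) {
 *       uint32_t level = range->baseMipLevel + i;
 *       ...
 *    }
 */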
1288
1289 struct tu_image_view
1290 {
1291 struct vk_object_base base;
1292
1293 struct tu_image *image; /**< VkImageViewCreateInfo::image */
1294
1295 uint64_t base_addr;
1296 uint64_t ubwc_addr;
1297 uint32_t layer_size;
1298 uint32_t ubwc_layer_size;
1299
1300 /* used to determine if fast gmem store path can be used */
1301 VkExtent2D extent;
1302 bool need_y2_align;
1303
1304 bool ubwc_enabled;
1305
1306 uint32_t descriptor[A6XX_TEX_CONST_DWORDS];
1307
1308 /* Descriptor for use as a storage image as opposed to a sampled image.
1309 * This has a few differences for cube maps (e.g. type).
1310 */
1311 uint32_t storage_descriptor[A6XX_TEX_CONST_DWORDS];
1312
1313 /* pre-filled register values */
1314 uint32_t PITCH;
1315 uint32_t FLAG_BUFFER_PITCH;
1316
1317 uint32_t RB_MRT_BUF_INFO;
1318 uint32_t SP_FS_MRT_REG;
1319
1320 uint32_t SP_PS_2D_SRC_INFO;
1321 uint32_t SP_PS_2D_SRC_SIZE;
1322
1323 uint32_t RB_2D_DST_INFO;
1324
1325 uint32_t RB_BLIT_DST_INFO;
1326
1327 /* for d32s8 separate stencil */
1328 uint64_t stencil_base_addr;
1329 uint32_t stencil_layer_size;
1330 uint32_t stencil_PITCH;
1331 };
1332
1333 struct tu_sampler_ycbcr_conversion {
1334 struct vk_object_base base;
1335
1336 VkFormat format;
1337 VkSamplerYcbcrModelConversion ycbcr_model;
1338 VkSamplerYcbcrRange ycbcr_range;
1339 VkComponentMapping components;
1340 VkChromaLocation chroma_offsets[2];
1341 VkFilter chroma_filter;
1342 };
1343
1344 struct tu_sampler {
1345 struct vk_object_base base;
1346
1347 uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
1348 struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
1349 };
1350
1351 void
1352 tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1353
1354 void
1355 tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src);
1356
1357 void
1358 tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1359
1360 void
1361 tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1362
1363 #define tu_image_view_stencil(iview, x) \
1364 ((iview->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT))
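/* e.g. tu_image_view_stencil(iview, RB_2D_DST_INFO) yields the stashed
 * RB_2D_DST_INFO value with its color format replaced by FMT6_8_UINT, for
 * blits that touch only the stencil plane of a D32S8 image (illustrative;
 * any stashed register value with a COLOR_FORMAT field works the same way).
 */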
1365
1366 VkResult
1367 tu_gralloc_info(struct tu_device *device,
1368 const VkNativeBufferANDROID *gralloc_info,
1369 int *dma_buf,
1370 uint64_t *modifier);
1371
1372 VkResult
1373 tu_import_memory_from_gralloc_handle(VkDevice device_h,
1374 int dma_buf,
1375 const VkAllocationCallbacks *alloc,
1376 VkImage image_h);
1377
1378 void
1379 tu_image_view_init(struct tu_image_view *iview,
1380 const VkImageViewCreateInfo *pCreateInfo,
1381 bool limited_z24s8);
1382
1383 bool
1384 ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage, bool limited_z24s8);
1385
1386 struct tu_buffer_view
1387 {
1388 struct vk_object_base base;
1389
1390 uint32_t descriptor[A6XX_TEX_CONST_DWORDS];
1391
1392 struct tu_buffer *buffer;
1393 };
1394 void
1395 tu_buffer_view_init(struct tu_buffer_view *view,
1396 struct tu_device *device,
1397 const VkBufferViewCreateInfo *pCreateInfo);
1398
1399 struct tu_attachment_info
1400 {
1401 struct tu_image_view *attachment;
1402 };
1403
1404 struct tu_framebuffer
1405 {
1406 struct vk_object_base base;
1407
1408 uint32_t width;
1409 uint32_t height;
1410 uint32_t layers;
1411
1412 /* size of the first tile */
1413 VkExtent2D tile0;
1414 /* number of tiles */
1415 VkExtent2D tile_count;
1416
1417 /* size of the first VSC pipe */
1418 VkExtent2D pipe0;
1419 /* number of VSC pipes */
1420 VkExtent2D pipe_count;
1421
1422 /* pipe register values */
1423 uint32_t pipe_config[MAX_VSC_PIPES];
1424 uint32_t pipe_sizes[MAX_VSC_PIPES];
1425
1426 uint32_t attachment_count;
1427 struct tu_attachment_info attachments[0];
1428 };
1429
1430 void
1431 tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
1432 const struct tu_device *device,
1433 const struct tu_render_pass *pass);
1434
1435 struct tu_subpass_barrier {
1436 VkPipelineStageFlags src_stage_mask;
1437 VkAccessFlags src_access_mask;
1438 VkAccessFlags dst_access_mask;
1439 bool incoherent_ccu_color, incoherent_ccu_depth;
1440 };
1441
1442 struct tu_subpass_attachment
1443 {
1444 uint32_t attachment;
1445 };
1446
1447 struct tu_subpass
1448 {
1449 uint32_t input_count;
1450 uint32_t color_count;
1451 struct tu_subpass_attachment *input_attachments;
1452 struct tu_subpass_attachment *color_attachments;
1453 struct tu_subpass_attachment *resolve_attachments;
1454 struct tu_subpass_attachment depth_stencil_attachment;
1455
1456 VkSampleCountFlagBits samples;
1457
1458 uint32_t srgb_cntl;
1459 uint32_t multiview_mask;
1460
1461 struct tu_subpass_barrier start_barrier;
1462 };
1463
1464 struct tu_render_pass_attachment
1465 {
1466 VkFormat format;
1467 uint32_t samples;
1468 uint32_t cpp;
1469 VkImageAspectFlags clear_mask;
1470 uint32_t clear_views;
1471 bool load;
1472 bool store;
1473 int32_t gmem_offset;
1474 /* for D32S8 separate stencil: */
1475 bool load_stencil;
1476 bool store_stencil;
1477 int32_t gmem_offset_stencil;
1478 };
1479
1480 struct tu_render_pass
1481 {
1482 struct vk_object_base base;
1483
1484 uint32_t attachment_count;
1485 uint32_t subpass_count;
1486 uint32_t gmem_pixels;
1487 uint32_t tile_align_w;
1488 struct tu_subpass_attachment *subpass_attachments;
1489 struct tu_render_pass_attachment *attachments;
1490 struct tu_subpass_barrier end_barrier;
1491 struct tu_subpass subpasses[0];
1492 };
1493
1494 struct tu_query_pool
1495 {
1496 struct vk_object_base base;
1497
1498 VkQueryType type;
1499 uint32_t stride;
1500 uint64_t size;
1501 uint32_t pipeline_statistics;
1502 struct tu_bo bo;
1503 };
1504
1505 void
1506 tu_update_descriptor_sets(VkDescriptorSet overrideSet,
1507 uint32_t descriptorWriteCount,
1508 const VkWriteDescriptorSet *pDescriptorWrites,
1509 uint32_t descriptorCopyCount,
1510 const VkCopyDescriptorSet *pDescriptorCopies);
1511
1512 void
1513 tu_update_descriptor_set_with_template(
1514 struct tu_descriptor_set *set,
1515 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
1516 const void *pData);
1517
1518 VkResult
1519 tu_physical_device_init(struct tu_physical_device *device,
1520 struct tu_instance *instance);
1521 VkResult
1522 tu_enumerate_devices(struct tu_instance *instance);
1523
1524 int
1525 tu_drm_submitqueue_new(const struct tu_device *dev,
1526 int priority,
1527 uint32_t *queue_id);
1528
1529 void
1530 tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id);
1531
1532 int
1533 tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2);
1534
1535 int
1536 tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync);
1537
1538 #define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
1539 \
1540 static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
1541 { \
1542 return (struct __tu_type *) _handle; \
1543 } \
1544 \
1545 static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
1546 { \
1547 return (__VkType) _obj; \
1548 }
1549
1550 #define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \
1551 \
1552 static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
1553 { \
1554 return (struct __tu_type *) (uintptr_t) _handle; \
1555 } \
1556 \
1557 static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
1558 { \
1559 return (__VkType)(uintptr_t) _obj; \
1560 }
1561
1562 #define TU_FROM_HANDLE(__tu_type, __name, __handle) \
1563 struct __tu_type *__name = __tu_type##_from_handle(__handle)
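/* Example (illustrative): a typical entry point unwraps its handles into
 * driver structs before doing anything else:
 *
 *    void
 *    tu_DestroyBuffer(VkDevice _device, VkBuffer _buffer,
 *                     const VkAllocationCallbacks *pAllocator)
 *    {
 *       TU_FROM_HANDLE(tu_device, device, _device);
 *       TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
 *       ...
 *    }
 */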
1564
1565 TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer)
1566 TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice)
1567 TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance)
1568 TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice)
1569 TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue)
1570
1571 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool)
1572 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer)
1573 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView)
1574 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool)
1575 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet)
1576 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout,
1577 VkDescriptorSetLayout)
1578 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template,
1579 VkDescriptorUpdateTemplate)
1580 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory)
1581 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent)
1582 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
1583 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView)
1585 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
1586 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
1587 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
1588 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
1589 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
1590 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
1591 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)
1592 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_shader_module, VkShaderModule)
1593
1594 /* for TU_FROM_HANDLE with both VkFence and VkSemaphore: */
1595 #define tu_syncobj_from_handle(x) ((struct tu_syncobj*) (uintptr_t) (x))
1596
1597 #endif /* TU_PRIVATE_H */
1598