/*
 * Copyright © 2024 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "panvk_cmd_buffer.h"
#include "panvk_entrypoints.h"
#include "panvk_event.h"

#include "util/bitscan.h"

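/* vkCmdResetEvent2: emit, on every subqueue, a CS sequence that resets the
 * event's per-subqueue syncobj seqno back to 0. The update is deferred on
 * the scoreboard slots covering the source stages, so the event only
 * becomes unsignaled once the work described by stageMask has completed.
 */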
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* Wrap stageMask with a VkDependencyInfo object so we can re-use
    * get_cs_deps(). */
   const VkMemoryBarrier2 barrier = {
      .srcStageMask = stageMask,
   };
   const VkDependencyInfo info = {
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   struct panvk_cs_deps deps;

   panvk_per_arch(get_cs_deps)(cmdbuf, &info, &deps);

   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
      uint32_t sb_mask = deps.src[i].wait_sb_mask;
      struct cs_index sync_addr = cs_scratch_reg64(b, 0);
      struct cs_index seqno = cs_scratch_reg32(b, 2);
      struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);

      cs_move64_to(b, sync_addr,
                   panvk_priv_mem_dev_addr(event->syncobjs) +
                      (i * sizeof(struct panvk_cs_sync32)));
      cs_load32_to(b, seqno, sync_addr,
                   offsetof(struct panvk_cs_sync32, seqno));
      cs_wait_slot(b, SB_ID(LS), false);

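      /* Only emit the reset when the event is currently set (seqno != 0):
       * resetting an already-reset event is a no-op. The cs_defer() holds
       * the seqno write back until the source-stage scoreboard slots and
       * any pending deferred flush have drained.
       */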
      cs_match(b, seqno, cmp_scratch) {
         cs_case(b, 0) {
            /* Nothing to do, we just need it defined for the default case. */
         }

         cs_default(b) {
            cs_move32_to(b, seqno, 0);
            cs_sync32_set(b, false, MALI_CS_SYNC_SCOPE_CSG, seqno, sync_addr,
                          cs_defer(sb_mask | SB_MASK(DEFERRED_FLUSH),
                                   SB_ID(DEFERRED_SYNC)));
         }
      }
   }
}

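/* vkCmdSetEvent2: flush pending draws if the dependency requires it, then,
 * on every subqueue, flush caches as needed and bump the event's syncobj
 * seqno from 0 to 1. Both operations are deferred on the source-stage
 * scoreboard slots so the event only signals once prior work is done.
 */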
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);
   struct panvk_cs_deps deps;

   panvk_per_arch(get_cs_deps)(cmdbuf, pDependencyInfo, &deps);

   if (deps.needs_draw_flush)
      panvk_per_arch(cmd_flush_draws)(cmdbuf);

   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
      uint16_t sb_mask = deps.src[i].wait_sb_mask;
      struct cs_index sync_addr = cs_scratch_reg64(b, 0);
      struct cs_index seqno = cs_scratch_reg32(b, 2);
      struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);

      cs_move64_to(b, sync_addr,
                   panvk_priv_mem_dev_addr(event->syncobjs) +
                      (i * sizeof(struct panvk_cs_sync32)));
      cs_load32_to(b, seqno, sync_addr,
                   offsetof(struct panvk_cs_sync32, seqno));
      cs_wait_slot(b, SB_ID(LS), false);

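      /* Only transition the event when it's currently reset (seqno == 0):
       * per the Vulkan spec, setting an already-set event has no effect.
       */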
      cs_match(b, seqno, cmp_scratch) {
         cs_case(b, 0) {
            struct panvk_cache_flush_info cache_flush = deps.src[i].cache_flush;

            if (cache_flush.l2 != MALI_CS_FLUSH_MODE_NONE ||
                cache_flush.lsc != MALI_CS_FLUSH_MODE_NONE ||
                cache_flush.others) {
               /* We rely on r88 being zero since we're in the if (r88 == 0)
                * branch. */
               cs_flush_caches(b, cache_flush.l2, cache_flush.lsc,
                               cache_flush.others, seqno,
                               cs_defer(sb_mask, SB_ID(DEFERRED_FLUSH)));
            }

            cs_move32_to(b, seqno, 1);
            cs_sync32_set(b, false, MALI_CS_SYNC_SCOPE_CSG, seqno, sync_addr,
                          cs_defer(sb_mask | SB_MASK(DEFERRED_FLUSH),
                                   SB_ID(DEFERRED_SYNC)));
         }
      }
   }
}

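/* Common helper for vkCmdWaitEvents2: make each subqueue wait until the
 * syncobj seqno of every subqueue it depends on becomes non-zero, i.e.
 * until the corresponding deferred set-event has landed.
 */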
static void
cmd_wait_event(struct panvk_cmd_buffer *cmdbuf, struct panvk_event *event,
               const VkDependencyInfo *info)
{
   struct panvk_cs_deps deps;

   panvk_per_arch(get_cs_deps)(cmdbuf, info, &deps);

   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);

      u_foreach_bit(j, deps.dst[i].wait_subqueue_mask) {
         struct cs_index sync_addr = cs_scratch_reg64(b, 0);
         struct cs_index seqno = cs_scratch_reg32(b, 2);

         cs_move64_to(b, sync_addr,
                      panvk_priv_mem_dev_addr(event->syncobjs) +
                         (j * sizeof(struct panvk_cs_sync32)));

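         /* Wait for the seqno to become strictly greater than 0, which
          * happens once the set-event on subqueue j has executed.
          */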
         cs_move32_to(b, seqno, 0);
         cs_sync32_wait(b, false, MALI_CS_CONDITION_GREATER, seqno, sync_addr);
      }
   }
}

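/* vkCmdWaitEvents2: each event comes with its own dependency info, so we
 * simply emit one wait sequence per (event, dependency) pair.
 */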
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount, const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);

      cmd_wait_event(cmdbuf, event, &pDependencyInfos[i]);
   }
}