/*
 * Copyright © 2022 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

/* When using dynamic rendering with the suspend/resume functionality, we
 * sometimes need to merge together multiple suspended render passes
 * dynamically at submit time. This involves combining all the saved-up IBs,
 * emitting the rendering commands usually emitted by
 * CmdEndRenderPass()/CmdEndRendering(), and inserting them in between the
 * user command buffers. This gets tricky, because the same command buffer can
 * be submitted multiple times, each time with a different set of other
 * command buffers, and with VK_COMMAND_BUFFER_SIMULTANEOUS_USE_BIT, this can
 * happen before the previous submission of the same command buffer has
 * finished. At some point we have to free these commands and the BOs they are
 * contained in, and we can't do that when resubmitting the last command
 * buffer in the sequence because it may still be in use. This means we have
 * to make the commands owned by the device and roll our own memory tracking.
 */

#include "tu_dynamic_rendering.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"

struct dynamic_rendering_entry {
   struct tu_cmd_buffer *cmd_buffer;
   uint32_t fence; /* The fence value when cmd_buffer becomes available */
};

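/* Allocate a device-owned command buffer for a merged render pass. Before
 * allocating, recycle any pending entries whose fence value the GPU has
 * already written back, so the pending array only holds in-flight buffers.
 */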
static VkResult
get_cmd_buffer(struct tu_device *dev, struct tu_cmd_buffer **cmd_buffer_out)
{
   struct tu6_global *global = dev->global_bo->map;

   /* Note: because QueueSubmit is serialized, we don't need any locks here.
    */
   uint32_t fence = global->dynamic_rendering_fence;

   /* Go through the entries and return the finished ones to the pool,
    * shrinking the array of pending entries.
    */
   struct dynamic_rendering_entry *new_entry =
      util_dynarray_begin(&dev->dynamic_rendering_pending);
   uint32_t entries = 0;
   util_dynarray_foreach(&dev->dynamic_rendering_pending,
                         struct dynamic_rendering_entry, entry) {
      if (entry->fence <= fence) {
         VkCommandBuffer vk_buf = tu_cmd_buffer_to_handle(entry->cmd_buffer);
         tu_FreeCommandBuffers(tu_device_to_handle(dev),
                               dev->dynamic_rendering_pool, 1, &vk_buf);
      } else {
         *new_entry = *entry;
         new_entry++;
         entries++;
      }
   }
   UNUSED void *dummy =
      util_dynarray_resize(&dev->dynamic_rendering_pending,
                           struct dynamic_rendering_entry, entries);

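   /* Allocate a fresh command buffer for this chain and record the fence
    * value that will indicate when it can be freed.
    */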
   VkCommandBuffer vk_buf;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = dev->dynamic_rendering_pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkResult result =
      tu_AllocateCommandBuffers(tu_device_to_handle(dev), &info, &vk_buf);
   if (result != VK_SUCCESS)
      return result;

   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, vk_buf);

   struct dynamic_rendering_entry entry = {
      .cmd_buffer = cmd_buffer,
      .fence = ++dev->dynamic_rendering_fence,
   };

   util_dynarray_append(&dev->dynamic_rendering_pending,
                        struct dynamic_rendering_entry, entry);
   *cmd_buffer_out = cmd_buffer;

   return VK_SUCCESS;
}

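/* The merge command buffers must outlive the submit that created them, so
 * they come from a pool owned by the device rather than the application.
 * queueFamilyIndex 0 is used since turnip only exposes one queue family.
 */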
VkResult
tu_init_dynamic_rendering(struct tu_device *dev)
{
   util_dynarray_init(&dev->dynamic_rendering_pending, NULL);
   dev->dynamic_rendering_fence = 0;

   return tu_CreateCommandPool(tu_device_to_handle(dev),
                               &(VkCommandPoolCreateInfo) {
                                  .pNext = NULL,
                                  .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
                                  .flags = 0,
                                  .queueFamilyIndex = 0,
                               }, &dev->vk.alloc, &dev->dynamic_rendering_pool);
}

void
tu_destroy_dynamic_rendering(struct tu_device *dev)
{
   tu_DestroyCommandPool(tu_device_to_handle(dev),
                         dev->dynamic_rendering_pool,
                         &dev->vk.alloc);
   util_dynarray_fini(&dev->dynamic_rendering_pending);
}

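/* Scan the command buffers of a submit and, wherever a suspended render pass
 * chain ends, splice in a device-owned command buffer that replays the
 * accumulated pre-chain commands, finishes the render pass, and writes the
 * updated dynamic rendering fence. On success, *cmds_ptr points to a newly
 * allocated array that the caller owns.
 */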
VkResult
tu_insert_dynamic_cmdbufs(struct tu_device *dev,
                          struct tu_cmd_buffer ***cmds_ptr,
                          uint32_t *size)
{
   struct tu_cmd_buffer **old_cmds = *cmds_ptr;

   bool has_dynamic = false;
   for (unsigned i = 0; i < *size; i++) {
      if (old_cmds[i]->state.suspend_resume != SR_NONE) {
         has_dynamic = true;
         break;
      }
   }

   if (!has_dynamic)
      return VK_SUCCESS;

   struct util_dynarray cmds = {0};
   struct tu_cmd_buffer *cmd_buffer = NULL;

   for (unsigned i = 0; i < *size; i++) {
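      /* First, if this command buffer finishes a render pass suspended in an
       * earlier command buffer, append its pre-chain to the merge command
       * buffer and emit the deferred end-of-render-pass work.
       */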
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_IN_CHAIN:
      case SR_IN_PRE_CHAIN:
         break;

      case SR_AFTER_PRE_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN:
         tu_append_pre_chain(cmd_buffer, old_cmds[i]);

         if (!(old_cmds[i]->usage_flags &
               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
            u_trace_disable_event_range(old_cmds[i]->pre_chain.trace_renderpass_start,
                                        old_cmds[i]->pre_chain.trace_renderpass_end);
         }

         tu_cmd_render(cmd_buffer);

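         /* Write the current fence value to the global BO so that
          * get_cmd_buffer() can tell when this merge command buffer (and the
          * BOs it references) is safe to free.
          */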
         tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3);
         tu_cs_emit_qw(&cmd_buffer->cs,
                       global_iova(cmd_buffer, dynamic_rendering_fence));
         tu_cs_emit(&cmd_buffer->cs, dev->dynamic_rendering_fence);

         tu_EndCommandBuffer(tu_cmd_buffer_to_handle(cmd_buffer));
         util_dynarray_append(&cmds, struct tu_cmd_buffer *, cmd_buffer);
         cmd_buffer = NULL;
         break;
      }

      util_dynarray_append(&cmds, struct tu_cmd_buffer *, old_cmds[i]);

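      /* Second, if this command buffer leaves a render pass suspended, start
       * (or continue) a merge command buffer that will replay the suspended
       * pass once the chain is eventually resumed and ended.
       */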
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_AFTER_PRE_CHAIN:
         break;
      case SR_IN_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN: {
         assert(!cmd_buffer);
         VkResult result = get_cmd_buffer(dev, &cmd_buffer);
         if (result != VK_SUCCESS)
            return result;

         tu_cmd_buffer_begin(cmd_buffer,
                             VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);

         /* Set up the render pass using the first command buffer involved in
          * the chain, so that it will look like we're inside a render pass
          * for tu_cmd_render().
          */
         tu_restore_suspended_pass(cmd_buffer, old_cmds[i]);
         FALLTHROUGH;
      }
      case SR_IN_PRE_CHAIN:
         assert(cmd_buffer);

         tu_append_pre_post_chain(cmd_buffer, old_cmds[i]);

         if (old_cmds[i]->usage_flags &
             VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
            u_trace_disable_event_range(old_cmds[i]->trace_renderpass_start,
                                        old_cmds[i]->trace_renderpass_end);
         }

         /* When the command buffer is finally recorded, we need its state
          * to be the state of the command buffer before it. We need this
          * because we skip tu6_emit_hw().
          */
         cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state;
         cmd_buffer->vsc_draw_strm_pitch = old_cmds[i]->vsc_draw_strm_pitch;
         cmd_buffer->vsc_prim_strm_pitch = old_cmds[i]->vsc_prim_strm_pitch;
         break;
      }
   }

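   /* Hand back a flat copy of the merged list; the dynarray itself is only
    * local scratch storage.
    */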
   struct tu_cmd_buffer **new_cmds =
      vk_alloc(&dev->vk.alloc, cmds.size, alignof(struct tu_cmd_buffer *),
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!new_cmds)
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   memcpy(new_cmds, cmds.data, cmds.size);
   *cmds_ptr = new_cmds;
   *size = util_dynarray_num_elements(&cmds, struct tu_cmd_buffer *);
   util_dynarray_fini(&cmds);

   return VK_SUCCESS;
}