/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_queue.h"

#include "tu_cmd_buffer.h"
#include "tu_dynamic_rendering.h"
#include "tu_knl.h"
#include "tu_device.h"

#include "vk_util.h"

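/* Map a Vulkan global queue priority onto a kernel submitqueue priority,
 * validating it against the priorities the physical device advertises when
 * the application enabled globalPriorityQuery. Returns -1 if the requested
 * priority is not supported.
 */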
static int
tu_get_submitqueue_priority(const struct tu_physical_device *pdevice,
                            VkQueueGlobalPriorityKHR global_priority,
                            bool global_priority_query)
{
   if (global_priority_query) {
      VkQueueFamilyGlobalPriorityPropertiesKHR props;
      tu_physical_device_get_global_priority_properties(pdevice, &props);

      bool valid = false;
      for (uint32_t i = 0; i < props.priorityCount; i++) {
         if (props.priorities[i] == global_priority) {
            valid = true;
            break;
         }
      }

      if (!valid)
         return -1;
   }

   /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
    * with 0 being the highest priority. This matches what freedreno does.
    */
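   /* For example, with submitqueue_priority_count == 3 this maps
    * HIGH/REALTIME -> 0, MEDIUM -> 1 and LOW -> 2.
    */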
   int priority;
   if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
      priority = pdevice->submitqueue_priority_count / 2;
   else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
      priority = pdevice->submitqueue_priority_count - 1;
   else
      priority = 0;

   return priority;
}

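/* Append IB entries to the in-flight submit and, when RD dumping is
 * enabled, also record them so the command streams can be written to the
 * RD file later in queue_submit().
 */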
static void
submit_add_entries(struct tu_device *dev, void *submit,
                   struct util_dynarray *dump_cmds,
                   struct tu_cs_entry *entries, unsigned num_entries)
{
   tu_submit_add_entries(dev, submit, entries, num_entries);
   if (FD_RD_DUMP(ENABLE)) {
      util_dynarray_append_array(dump_cmds, struct tu_cs_entry, entries,
                                 num_entries);
   }
}

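/* Common submit path for all queue submissions: flattens the command
 * buffers (plus any injected perfcntr, timestamp-copy and autotune IBs)
 * into a single kernel submit, optionally captures an RD dump, and hands
 * trace data off to u_trace.
 */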
static VkResult
queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
   struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
   struct tu_device *device = queue->device;
   bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
   struct util_dynarray dump_cmds;

   util_dynarray_init(&dump_cmds, NULL);

   uint32_t perf_pass_index =
      device->perfcntrs_pass_cs_entries ? vk_submit->perf_pass_index : ~0;

   if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
      tu_dbg_log_gmem_load_store_skips(device);

   pthread_mutex_lock(&device->submit_mutex);

   struct tu_cmd_buffer **cmd_buffers =
      (struct tu_cmd_buffer **) vk_submit->command_buffers;
   uint32_t cmdbuf_count = vk_submit->command_buffer_count;

   VkResult result =
      tu_insert_dynamic_cmdbufs(device, &cmd_buffers, &cmdbuf_count);
   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      return result;
   }

   bool has_trace_points = false;
   static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
                 "vk must be first member of tu_cmd_buffer");
   /* Scan every buffer in the (possibly expanded) array for trace points. */
   for (unsigned i = 0; i < cmdbuf_count; i++) {
      if (u_trace_enabled && u_trace_has_points(&cmd_buffers[i]->trace))
         has_trace_points = true;
   }

   struct tu_u_trace_submission_data *u_trace_submission_data = NULL;

   void *submit = tu_submit_create(device);
   if (!submit) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      pthread_mutex_unlock(&device->submit_mutex);
      goto fail_create_submit;
   }

   if (has_trace_points) {
      tu_u_trace_submission_data_create(
         device, cmd_buffers, cmdbuf_count, &u_trace_submission_data);
   }

   for (uint32_t i = 0; i < cmdbuf_count; i++) {
      struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
      struct tu_cs *cs = &cmd_buffer->cs;

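      /* Prepend the per-pass performance-counter IB so its counter setup
       * applies to this command buffer's pass.
       */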
      if (perf_pass_index != ~0) {
         struct tu_cs_entry *perf_cs_entry =
            &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];

         submit_add_entries(device, submit, &dump_cmds, perf_cs_entry, 1);
      }

      submit_add_entries(device, submit, &dump_cmds, cs->entries,
                         cs->entry_count);

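      /* If a timestamp-copy CS was created for this buffer, append it so
       * the u_trace timestamps are copied out as part of the submission.
       */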
      if (u_trace_submission_data &&
          u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
         struct tu_cs_entry *trace_cs_entry =
            &u_trace_submission_data->cmd_trace_data[i]
                .timestamp_copy_cs->entries[0];
         submit_add_entries(device, submit, &dump_cmds, trace_cs_entry, 1);
      }
   }

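   /* Autotune appends its own CS when it needs fenced results from this
    * submission for later readback.
    */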
   if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
      struct tu_cs *autotune_cs = tu_autotune_on_submit(
         device, &device->autotune, cmd_buffers, cmdbuf_count);
      submit_add_entries(device, submit, &dump_cmds, autotune_cs->entries,
                         autotune_cs->entry_count);
   }

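   /* With FD_RD_DUMP enabled, capture the submission into an RD file:
    * the chip id, every BO marked for dumping (all BOs with
    * FD_RD_DUMP_FULL) and the addresses of all command streams.
    */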
   if (cmdbuf_count && FD_RD_DUMP(ENABLE) &&
       fd_rd_output_begin(&queue->device->rd_output,
                          queue->device->submit_count)) {
      struct tu_device *device = queue->device;
      struct fd_rd_output *rd_output = &device->rd_output;

      if (FD_RD_DUMP(FULL)) {
         VkResult result = tu_queue_wait_fence(queue, queue->fence, ~0);
         if (result != VK_SUCCESS) {
            mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %d",
                      device->device_idx, queue->msm_queue_id, result);
         }
      }

      fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8);
      fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);

      mtx_lock(&device->bo_mutex);
      util_dynarray_foreach (&device->dump_bo_list, struct tu_bo *, bo_ptr) {
         struct tu_bo *bo = *bo_ptr;
         uint64_t iova = bo->iova;

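         /* RD_GPUADDR payload: low 32 bits of the iova, size, then the
          * high 32 bits.
          */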
         uint32_t buf[3] = { iova, bo->size, iova >> 32 };
         fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
         if (bo->dump || FD_RD_DUMP(FULL)) {
            tu_bo_map(device, bo, NULL); /* note: this would need locking to be safe */
            fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, bo->map, bo->size);
         }
      }
      mtx_unlock(&device->bo_mutex);

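      /* Record each IB's GPU address and size (converted from bytes to
       * dwords) so the decoder knows where the command streams live.
       */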
      util_dynarray_foreach (&dump_cmds, struct tu_cs_entry, cmd) {
         uint64_t iova = cmd->bo->iova + cmd->offset;
         uint32_t size = cmd->size >> 2;
         uint32_t buf[3] = { iova, size, iova >> 32 };
         fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
      }

      fd_rd_output_end(rd_output);
   }

   util_dynarray_fini(&dump_cmds);

   result =
      tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
                      vk_submit->signals, vk_submit->signal_count,
                      u_trace_submission_data);

   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      goto out;
   }

   tu_debug_bos_print_stats(device);

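   /* Tie the trace data to this submission and flush it; traces that live
    * in a command buffer (no timestamp-copy CS) are dropped from the
    * submission data since the command buffer still owns them.
    */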
   if (u_trace_submission_data) {
      u_trace_submission_data->submission_id = device->submit_count;
      u_trace_submission_data->queue = queue;
      u_trace_submission_data->fence = queue->fence;

      for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) {
         bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;
         if (u_trace_submission_data->cmd_trace_data[i].trace)
            u_trace_flush(u_trace_submission_data->cmd_trace_data[i].trace,
                          u_trace_submission_data, queue->device->vk.current_frame,
                          free_data);

         if (!u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
            /* u_trace is owned by cmd_buffer */
            u_trace_submission_data->cmd_trace_data[i].trace = NULL;
         }
      }
   }

   device->submit_count++;

   pthread_mutex_unlock(&device->submit_mutex);
   pthread_cond_broadcast(&queue->device->timeline_cond);

   u_trace_context_process(&device->trace_context, false);

out:
   tu_submit_finish(device, submit);

fail_create_submit:
   if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
      vk_free(&queue->device->vk.alloc, cmd_buffers);

   return result;
}

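/* Create a queue: resolve and validate its global priority, initialize the
 * common vk_queue and create the kernel submitqueue backing it.
 */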
VkResult
tu_queue_init(struct tu_device *device,
              struct tu_queue *queue,
              int idx,
              const VkDeviceQueueCreateInfo *create_info)
{
   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(create_info->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const VkQueueGlobalPriorityKHR global_priority = priority_info ?
      priority_info->globalPriority :
      (TU_DEBUG(HIPRIO) ? VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR :
       VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR);

   const int priority = tu_get_submitqueue_priority(
      device->physical_device, global_priority,
      device->vk.enabled_features.globalPriorityQuery);
   if (priority < 0) {
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "invalid global priority");
   }

   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;
   queue->priority = priority;
   queue->vk.driver_submit = queue_submit;

   int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
   if (ret) {
      /* Undo vk_queue_init() so the half-initialized queue isn't leaked. */
      vk_queue_finish(&queue->vk);
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "submitqueue create failed");
   }

   queue->fence = -1;

   return VK_SUCCESS;
}

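/* Tear down the common queue state and close the kernel submitqueue. */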
void
tu_queue_finish(struct tu_queue *queue)
{
   vk_queue_finish(&queue->vk);
   tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
}