/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_queue.h"

#include "tu_cmd_buffer.h"
#include "tu_dynamic_rendering.h"
#include "tu_knl.h"
#include "tu_device.h"

#include "vk_util.h"

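/* Map the requested Vulkan global priority onto a kernel submitqueue
 * priority index.  When the app enabled globalPriorityQuery, the request is
 * first validated against the priorities the physical device advertises;
 * an unsupported priority yields -1 so the caller can fail queue creation.
 */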
static int
tu_get_submitqueue_priority(const struct tu_physical_device *pdevice,
                            VkQueueGlobalPriorityKHR global_priority,
                            bool global_priority_query)
{
   if (global_priority_query) {
      VkQueueFamilyGlobalPriorityPropertiesKHR props;
      tu_physical_device_get_global_priority_properties(pdevice, &props);

      bool valid = false;
      for (uint32_t i = 0; i < props.priorityCount; i++) {
         if (props.priorities[i] == global_priority) {
            valid = true;
            break;
         }
      }

      if (!valid)
         return -1;
   }

   /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
    * with 0 being the highest priority.  This matches what freedreno does.
    */
   int priority;
   if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
      priority = pdevice->submitqueue_priority_count / 2;
   else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
      priority = pdevice->submitqueue_priority_count - 1;
   else
      priority = 0;

   return priority;
}

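/* Append IB entries to the backend submit.  When RD dump capture is
 * enabled, also record the entries so the command streams can be written
 * into the .rd file once the submit has been assembled.
 */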
static void
submit_add_entries(struct tu_device *dev, void *submit,
                   struct util_dynarray *dump_cmds,
                   struct tu_cs_entry *entries, unsigned num_entries)
{
   tu_submit_add_entries(dev, submit, entries, num_entries);
   if (FD_RD_DUMP(ENABLE)) {
      util_dynarray_append_array(dump_cmds, struct tu_cs_entry, entries,
                                 num_entries);
   }
}

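/* vk_queue::driver_submit implementation: flattens the submitted command
 * buffers (plus any injected dynamic-rendering and perf-counter command
 * streams) into one kernel submit, with optional u_trace, autotune and RD
 * dump processing along the way.
 */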
static VkResult
queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
   struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
   struct tu_device *device = queue->device;
   bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
   struct util_dynarray dump_cmds;

   util_dynarray_init(&dump_cmds, NULL);

   uint32_t perf_pass_index =
      device->perfcntrs_pass_cs_entries ? vk_submit->perf_pass_index : ~0;

   if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
      tu_dbg_log_gmem_load_store_skips(device);

   pthread_mutex_lock(&device->submit_mutex);

   /* The cast below relies on vk_command_buffer being the first member. */
   static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
                 "vk must be first member of tu_cmd_buffer");
   struct tu_cmd_buffer **cmd_buffers =
      (struct tu_cmd_buffer **) vk_submit->command_buffers;
   uint32_t cmdbuf_count = vk_submit->command_buffer_count;

   VkResult result =
      tu_insert_dynamic_cmdbufs(device, &cmd_buffers, &cmdbuf_count);
   if (result != VK_SUCCESS) {
      /* Drop the submit lock taken above before bailing out. */
      pthread_mutex_unlock(&device->submit_mutex);
      return result;
   }

   bool has_trace_points = false;
   for (unsigned i = 0; i < cmdbuf_count; i++) {
      if (u_trace_enabled && u_trace_has_points(&cmd_buffers[i]->trace))
         has_trace_points = true;
   }

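/* If any command buffer carries u_trace tracepoints, set up per-submission
 * data so the timestamps can be read back once the GPU has finished.
 */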
   struct tu_u_trace_submission_data *u_trace_submission_data = NULL;

   void *submit = tu_submit_create(device);
   if (!submit) {
      /* Report the allocation failure and release the submit lock before
       * taking the cleanup path.
       */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      pthread_mutex_unlock(&device->submit_mutex);
      goto fail_create_submit;
   }

   if (has_trace_points) {
      tu_u_trace_submission_data_create(
         device, cmd_buffers, cmdbuf_count, &u_trace_submission_data);
   }

   for (uint32_t i = 0; i < cmdbuf_count; i++) {
      struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
      struct tu_cs *cs = &cmd_buffer->cs;

      if (perf_pass_index != ~0) {
         struct tu_cs_entry *perf_cs_entry =
            &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];

         submit_add_entries(device, submit, &dump_cmds, perf_cs_entry, 1);
      }

      submit_add_entries(device, submit, &dump_cmds, cs->entries,
                         cs->entry_count);

      if (u_trace_submission_data &&
          u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
         struct tu_cs_entry *trace_cs_entry =
            &u_trace_submission_data->cmd_trace_data[i]
                .timestamp_copy_cs->entries[0];
         submit_add_entries(device, submit, &dump_cmds, trace_cs_entry, 1);
      }
   }

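/* Append the autotuner's extra command stream when this submit needs to
 * report results back (see tu_autotune_submit_requires_fence()).
 */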
   if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
      struct tu_cs *autotune_cs = tu_autotune_on_submit(
         device, &device->autotune, cmd_buffers, cmdbuf_count);
      submit_add_entries(device, submit, &dump_cmds, autotune_cs->entries,
                         autotune_cs->entry_count);
   }

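/* With RD dumping enabled, emit an .rd capture of this submit: chip id,
 * the GPU address map, (optionally) buffer contents, and the command
 * stream addresses collected in dump_cmds.
 */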
   if (cmdbuf_count && FD_RD_DUMP(ENABLE) &&
       fd_rd_output_begin(&device->rd_output, device->submit_count)) {
      struct fd_rd_output *rd_output = &device->rd_output;

      if (FD_RD_DUMP(FULL)) {
         VkResult result = tu_queue_wait_fence(queue, queue->fence, ~0);
         if (result != VK_SUCCESS) {
            mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u",
                      device->device_idx, queue->msm_queue_id, result);
         }
      }

      fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8);
      fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);

      mtx_lock(&device->bo_mutex);
      util_dynarray_foreach (&device->dump_bo_list, struct tu_bo *, bo_ptr) {
         struct tu_bo *bo = *bo_ptr;
         uint64_t iova = bo->iova;

         /* RD_GPUADDR payload: low 32 bits of the iova, size, high 32 bits. */
         uint32_t buf[3] = { (uint32_t) iova, bo->size, (uint32_t) (iova >> 32) };
         fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
         if (bo->dump || FD_RD_DUMP(FULL)) {
            tu_bo_map(device, bo, NULL); /* note: this would need locking to be safe */
            fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, bo->map, bo->size);
         }
      }
      mtx_unlock(&device->bo_mutex);

      util_dynarray_foreach (&dump_cmds, struct tu_cs_entry, cmd) {
         uint64_t iova = cmd->bo->iova + cmd->offset;
         uint32_t size = cmd->size >> 2; /* size in dwords */
         uint32_t buf[3] = { (uint32_t) iova, size, (uint32_t) (iova >> 32) };
         fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
      }

      fd_rd_output_end(rd_output);
   }

   util_dynarray_fini(&dump_cmds);

   result =
      tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
                      vk_submit->signals, vk_submit->signal_count,
                      u_trace_submission_data);

   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      goto out;
   }

   tu_debug_bos_print_stats(device);

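/* Now that the submission id and fence are known, flush the per-cmdbuf
 * traces.  The submission data itself is freed along with the last command
 * buffer that has tracepoints.
 */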
   if (u_trace_submission_data) {
      u_trace_submission_data->submission_id = device->submit_count;
      u_trace_submission_data->queue = queue;
      u_trace_submission_data->fence = queue->fence;

      for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) {
         bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;
         if (u_trace_submission_data->cmd_trace_data[i].trace)
            u_trace_flush(u_trace_submission_data->cmd_trace_data[i].trace,
                          u_trace_submission_data, queue->device->vk.current_frame,
                          free_data);

         if (!u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
            /* u_trace is owned by cmd_buffer */
            u_trace_submission_data->cmd_trace_data[i].trace = NULL;
         }
      }
   }

   device->submit_count++;

   pthread_mutex_unlock(&device->submit_mutex);
   pthread_cond_broadcast(&queue->device->timeline_cond);

   u_trace_context_process(&device->trace_context, false);

out:
   tu_submit_finish(device, submit);

fail_create_submit:
   if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
      vk_free(&queue->device->vk.alloc, cmd_buffers);

   return result;
}

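/* Create one tu_queue: resolve the requested global priority, initialize
 * the common vk_queue, and open a kernel submitqueue at the mapped
 * priority.
 */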
VkResult
tu_queue_init(struct tu_device *device,
              struct tu_queue *queue,
              int idx,
              const VkDeviceQueueCreateInfo *create_info)
{
   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(create_info->pNext,
            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const VkQueueGlobalPriorityKHR global_priority = priority_info ?
      priority_info->globalPriority :
      (TU_DEBUG(HIPRIO) ? VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR :
       VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR);

   const int priority = tu_get_submitqueue_priority(
         device->physical_device, global_priority,
         device->vk.enabled_features.globalPriorityQuery);
   if (priority < 0) {
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "invalid global priority");
   }

   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;
   queue->priority = priority;
   queue->vk.driver_submit = queue_submit;

   int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
   if (ret) {
      /* Undo vk_queue_init() so the half-initialized queue isn't leaked. */
      vk_queue_finish(&queue->vk);
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "submitqueue create failed");
   }

   queue->fence = -1;

   return VK_SUCCESS;
}

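/* Tear down a queue created by tu_queue_init(): finish the common vk_queue
 * state and close the kernel submitqueue.
 */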
void
tu_queue_finish(struct tu_queue *queue)
{
   vk_queue_finish(&queue->vk);
   tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
}