/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "xe/anv_batch_chain.h"

#include "anv_private.h"
#include "anv_measure.h"
#include "common/intel_bind_timeline.h"

#include "drm-uapi/xe_drm.h"

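/*
 * Submit a single batch buffer to the given queue (or to its companion RCS
 * exec queue) and synchronously wait for completion. A temporary syncobj is
 * created for the wait and destroyed before returning, and the submission
 * also waits on the last point of the device's vm-bind timeline so the
 * batch only executes once its buffer bindings are in place.
 *
 * A minimal hypothetical call (identifiers illustrative):
 *
 *    result = xe_execute_simple_batch(queue, batch_bo, batch_bo_size,
 *                                     false);
 */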
VkResult
xe_execute_simple_batch(struct anv_queue *queue,
                        struct anv_bo *batch_bo,
                        uint32_t batch_bo_size,
                        bool is_companion_rcs_batch)
{
   struct anv_device *device = queue->device;
   uint32_t exec_queue_id = is_companion_rcs_batch ?
                            queue->companion_rcs_id :
                            queue->exec_queue_id;
   struct drm_syncobj_create syncobj_create = {};
   struct drm_syncobj_destroy syncobj_destroy = {};
   struct drm_xe_sync syncs[2] = {};
   VkResult result = VK_SUCCESS;

   if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create))
      return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj");

   syncs[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
   syncs[0].flags = DRM_XE_SYNC_FLAG_SIGNAL;
   syncs[0].handle = syncobj_create.handle;

   /* vm bind sync */
   syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
   syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
   syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);

   struct drm_xe_exec exec = {
      .exec_queue_id = exec_queue_id,
      .num_batch_buffer = 1,
      .address = batch_bo->offset,
      .num_syncs = ARRAY_SIZE(syncs),
      .syncs = (uintptr_t)syncs,
   };

   if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
      result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m");
      goto exec_error;
   }

   struct drm_syncobj_wait wait = {
      .handles = (uintptr_t)&syncobj_create.handle,
      .timeout_nsec = INT64_MAX,
      .count_handles = 1,
   };
   if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait))
      result = vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");

exec_error:
   syncobj_destroy.handle = syncobj_create.handle;
   intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);

   return result;
}

/* Readability aliases for xe_exec_fill_sync()'s "signal" parameter. */
#define TYPE_SIGNAL true
#define TYPE_WAIT false

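/*
 * Translate a vk_sync into a struct drm_xe_sync. Only DRM-syncobj-backed
 * vk_sync types are supported; a non-zero value selects a timeline syncobj,
 * and "signal" turns the entry from a wait into a signal.
 */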
static void
xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync,
                  uint64_t value, bool signal)
{
   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
      unreachable("Unsupported sync type");
      return;
   }

   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
   xe_sync->handle = syncobj->syncobj;

   if (value) {
      xe_sync->type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
      xe_sync->timeline_value = value;
   } else {
      xe_sync->type = DRM_XE_SYNC_TYPE_SYNCOBJ;
   }

   if (signal)
      xe_sync->flags = DRM_XE_SYNC_FLAG_SIGNAL;
}

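/*
 * Build the drm_xe_sync array for a submission, in order: the utrace sync
 * (signaled here only when the utrace submit carries no batch of its own),
 * the caller's waits and signals, any extra syncs copied verbatim, the
 * queue's sync (when present and not a companion RCS submission), and the
 * mandatory wait on the vm-bind timeline. On success the caller owns the
 * returned array and must release it with vk_free().
 */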
static VkResult
xe_exec_process_syncs(struct anv_queue *queue,
                      uint32_t wait_count, const struct vk_sync_wait *waits,
                      uint32_t signal_count, const struct vk_sync_signal *signals,
                      uint32_t extra_sync_count, const struct drm_xe_sync *extra_syncs,
                      struct anv_utrace_submit *utrace_submit,
                      bool is_companion_rcs_queue,
                      struct drm_xe_sync **ret, uint32_t *ret_count)
{
   struct anv_device *device = queue->device;
   /* Signal the utrace sync only if it doesn't have a batch. Otherwise it's
    * the utrace batch that should signal its own sync.
    */
   const bool has_utrace_sync = utrace_submit &&
                                util_dynarray_num_elements(&utrace_submit->batch_bos, struct anv_bo *) == 0;
   const uint32_t num_syncs = wait_count + signal_count + extra_sync_count +
                              (has_utrace_sync ? 1 : 0) +
                              ((queue->sync && !is_companion_rcs_queue) ? 1 : 0) +
                              1 /* vm bind sync */;
   struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
                                            sizeof(*xe_syncs) * num_syncs, 8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   struct drm_xe_sync *xe_sync;

   if (!xe_syncs)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t count = 0;

   if (has_utrace_sync) {
      xe_sync = &xe_syncs[count++];
      xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      const struct vk_sync_wait *vk_wait = &waits[i];

      xe_sync = &xe_syncs[count++];
      xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
                        TYPE_WAIT);
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      const struct vk_sync_signal *vk_signal = &signals[i];

      xe_sync = &xe_syncs[count++];
      xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
                        TYPE_SIGNAL);
   }

   for (uint32_t i = 0; i < extra_sync_count; i++)
      xe_syncs[count++] = extra_syncs[i];

   if (queue->sync && !is_companion_rcs_queue) {
      xe_sync = &xe_syncs[count++];
      xe_exec_fill_sync(xe_sync, queue->sync, 0,
                        TYPE_SIGNAL);
   }

   /* vm bind sync */
   xe_sync = &xe_syncs[count++];
   xe_sync->handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
   xe_sync->type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
   xe_sync->timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);

   assert(count == num_syncs);
   *ret = xe_syncs;
   *ret_count = num_syncs;
   return VK_SUCCESS;
}

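/*
 * Print batch submission details when the submit debug flag is enabled and
 * forward the command buffers to the common batch debug helper.
 */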
static void
xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
                    struct anv_cmd_buffer **cmd_buffers, struct anv_query_pool *perf_query_pool,
                    uint32_t perf_query_pass, struct drm_xe_exec *exec)
{
   if (INTEL_DEBUG(DEBUG_SUBMIT))
      fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n",
              (uint64_t)exec->address, queue->vk.index_in_family);

   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
                                   perf_query_pool, perf_query_pass);
}

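/*
 * Submit a TR-TT batch from a sparse submission. On top of the
 * caller-provided syncs, the submission signals the TR-TT timeline at the
 * batch's timeline value; if queue->sync is set, the function then waits
 * for it to complete before returning.
 */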
VkResult
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
                      struct anv_trtt_batch_bo *trtt_bbo)
{
   struct anv_queue *queue = submit->queue;
   struct anv_device *device = queue->device;
   struct anv_trtt *trtt = &device->trtt;
   VkResult result;

   struct drm_xe_sync extra_sync = {
      .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
      .handle = trtt->timeline_handle,
      .timeline_value = trtt_bbo->timeline_val,
   };

   struct drm_xe_sync *xe_syncs = NULL;
   uint32_t xe_syncs_count = 0;
   result = xe_exec_process_syncs(queue, submit->wait_count, submit->waits,
                                  submit->signal_count, submit->signals,
                                  1, &extra_sync,
                                  NULL, /* utrace_submit */
                                  false, /* is_companion_rcs_queue */
                                  &xe_syncs, &xe_syncs_count);
   if (result != VK_SUCCESS)
      return result;

   struct drm_xe_exec exec = {
      .exec_queue_id = queue->exec_queue_id,
      .num_syncs = xe_syncs_count,
      .syncs = (uintptr_t)xe_syncs,
      .address = trtt_bbo->bo->offset,
      .num_batch_buffer = 1,
   };

   if (!device->info->no_hw) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
         vk_free(&device->vk.alloc, xe_syncs);
         return vk_device_set_lost(&device->vk, "XE_EXEC failed: %m");
      }
   }

   /* The sync array is only needed for the exec ioctl. */
   vk_free(&device->vk.alloc, xe_syncs);

   if (queue->sync) {
      result = vk_sync_wait(&device->vk, queue->sync, 0,
                            VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
      if (result != VK_SUCCESS)
         return vk_queue_set_lost(&queue->vk, "trtt sync wait failed");
   }

   return VK_SUCCESS;
}

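/*
 * Submit the utrace batch for this queue, signaling the utrace sync and
 * waiting on the vm-bind timeline. On integrated GPUs the batch BOs are
 * flushed from the host cache first when required.
 */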
VkResult
xe_queue_exec_utrace_locked(struct anv_queue *queue,
                            struct anv_utrace_submit *utrace_submit)
{
   struct anv_device *device = queue->device;
   struct drm_xe_sync xe_syncs[2] = {};

   xe_exec_fill_sync(&xe_syncs[0], utrace_submit->sync, 0, TYPE_SIGNAL);

   xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
   xe_syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
   xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_flush &&
       anv_bo_needs_host_cache_flush(device->utrace_bo_pool.bo_alloc_flags)) {
      util_dynarray_foreach(&utrace_submit->batch_bos, struct anv_bo *, bo)
         intel_flush_range((*bo)->map, (*bo)->size);
   }
#endif

   struct anv_bo *batch_bo =
      *util_dynarray_element(&utrace_submit->batch_bos, struct anv_bo *, 0);
   struct drm_xe_exec exec = {
      .exec_queue_id = queue->exec_queue_id,
      .num_batch_buffer = 1,
      .syncs = (uintptr_t)xe_syncs,
      .num_syncs = ARRAY_SIZE(xe_syncs),
      .address = batch_bo->offset,
   };
   if (likely(!device->info->no_hw)) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
         return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }

   return VK_SUCCESS;
}

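/*
 * Submit a companion RCS command buffer on the queue's companion exec
 * queue, waiting on the caller's syncs and signaling
 * queue->companion_sync.
 */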
static VkResult
xe_companion_rcs_queue_exec_locked(struct anv_queue *queue,
                                   struct anv_cmd_buffer *companion_rcs_cmd_buffer,
                                   uint32_t wait_count,
                                   const struct vk_sync_wait *waits)
{
   struct anv_device *device = queue->device;
   VkResult result;

   struct vk_sync_signal companion_sync = {
      .sync = queue->companion_sync,
   };
   struct drm_xe_sync *xe_syncs = NULL;
   uint32_t xe_syncs_count = 0;
   result = xe_exec_process_syncs(queue,
                                  wait_count, waits,
                                  1, &companion_sync,
                                  0, NULL, /* extra_syncs */
                                  NULL /* utrace_submit */,
                                  true /* is_companion_rcs_queue */,
                                  &xe_syncs,
                                  &xe_syncs_count);
   if (result != VK_SUCCESS)
      return result;

   struct drm_xe_exec exec = {
      .exec_queue_id = queue->companion_rcs_id,
      .num_batch_buffer = 1,
      .syncs = (uintptr_t)xe_syncs,
      .num_syncs = xe_syncs_count,
   };

   struct anv_batch_bo *batch_bo =
      list_first_entry(&companion_rcs_cmd_buffer->batch_bos,
                       struct anv_batch_bo, link);
   exec.address = batch_bo->bo->offset;

   anv_measure_submit(companion_rcs_cmd_buffer);
   xe_exec_print_debug(queue, 1, &companion_rcs_cmd_buffer, NULL, 0, &exec);

   if (!device->info->no_hw) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
         result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }
   vk_free(&device->vk.alloc, xe_syncs);

   return result;
}

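/*
 * Main submission path for a queue. Builds the sync array, chains the
 * command buffers together, and executes the first batch BO (or the
 * trivial batch when no command buffers are given); afterwards it submits
 * the companion RCS command buffer, if any, and the pending utrace batch.
 */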
VkResult
xe_queue_exec_locked(struct anv_queue *queue,
                     uint32_t wait_count,
                     const struct vk_sync_wait *waits,
                     uint32_t cmd_buffer_count,
                     struct anv_cmd_buffer **cmd_buffers,
                     uint32_t signal_count,
                     const struct vk_sync_signal *signals,
                     struct anv_query_pool *perf_query_pool,
                     uint32_t perf_query_pass,
                     struct anv_utrace_submit *utrace_submit)
{
   struct anv_device *device = queue->device;
   VkResult result;

   struct drm_xe_sync *xe_syncs = NULL;
   uint32_t xe_syncs_count = 0;
   result = xe_exec_process_syncs(queue, wait_count, waits,
                                  signal_count, signals,
                                  0, NULL, /* extra_syncs */
                                  utrace_submit,
                                  false, /* is_companion_rcs_queue */
                                  &xe_syncs, &xe_syncs_count);
   if (result != VK_SUCCESS)
      return result;

   /* If we have no batch for utrace, just forget about it now. */
   if (utrace_submit &&
       util_dynarray_num_elements(&utrace_submit->batch_bos,
                                  struct anv_bo *) == 0)
      utrace_submit = NULL;

   struct drm_xe_exec exec = {
      .exec_queue_id = queue->exec_queue_id,
      .num_batch_buffer = 1,
      .syncs = (uintptr_t)xe_syncs,
      .num_syncs = xe_syncs_count,
   };

   if (cmd_buffer_count) {
      if (unlikely(device->physical->measure_device.config)) {
         for (uint32_t i = 0; i < cmd_buffer_count; i++)
            anv_measure_submit(cmd_buffers[i]);
      }

      anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count);

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      if (device->physical->memory.need_flush &&
          anv_bo_needs_host_cache_flush(device->batch_bo_pool.bo_alloc_flags))
         anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count);
#endif

      struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0];
      struct anv_batch_bo *first_batch_bo = list_first_entry(&first_cmd_buffer->batch_bos,
                                                             struct anv_batch_bo, link);
      exec.address = first_batch_bo->bo->offset;
   } else {
      exec.address = device->trivial_batch_bo->offset;
   }

   xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool,
                       perf_query_pass, &exec);

   /* TODO: add perfetto stuff when Xe supports it */

   if (!device->info->no_hw) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
         result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }
   vk_free(&device->vk.alloc, xe_syncs);

   if (cmd_buffer_count != 0 && cmd_buffers[0]->companion_rcs_cmd_buffer) {
      /* Chaining is not allowed for cmd_buffers with a companion RCS
       * command buffer.
       */
      assert(cmd_buffer_count == 1);
      result = xe_companion_rcs_queue_exec_locked(queue,
                                                  cmd_buffers[0]->companion_rcs_cmd_buffer,
                                                  wait_count, waits);
   }

   result = anv_queue_post_submit(queue, result);

   if (result == VK_SUCCESS && utrace_submit)
      result = xe_queue_exec_utrace_locked(queue, utrace_submit);

   return result;
}