/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "i915/anv_batch_chain.h"
#include "anv_private.h"
#include "anv_measure.h"

#include "perf/intel_perf.h"
#include "util/u_debug.h"

#include "drm-uapi/i915_drm.h"

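/* Accumulated state for a single DRM_IOCTL_I915_GEM_EXECBUFFER2 submission:
 * the object/BO validation lists, the syncobj fences (with optional timeline
 * values), and the execbuffer2 ioctl arguments themselves.
 */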
struct anv_execbuf {
   struct drm_i915_gem_execbuffer2 execbuf;

   struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;

   struct drm_i915_gem_exec_object2 *objects;
   uint32_t bo_count;
   uint32_t bo_array_length;
   struct anv_bo **bos;

   uint32_t syncobj_count;
   uint32_t syncobj_array_length;
   struct drm_i915_gem_exec_fence *syncobjs;
   uint64_t *syncobj_values;

   uint32_t cmd_buffer_count;
   struct anv_query_pool *perf_query_pool;

   const VkAllocationCallbacks *alloc;
   VkSystemAllocationScope alloc_scope;

   int perf_query_pass;
};

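/* Free the object, BO and syncobj arrays accumulated for a submission. */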
static void
anv_execbuf_finish(struct anv_execbuf *exec)
{
   vk_free(exec->alloc, exec->syncobjs);
   vk_free(exec->alloc, exec->syncobj_values);
   vk_free(exec->alloc, exec->objects);
   vk_free(exec->alloc, exec->bos);
}

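/* Append an i915_user_extension to the execbuf extension chain. The chain
 * head lives in cliprects_ptr, which is repurposed once
 * I915_EXEC_USE_EXTENSIONS is set.
 */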
static void
anv_execbuf_add_ext(struct anv_execbuf *exec,
                    uint32_t ext_name,
                    struct i915_user_extension *ext)
{
   __u64 *iter = &exec->execbuf.cliprects_ptr;

   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;

   while (*iter != 0) {
      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
   }

   ext->name = ext_name;

   *iter = (uintptr_t) ext;
}

static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags);

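/* Add a BO to the execbuf validation list (deduplicated via exec_obj_index),
 * growing the object/BO arrays as needed. If a relocation list is given, its
 * BO dependencies are added as well.
 */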
static VkResult
anv_execbuf_add_bo(struct anv_device *device,
                   struct anv_execbuf *exec,
                   struct anv_bo *bo,
                   struct anv_reloc_list *relocs,
                   uint32_t extra_flags)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   if (bo->exec_obj_index < exec->bo_count &&
       exec->bos[bo->exec_obj_index] == bo)
      obj = &exec->objects[bo->exec_obj_index];

   if (obj == NULL) {
      /* We've never seen this one before.  Add it to the list and assign
       * an id that we can use later.
       */
      if (exec->bo_count >= exec->bo_array_length) {
         uint32_t new_len = exec->objects ? exec->bo_array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            vk_realloc(exec->alloc, exec->objects,
                       new_len * sizeof(*new_objects), 8, exec->alloc_scope);
         if (new_objects == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->objects = new_objects;

         struct anv_bo **new_bos =
            vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8,
                       exec->alloc_scope);
         if (new_bos == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->bos = new_bos;
         exec->bo_array_length = new_len;
      }

      assert(exec->bo_count < exec->bo_array_length);

      bo->exec_obj_index = exec->bo_count++;
      obj = &exec->objects[bo->exec_obj_index];
      exec->bos[bo->exec_obj_index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = bo->flags | extra_flags;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   if (extra_flags & EXEC_OBJECT_WRITE) {
      obj->flags |= EXEC_OBJECT_WRITE;
      obj->flags &= ~EXEC_OBJECT_ASYNC;
   }

   if (relocs != NULL) {
      return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
                                       relocs->deps, extra_flags);
   }

   return VK_SUCCESS;
}

/* Add BO dependencies to execbuf */
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags)
{
   for (uint32_t w = 0; w < dep_words; w++) {
      BITSET_WORD mask = deps[w];
      while (mask) {
         int i = u_bit_scan(&mask);
         uint32_t gem_handle = w * BITSET_WORDBITS + i;
         struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
         assert(bo->refcount > 0);
         VkResult result =
            anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   return VK_SUCCESS;
}

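/* Add a syncobj wait or signal to the execbuf. The fence array grows on
 * demand; the values array is allocated lazily the first time a non-zero
 * timeline value is seen, so binary-only submissions keep using the plain
 * I915_EXEC_FENCE_ARRAY path.
 */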
static VkResult
anv_execbuf_add_syncobj(struct anv_device *device,
                        struct anv_execbuf *exec,
                        uint32_t syncobj,
                        uint32_t flags,
                        uint64_t timeline_value)
{
   if (exec->syncobj_count >= exec->syncobj_array_length) {
      uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);

      struct drm_i915_gem_exec_fence *new_syncobjs =
         vk_realloc(exec->alloc, exec->syncobjs,
                    new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope);
      if (new_syncobjs == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      exec->syncobjs = new_syncobjs;

      if (exec->syncobj_values) {
         uint64_t *new_syncobj_values =
            vk_realloc(exec->alloc, exec->syncobj_values,
                       new_len * sizeof(*new_syncobj_values), 8,
                       exec->alloc_scope);
         if (new_syncobj_values == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->syncobj_values = new_syncobj_values;
      }

      exec->syncobj_array_length = new_len;
   }

   if (timeline_value && !exec->syncobj_values) {
      exec->syncobj_values =
         vk_zalloc(exec->alloc, exec->syncobj_array_length *
                                sizeof(*exec->syncobj_values),
                   8, exec->alloc_scope);
      if (!exec->syncobj_values)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
      .handle = syncobj,
      .flags = flags,
   };
   if (exec->syncobj_values)
      exec->syncobj_values[exec->syncobj_count] = timeline_value;

   exec->syncobj_count++;

   return VK_SUCCESS;
}

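/* Add a vk_sync as either a wait or a signal. BO-backed syncs become extra
 * entries in the validation list; DRM syncobjs become fence-array entries.
 */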
static VkResult
anv_execbuf_add_sync(struct anv_device *device,
                     struct anv_execbuf *execbuf,
                     struct vk_sync *sync,
                     bool is_signal,
                     uint64_t value)
{
   /* It's illegal to signal a timeline with value 0 because that's never
    * higher than the current value.  A timeline wait on value 0 is always
    * trivial because every uint64_t value is >= 0.
    */
   if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
      return VK_SUCCESS;

   if (vk_sync_is_anv_bo_sync(sync)) {
      struct anv_bo_sync *bo_sync =
         container_of(sync, struct anv_bo_sync, sync);

      assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));

      return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
                                is_signal ? EXEC_OBJECT_WRITE : 0);
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);

      if (!(sync->flags & VK_SYNC_IS_TIMELINE))
         value = 0;

      return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
                                     is_signal ? I915_EXEC_FENCE_SIGNAL :
                                                 I915_EXEC_FENCE_WAIT,
                                     value);
   }

   unreachable("Invalid sync type");
}

static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
                             struct anv_cmd_buffer *cmd_buffer)
{
   VkResult result;
   /* Add surface dependencies (BOs) to the execbuf */
   result = anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
                                      cmd_buffer->surface_relocs.dep_words,
                                      cmd_buffer->surface_relocs.deps, 0);
   if (result != VK_SUCCESS)
      return result;

   /* First, we walk over all of the bos we've seen and add them and their
    * relocations to the validate list.
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
                                  (*bbo)->bo, &(*bbo)->relocs, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   struct anv_bo **bo_entry;
   u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
      result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
                                  *bo_entry, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

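/* Add every BO backing a state pool's block pool to the validation list. */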
static VkResult
pin_state_pool(struct anv_device *device,
               struct anv_execbuf *execbuf,
               struct anv_state_pool *pool)
{
   anv_block_pool_foreach_bo(bo, &pool->block_pool) {
      VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

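/* Pick the HW context id and execbuffer flags for a submission. With VM
 * control, each queue (and its companion RCS context) has its own context,
 * so no engine-selection flags are needed; otherwise the shared device
 * context is used together with the queue's engine-selection flags.
 */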
static void
get_context_and_exec_flags(struct anv_queue *queue,
                           bool is_companion_rcs_batch,
                           uint64_t *exec_flags,
                           uint32_t *context_id)
{
   assert(queue != NULL);

   struct anv_device *device = queue->device;

   /* Submit the batch to index 0, which is the main virtual engine. */
   *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags;

   *context_id = device->physical->has_vm_control ?
                 is_companion_rcs_batch ?
                 queue->companion_rcs_id :
                 queue->context_id :
                 device->context_id;
}

static VkResult
anv_execbuf_add_trtt_bos(struct anv_device *device,
                         struct anv_execbuf *execbuf)
{
   struct anv_trtt *trtt = &device->trtt;
   VkResult result = VK_SUCCESS;

   /* If l3_addr is zero we're not using TR-TT, so there's no BO to add. */
   if (!trtt->l3_addr)
      return VK_SUCCESS;

   pthread_mutex_lock(&trtt->mutex);

   for (int i = 0; i < trtt->num_page_table_bos; i++) {
      result = anv_execbuf_add_bo(device, execbuf, trtt->page_table_bos[i],
                                  NULL, 0);
      if (result != VK_SUCCESS)
         goto out;
   }

out:
   pthread_mutex_unlock(&trtt->mutex);
   return result;
}

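/* Build the full validation list and execbuffer2 arguments for a chain of
 * command buffers: chain the batches together, add the command-buffer BOs,
 * the global state pools, user memory objects, TR-TT page tables and image
 * private BOs, and finally move the first batch BO to the end of the list
 * as the kernel requires.
 */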
static VkResult
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
                              struct anv_queue *queue,
                              struct anv_cmd_buffer **cmd_buffers,
                              uint32_t num_cmd_buffers)
{
   struct anv_device *device = queue->device;
   VkResult result;

   if (unlikely(device->physical->measure_device.config)) {
      for (uint32_t i = 0; i < num_cmd_buffers; i++)
         anv_measure_submit(cmd_buffers[i]);
   }

   /* Edit the tail of the command buffers to chain them all together if they
    * can be.
    */
   anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);

   for (uint32_t i = 0; i < num_cmd_buffers; i++) {
      result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Add all the global BOs to the object list for the softpin case. */
   result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
   if (result != VK_SUCCESS)
      return result;

   if (device->physical->va.bindless_surface_state_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   if (device->physical->va.indirect_push_descriptor_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->indirect_push_descriptor_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->general_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->binding_table_pool);
   if (result != VK_SUCCESS)
      return result;

   /* Add the BOs for all user-allocated memory objects because we can't
    * track them after binding updates with VK_EXT_descriptor_indexing and
    * due to how sparse resources work.
    */
   list_for_each_entry(struct anv_device_memory, mem,
                       &device->memory_objects, link) {
      result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   result = anv_execbuf_add_trtt_bos(device, execbuf);
   if (result != VK_SUCCESS)
      return result;

   /* Add all the private BOs from images because we can't track them after
    * binding updates with VK_EXT_descriptor_indexing.
    */
   list_for_each_entry(struct anv_image, image,
                       &device->image_private_objects, link) {
      struct anv_bo *private_bo =
         image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo;
      result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   struct list_head *batch_bo = &cmd_buffers[0]->batch_bos;
   struct anv_batch_bo *first_batch_bo =
      list_first_entry(batch_bo, struct anv_batch_bo, link);

   /* The kernel requires that the last entry in the validation list be the
    * batch buffer to execute.  We can simply swap the element
    * corresponding to the first batch_bo in the chain with the last
    * element in the list.
    */
   if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
      uint32_t idx = first_batch_bo->bo->exec_obj_index;
      uint32_t last_idx = execbuf->bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
      assert(execbuf->bos[idx] == first_batch_bo->bo);

      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->exec_obj_index = idx;

      execbuf->objects[last_idx] = tmp_obj;
      execbuf->bos[last_idx] = first_batch_bo->bo;
      first_batch_bo->bo->exec_obj_index = last_idx;
   }

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_flush &&
       anv_bo_needs_host_cache_flush(device->batch_bo_pool.bo_alloc_flags))
      anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
#endif

   assert(!cmd_buffers[0]->is_companion_rcs_cmd_buffer || device->physical->has_vm_control);
   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, cmd_buffers[0]->is_companion_rcs_cmd_buffer,
                              &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = 0,
      .cliprects_ptr = 0,
      .num_cliprects = 0,
      .DR1 = 0,
      .DR4 = 0,
      .flags = I915_EXEC_NO_RELOC |
               I915_EXEC_HANDLE_LUT |
               exec_flags,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   return VK_SUCCESS;
}

static VkResult
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
{
   struct anv_device *device = queue->device;
   VkResult result = anv_execbuf_add_bo(device, execbuf,
                                        device->trivial_batch_bo,
                                        NULL, 0);
   if (result != VK_SUCCESS)
      return result;

   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
      .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   return VK_SUCCESS;
}

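/* Build an execbuf for a utrace submission: add the workaround BO and the
 * submit's batch BOs, signal the submit's sync, and make sure the first
 * batch BO ends up last in the validation list.
 */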
static VkResult
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
                     struct anv_utrace_submit *submit)
{
   struct anv_device *device = queue->device;

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   VkResult result = anv_execbuf_add_bo(device, execbuf,
                                        device->workaround_bo,
                                        NULL, 0);
   if (result != VK_SUCCESS)
      return result;

   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, _bo) {
      struct anv_bo *bo = *_bo;

      result = anv_execbuf_add_bo(device, execbuf, bo,
                                  &submit->relocs, 0);
      if (result != VK_SUCCESS)
         return result;

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      if (device->physical->memory.need_flush &&
          anv_bo_needs_host_cache_flush(bo->alloc_flags))
         intel_flush_range(bo->map, bo->size);
#endif
   }

   result = anv_execbuf_add_sync(device, execbuf, submit->sync,
                                 true /* is_signal */, 0 /* value */);
   if (result != VK_SUCCESS)
      return result;

   struct anv_bo *batch_bo =
      *util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
   if (batch_bo->exec_obj_index != execbuf->bo_count - 1) {
      uint32_t idx = batch_bo->exec_obj_index;
      uint32_t last_idx = execbuf->bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
      assert(execbuf->bos[idx] == batch_bo);

      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->exec_obj_index = idx;

      execbuf->objects[last_idx] = tmp_obj;
      execbuf->bos[last_idx] = batch_bo;
      batch_bo->exec_obj_index = last_idx;
   }

   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = submit->batch.next - submit->batch.start,
      .flags = I915_EXEC_NO_RELOC |
               I915_EXEC_HANDLE_LUT |
               I915_EXEC_FENCE_ARRAY |
               exec_flags,
      .rsvd1 = context_id,
      .rsvd2 = 0,
      .num_cliprects = execbuf->syncobj_count,
      .cliprects_ptr = (uintptr_t)execbuf->syncobjs,
   };

   return VK_SUCCESS;
}

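/* Thin wrapper around the execbuffer2 ioctl: picks the _WR variant when an
 * output fence is requested and retries while the kernel returns ENOMEM.
 */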
static int
anv_gem_execbuffer(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf)
{
   int ret;
   const unsigned long request = (execbuf->flags & I915_EXEC_FENCE_OUT) ?
                                 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR :
                                 DRM_IOCTL_I915_GEM_EXECBUFFER2;

   do {
      ret = intel_ioctl(device->fd, request, execbuf);
   } while (ret && errno == ENOMEM);

   return ret;
}

static VkResult
anv_queue_exec_utrace_locked(struct anv_queue *queue,
                             struct anv_utrace_submit *submit)
{
   assert(util_dynarray_num_elements(&submit->batch_bos,
                                     struct anv_bo *) > 0);

   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };

   VkResult result = setup_utrace_execbuf(&execbuf, queue, submit);
   if (result != VK_SUCCESS)
      goto error;

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret)
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");

error:
   anv_execbuf_finish(&execbuf);

   return result;
}

static void
anv_i915_debug_submit(const struct anv_execbuf *execbuf)
{
   uint32_t total_size_kb = 0, total_vram_only_size_kb = 0;
   for (uint32_t i = 0; i < execbuf->bo_count; i++) {
      const struct anv_bo *bo = execbuf->bos[i];
      total_size_kb += bo->size / 1024;
      if (anv_bo_is_vram_only(bo))
         total_vram_only_size_kb += bo->size / 1024;
   }

   fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n",
           execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len,
           (float)total_size_kb / 1024.0f,
           (float)total_vram_only_size_kb / 1024.0f);
   for (uint32_t i = 0; i < execbuf->bo_count; i++) {
      const struct anv_bo *bo = execbuf->bos[i];

      fprintf(stderr, "   BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
              "KB handle=%05u capture=%u vram_only=%u name=%s\n",
              bo->offset, bo->offset + bo->size - 1, bo->size / 1024,
              bo->gem_handle, (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
              anv_bo_is_vram_only(bo), bo->name);
   }
}

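/* Attach the collected syncobjs to the execbuf: through the timeline-fences
 * extension when any timeline values were recorded, otherwise through the
 * legacy I915_EXEC_FENCE_ARRAY path.
 */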
static void
setup_execbuf_fence_params(struct anv_execbuf *execbuf)
{
   if (execbuf->syncobj_values) {
      execbuf->timeline_fences.fence_count = execbuf->syncobj_count;
      execbuf->timeline_fences.handles_ptr = (uintptr_t)execbuf->syncobjs;
      execbuf->timeline_fences.values_ptr = (uintptr_t)execbuf->syncobj_values;
      anv_execbuf_add_ext(execbuf,
                          DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
                          &execbuf->timeline_fences.base);
   } else if (execbuf->syncobjs) {
      execbuf->execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf->execbuf.num_cliprects = execbuf->syncobj_count;
      execbuf->execbuf.cliprects_ptr = (uintptr_t)execbuf->syncobjs;
   }
}

static VkResult
i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
                                     struct anv_cmd_buffer *companion_rcs_cmd_buffer,
                                     uint32_t wait_count,
                                     const struct vk_sync_wait *waits)
{
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &queue->device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   VkResult result =
      anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto error;

   for (uint32_t i = 0; i < wait_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    waits[i].sync,
                                    false /* is_signal */,
                                    waits[i].wait_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (queue->companion_sync) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    queue->companion_sync,
                                    true /* is_signal */, 0);
      if (result != VK_SUCCESS)
         goto error;
   }

   result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
                                          &companion_rcs_cmd_buffer, 1);
   if (result != VK_SUCCESS)
      goto error;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   anv_cmd_buffer_exec_batch_debug(queue, 1, &companion_rcs_cmd_buffer, NULL, 0);

   setup_execbuf_fence_params(&execbuf);

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret) {
      anv_i915_debug_submit(&execbuf);
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

error:
   anv_execbuf_finish(&execbuf);
   return result;
}

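/* Main i915 submission path for a queue: gather the waits/signals, the
 * command-buffer BOs and the per-pass performance-query reconfiguration,
 * then issue the execbuffer2 ioctl(s), including the companion RCS command
 * buffer when present.
 */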
VkResult
i915_queue_exec_locked(struct anv_queue *queue,
                       uint32_t wait_count,
                       const struct vk_sync_wait *waits,
                       uint32_t cmd_buffer_count,
                       struct anv_cmd_buffer **cmd_buffers,
                       uint32_t signal_count,
                       const struct vk_sync_signal *signals,
                       struct anv_query_pool *perf_query_pool,
                       uint32_t perf_query_pass,
                       struct anv_utrace_submit *utrace_submit)
{
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &queue->device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
      .perf_query_pass = perf_query_pass,
   };
   VkResult result;

   if (utrace_submit &&
       util_dynarray_num_elements(&utrace_submit->batch_bos,
                                  struct anv_bo *) == 0) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    utrace_submit->sync,
                                    true /* is_signal */,
                                    0);
      if (result != VK_SUCCESS)
         goto error;

      /* The utrace submission doesn't have its own batch buffer, so it's
       * enough to signal its sync from this submission; don't submit it
       * separately later.
       */
      utrace_submit = NULL;
   }

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   result =
      anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto error;

   for (uint32_t i = 0; i < wait_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    waits[i].sync,
                                    false /* is_signal */,
                                    waits[i].wait_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    signals[i].sync,
                                    true /* is_signal */,
                                    signals[i].signal_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (queue->sync) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    queue->sync,
                                    true /* is_signal */,
                                    0 /* signal_value */);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (cmd_buffer_count) {
      result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers,
                                             cmd_buffer_count);
   } else {
      result = setup_empty_execbuf(&execbuf, queue);
   }

   if (result != VK_SUCCESS)
      goto error;

   const bool has_perf_query =
      perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
                                   perf_query_pool, perf_query_pass);

   setup_execbuf_fence_params(&execbuf);

   if (has_perf_query) {
      assert(perf_query_pass < perf_query_pool->n_passes);
      struct intel_perf_query_info *query_info =
         perf_query_pool->pass_query[perf_query_pass];

      /* Some performance queries only use the pipeline statistics HW; no OA
       * is needed in that case, so there is no need to reconfigure.
       */
      if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
          (query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
           query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                               (void *)(uintptr_t) query_info->oa_metrics_set_id);
         if (ret < 0) {
            result = vk_device_set_lost(&device->vk,
                                        "i915-perf config failed: %s",
                                        strerror(errno));
         }
      }

      struct anv_bo *pass_batch_bo = perf_query_pool->bo;

      struct drm_i915_gem_exec_object2 query_pass_object = {
         .handle = pass_batch_bo->gem_handle,
         .offset = pass_batch_bo->offset,
         .flags = pass_batch_bo->flags,
      };

      uint64_t exec_flags = 0;
      uint32_t context_id;
      get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

      struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
         .buffers_ptr = (uintptr_t) &query_pass_object,
         .buffer_count = 1,
         .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
                                                              perf_query_pass),
         .flags = I915_EXEC_HANDLE_LUT | exec_flags,
         .rsvd1 = context_id,
      };

      int ret = queue->device->info->no_hw ? 0 :
                anv_gem_execbuffer(queue->device, &query_pass_execbuf);
      if (ret)
         result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret) {
      anv_i915_debug_submit(&execbuf);
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

   if (cmd_buffer_count != 0 && cmd_buffers[0]->companion_rcs_cmd_buffer) {
      struct anv_cmd_buffer *companion_rcs_cmd_buffer =
         cmd_buffers[0]->companion_rcs_cmd_buffer;
      assert(companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer);
      assert(cmd_buffer_count == 1);
      result = i915_companion_rcs_queue_exec_locked(queue,
         cmd_buffers[0]->companion_rcs_cmd_buffer, wait_count, waits);
   }

   result = anv_queue_post_submit(queue, result);

error:
   anv_execbuf_finish(&execbuf);

   if (result == VK_SUCCESS && utrace_submit)
      result = anv_queue_exec_utrace_locked(queue, utrace_submit);

   return result;
}

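/* Submit a single standalone batch BO and synchronously wait for it to
 * complete.
 */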
VkResult
i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
                          uint32_t batch_bo_size, bool is_companion_rcs_batch)
{
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &queue->device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };

   VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto fail;

   assert(!is_companion_rcs_batch || device->physical->has_vm_control);
   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, is_companion_rcs_batch, &exec_flags,
                              &context_id);

   execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf.objects,
      .buffer_count = execbuf.bo_count,
      .batch_start_offset = 0,
      .batch_len = batch_bo_size,
      .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
      result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
      goto fail;
   }

   result = anv_device_wait(device, batch_bo, INT64_MAX);
   if (result != VK_SUCCESS)
      result = vk_device_set_lost(&device->vk,
                                  "anv_device_wait failed: %m");

fail:
   anv_execbuf_finish(&execbuf);
   return result;
}

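/* Submit a TR-TT page-table update batch: the sparse submission's waits and
 * signals, a signal on the TR-TT timeline, the page-table BOs (written by
 * the batch) and the batch BO itself.
 */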
VkResult
i915_execute_trtt_batch(struct anv_sparse_submission *submit,
                        struct anv_trtt_batch_bo *trtt_bbo)
{
   struct anv_queue *queue = submit->queue;
   struct anv_device *device = queue->device;
   struct anv_trtt *trtt = &device->trtt;
   struct anv_execbuf execbuf = {
      .alloc = &device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };
   VkResult result;

   for (uint32_t i = 0; i < submit->wait_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf, submit->waits[i].sync,
                                    false /* is_signal */,
                                    submit->waits[i].wait_value);
      if (result != VK_SUCCESS)
         goto out;
   }

   for (uint32_t i = 0; i < submit->signal_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf, submit->signals[i].sync,
                                    true /* is_signal */,
                                    submit->signals[i].signal_value);
      if (result != VK_SUCCESS)
         goto out;
   }

   result = anv_execbuf_add_syncobj(device, &execbuf, trtt->timeline_handle,
                                    I915_EXEC_FENCE_SIGNAL,
                                    trtt_bbo->timeline_val);
   if (result != VK_SUCCESS)
      goto out;

   result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL,
                               0);
   if (result != VK_SUCCESS)
      goto out;

   for (int i = 0; i < trtt->num_page_table_bos; i++) {
      result = anv_execbuf_add_bo(device, &execbuf, trtt->page_table_bos[i],
                                  NULL, EXEC_OBJECT_WRITE);
      if (result != VK_SUCCESS)
         goto out;
   }

   if (queue->sync) {
      result = anv_execbuf_add_sync(device, &execbuf, queue->sync,
                                    true /* is_signal */,
                                    0 /* signal_value */);
      if (result != VK_SUCCESS)
         goto out;
   }

   result = anv_execbuf_add_bo(device, &execbuf, trtt_bbo->bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto out;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

   execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf.objects,
      .buffer_count = execbuf.bo_count,
      .batch_start_offset = 0,
      .batch_len = trtt_bbo->size,
      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | exec_flags,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };
   setup_execbuf_fence_params(&execbuf);

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(device, &execbuf.execbuf);
   if (ret) {
      result = vk_device_set_lost(&device->vk,
                                  "trtt anv_gem_execbuffer failed: %m");
      goto out;
   }

   if (queue->sync) {
      result = vk_sync_wait(&device->vk, queue->sync, 0,
                            VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
      if (result != VK_SUCCESS) {
         result = vk_queue_set_lost(&queue->vk, "trtt sync wait failed");
         goto out;
      }
   }

out:
   anv_execbuf_finish(&execbuf);
   return result;
}

VkResult
i915_queue_exec_trace(struct anv_queue *queue,
                      struct anv_utrace_submit *submit)
{
   assert(util_dynarray_num_elements(&submit->batch_bos,
                                     struct anv_bo *) > 0);

   return anv_queue_exec_utrace_locked(queue, submit);
}