1 /*
2  * Copyright © 2022 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "i915/anv_batch_chain.h"
25 #include "anv_private.h"
26 #include "anv_measure.h"
27 
28 #include "perf/intel_perf.h"
29 #include "util/u_debug.h"
30 
31 #include "drm-uapi/i915_drm.h"
32 
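/*
 * Everything gathered for one DRM_IOCTL_I915_GEM_EXECBUFFER2 submission:
 * the execbuffer2 arguments, growable parallel arrays of exec_object2
 * entries and the anv_bos they describe, an optional array of syncobj
 * fences (with timeline values), and the allocator used for those arrays.
 */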
33 struct anv_execbuf {
34    struct drm_i915_gem_execbuffer2           execbuf;
35 
36    struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
37 
38    struct drm_i915_gem_exec_object2 *        objects;
39    uint32_t                                  bo_count;
40    uint32_t                                  bo_array_length;
41    struct anv_bo **                          bos;
42 
43    uint32_t                                  syncobj_count;
44    uint32_t                                  syncobj_array_length;
45    struct drm_i915_gem_exec_fence *          syncobjs;
46    uint64_t *                                syncobj_values;
47 
48    uint32_t                                  cmd_buffer_count;
49    struct anv_query_pool                     *perf_query_pool;
50 
51    const VkAllocationCallbacks *             alloc;
52    VkSystemAllocationScope                   alloc_scope;
53 
54    int                                       perf_query_pass;
55 };
56 
57 static void
58 anv_execbuf_finish(struct anv_execbuf *exec)
59 {
60    vk_free(exec->alloc, exec->syncobjs);
61    vk_free(exec->alloc, exec->syncobj_values);
62    vk_free(exec->alloc, exec->objects);
63    vk_free(exec->alloc, exec->bos);
64 }
65 
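/*
 * With I915_EXEC_USE_EXTENSIONS set, the kernel treats cliprects_ptr as the
 * head of a chain of i915_user_extension structs.  Walk to the end of that
 * chain and append the new extension.
 */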
66 static void
67 anv_execbuf_add_ext(struct anv_execbuf *exec,
68                     uint32_t ext_name,
69                     struct i915_user_extension *ext)
70 {
71    __u64 *iter = &exec->execbuf.cliprects_ptr;
72 
73    exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
74 
75    while (*iter != 0) {
76       iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
77    }
78 
79    ext->name = ext_name;
80 
81    *iter = (uintptr_t) ext;
82 }
83 
84 static VkResult
85 anv_execbuf_add_bo_bitset(struct anv_device *device,
86                           struct anv_execbuf *exec,
87                           uint32_t dep_words,
88                           BITSET_WORD *deps,
89                           uint32_t extra_flags);
90 
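/*
 * Add a BO (and, through its relocation list's dep bitset, the BOs it
 * depends on) to the validation list.  Each anv_bo caches its index into
 * exec->bos in exec_obj_index; a stale index from an earlier submission is
 * caught by the bounds-and-identity check below, so a BO is only added once.
 */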
91 static VkResult
92 anv_execbuf_add_bo(struct anv_device *device,
93                    struct anv_execbuf *exec,
94                    struct anv_bo *bo,
95                    struct anv_reloc_list *relocs,
96                    uint32_t extra_flags)
97 {
98    struct drm_i915_gem_exec_object2 *obj = NULL;
99 
100    if (bo->exec_obj_index < exec->bo_count &&
101        exec->bos[bo->exec_obj_index] == bo)
102       obj = &exec->objects[bo->exec_obj_index];
103 
104    if (obj == NULL) {
105       /* We've never seen this one before.  Add it to the list and assign
106        * an id that we can use later.
107        */
108       if (exec->bo_count >= exec->bo_array_length) {
109          uint32_t new_len = exec->objects ? exec->bo_array_length * 2 : 64;
110 
111          struct drm_i915_gem_exec_object2 *new_objects =
112             vk_realloc(exec->alloc, exec->objects,
113                        new_len * sizeof(*new_objects), 8, exec->alloc_scope);
114          if (new_objects == NULL)
115             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
116 
117          exec->objects = new_objects;
118 
119          struct anv_bo **new_bos =
120             vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8,
121                        exec->alloc_scope);
122          if (new_bos == NULL)
123             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
124 
125          exec->bos = new_bos;
126          exec->bo_array_length = new_len;
127       }
128 
129       assert(exec->bo_count < exec->bo_array_length);
130 
131       bo->exec_obj_index = exec->bo_count++;
132       obj = &exec->objects[bo->exec_obj_index];
133       exec->bos[bo->exec_obj_index] = bo;
134 
135       obj->handle = bo->gem_handle;
136       obj->relocation_count = 0;
137       obj->relocs_ptr = 0;
138       obj->alignment = 0;
139       obj->offset = bo->offset;
140       obj->flags = bo->flags | extra_flags;
141       obj->rsvd1 = 0;
142       obj->rsvd2 = 0;
143    }
144 
145    if (extra_flags & EXEC_OBJECT_WRITE) {
146       obj->flags |= EXEC_OBJECT_WRITE;
147       obj->flags &= ~EXEC_OBJECT_ASYNC;
148    }
149 
150    if (relocs != NULL) {
151       return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
152                                        relocs->deps, extra_flags);
153    }
154 
155    return VK_SUCCESS;
156 }
157 
158 /* Add BO dependencies to execbuf */
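/*
 * Bit N of the dep bitset stands for the BO with GEM handle N; look each one
 * up with anv_device_lookup_bo and add it with no relocation list.
 */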
159 static VkResult
160 anv_execbuf_add_bo_bitset(struct anv_device *device,
161                           struct anv_execbuf *exec,
162                           uint32_t dep_words,
163                           BITSET_WORD *deps,
164                           uint32_t extra_flags)
165 {
166    for (uint32_t w = 0; w < dep_words; w++) {
167       BITSET_WORD mask = deps[w];
168       while (mask) {
169          int i = u_bit_scan(&mask);
170          uint32_t gem_handle = w * BITSET_WORDBITS + i;
171          struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
172          assert(bo->refcount > 0);
173          VkResult result =
174             anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
175          if (result != VK_SUCCESS)
176             return result;
177       }
178    }
179 
180    return VK_SUCCESS;
181 }
182 
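/*
 * Append a syncobj to the fence array, growing it as needed.  The parallel
 * values array is only allocated once the first non-zero timeline value
 * shows up; vk_zalloc leaves the values of previously added binary syncobjs
 * at 0.
 */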
183 static VkResult
184 anv_execbuf_add_syncobj(struct anv_device *device,
185                         struct anv_execbuf *exec,
186                         uint32_t syncobj,
187                         uint32_t flags,
188                         uint64_t timeline_value)
189 {
190    if (exec->syncobj_count >= exec->syncobj_array_length) {
191       uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
192 
193       struct drm_i915_gem_exec_fence *new_syncobjs =
194          vk_realloc(exec->alloc, exec->syncobjs,
195                     new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope);
196       if (new_syncobjs == NULL)
197          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
198 
199       exec->syncobjs = new_syncobjs;
200 
201       if (exec->syncobj_values) {
202          uint64_t *new_syncobj_values =
203             vk_realloc(exec->alloc, exec->syncobj_values,
204                        new_len * sizeof(*new_syncobj_values), 8,
205                        exec->alloc_scope);
206          if (new_syncobj_values == NULL)
207             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
208 
209          exec->syncobj_values = new_syncobj_values;
210       }
211 
212       exec->syncobj_array_length = new_len;
213    }
214 
215    if (timeline_value && !exec->syncobj_values) {
216       exec->syncobj_values =
217          vk_zalloc(exec->alloc, exec->syncobj_array_length *
218                                 sizeof(*exec->syncobj_values),
219                    8, exec->alloc_scope);
220       if (!exec->syncobj_values)
221          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
222    }
223 
224    exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
225       .handle = syncobj,
226       .flags = flags,
227    };
228    if (exec->syncobj_values)
229       exec->syncobj_values[exec->syncobj_count] = timeline_value;
230 
231    exec->syncobj_count++;
232 
233    return VK_SUCCESS;
234 }
235 
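/*
 * Translate a vk_sync into the execbuf: an anv_bo_sync becomes an extra BO
 * in the validation list (marked written when signaling), while a DRM
 * syncobj becomes an exec fence with either I915_EXEC_FENCE_SIGNAL or
 * I915_EXEC_FENCE_WAIT and an optional timeline value.
 */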
236 static VkResult
237 anv_execbuf_add_sync(struct anv_device *device,
238                      struct anv_execbuf *execbuf,
239                      struct vk_sync *sync,
240                      bool is_signal,
241                      uint64_t value)
242 {
243    /* It's illegal to signal a timeline with value 0 because that's never
244     * higher than the current value.  A timeline wait on value 0 is always
245     * trivially satisfied because every uint64_t value is >= 0.
246     */
247    if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
248       return VK_SUCCESS;
249 
250    if (vk_sync_is_anv_bo_sync(sync)) {
251       struct anv_bo_sync *bo_sync =
252          container_of(sync, struct anv_bo_sync, sync);
253 
254       assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
255 
256       return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
257                                 is_signal ? EXEC_OBJECT_WRITE : 0);
258    } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
259       struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
260 
261       if (!(sync->flags & VK_SYNC_IS_TIMELINE))
262          value = 0;
263 
264       return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
265                                      is_signal ? I915_EXEC_FENCE_SIGNAL :
266                                                  I915_EXEC_FENCE_WAIT,
267                                      value);
268    }
269 
270    unreachable("Invalid sync type");
271 }
272 
273 static VkResult
274 setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
275                              struct anv_cmd_buffer *cmd_buffer)
276 {
277    VkResult result;
278    /* Add surface dependencies (BOs) to the execbuf */
279    result = anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
280                                       cmd_buffer->surface_relocs.dep_words,
281                                       cmd_buffer->surface_relocs.deps, 0);
282    if (result != VK_SUCCESS)
283       return result;
284 
285    /* First, we walk over all of the bos we've seen and add them and their
286     * relocations to the validate list.
287     */
288    struct anv_batch_bo **bbo;
289    u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
290       result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
291                                   (*bbo)->bo, &(*bbo)->relocs, 0);
292       if (result != VK_SUCCESS)
293          return result;
294    }
295 
296    struct anv_bo **bo_entry;
297    u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
298       result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
299                                   *bo_entry, NULL, 0);
300       if (result != VK_SUCCESS)
301          return result;
302    }
303 
304    return VK_SUCCESS;
305 }
306 
307 static VkResult
308 pin_state_pool(struct anv_device *device,
309                struct anv_execbuf *execbuf,
310                struct anv_state_pool *pool)
311 {
312    anv_block_pool_foreach_bo(bo, &pool->block_pool) {
313       VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
314       if (result != VK_SUCCESS)
315          return result;
316    }
317 
318    return VK_SUCCESS;
319 }
320 
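/*
 * Pick the kernel context and execbuf flags for a submission.  With VM
 * control each queue has its own context (and a separate one for companion
 * RCS batches), so no engine-selection flags are needed; otherwise the
 * shared device context is used and queue->exec_flags selects the engine.
 */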
321 static void
322 get_context_and_exec_flags(struct anv_queue *queue,
323                            bool is_companion_rcs_batch,
324                            uint64_t *exec_flags,
325                            uint32_t *context_id)
326 {
327    assert(queue != NULL);
328 
329    struct anv_device *device = queue->device;
330 
331    /* Submit the batch to index 0, which is the main virtual engine. */
332    *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags;
333 
334    *context_id = device->physical->has_vm_control ?
335                  is_companion_rcs_batch ?
336                  queue->companion_rcs_id :
337                  queue->context_id :
338                  device->context_id;
339 }
340 
341 static VkResult
342 anv_execbuf_add_trtt_bos(struct anv_device *device,
343                          struct anv_execbuf *execbuf)
344 {
345    struct anv_trtt *trtt = &device->trtt;
346    VkResult result = VK_SUCCESS;
347 
348    /* If l3_addr is zero, we're not using TR-TT, so there's no BO to add. */
349    if (!trtt->l3_addr)
350       return VK_SUCCESS;
351 
352    pthread_mutex_lock(&trtt->mutex);
353 
354    for (int i = 0; i < trtt->num_page_table_bos; i++) {
355       result = anv_execbuf_add_bo(device, execbuf, trtt->page_table_bos[i],
356                                   NULL, 0);
357       if (result != VK_SUCCESS)
358          goto out;
359    }
360 
361 out:
362    pthread_mutex_unlock(&trtt->mutex);
363    return result;
364 }
365 
366 static VkResult
367 setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
368                               struct anv_queue *queue,
369                               struct anv_cmd_buffer **cmd_buffers,
370                               uint32_t num_cmd_buffers)
371 {
372    struct anv_device *device = queue->device;
373    VkResult result;
374 
375    if (unlikely(device->physical->measure_device.config)) {
376       for (uint32_t i = 0; i < num_cmd_buffers; i++)
377          anv_measure_submit(cmd_buffers[i]);
378    }
379 
380    /* Edit the tail of the command buffers to chain them all together if they
381     * can be.
382     */
383    anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
384 
385    for (uint32_t i = 0; i < num_cmd_buffers; i++) {
386       result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
387       if (result != VK_SUCCESS)
388          return result;
389    }
390 
391    /* Add all the global BOs to the object list for the softpin case. */
392    result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
393    if (result != VK_SUCCESS)
394       return result;
395 
396    if (device->physical->va.bindless_surface_state_pool.size > 0) {
397       result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
398       if (result != VK_SUCCESS)
399          return result;
400    }
401 
402    if (device->physical->va.indirect_push_descriptor_pool.size > 0) {
403       result = pin_state_pool(device, execbuf, &device->indirect_push_descriptor_pool);
404       if (result != VK_SUCCESS)
405          return result;
406    }
407 
408    result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
409    if (result != VK_SUCCESS)
410       return result;
411 
412    result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
413    if (result != VK_SUCCESS)
414       return result;
415 
416    result = pin_state_pool(device, execbuf, &device->general_state_pool);
417    if (result != VK_SUCCESS)
418       return result;
419 
420    result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
421    if (result != VK_SUCCESS)
422       return result;
423 
424    result = pin_state_pool(device, execbuf, &device->binding_table_pool);
425    if (result != VK_SUCCESS)
426       return result;
427 
428    /* Add the BOs for all user-allocated memory objects: we can't track their
429     * use after the update-after-bind updates allowed by
430     * VK_EXT_descriptor_indexing, and sparse resources can't be tracked either.
431     */
432    list_for_each_entry(struct anv_device_memory, mem,
433                        &device->memory_objects, link) {
434       result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
435       if (result != VK_SUCCESS)
436          return result;
437    }
438 
439    result = anv_execbuf_add_trtt_bos(device, execbuf);
440    if (result != VK_SUCCESS)
441       return result;
442 
443    /* Add the private BOs from all images: we can't track their use after the
444     * update-after-bind updates allowed by VK_EXT_descriptor_indexing.
445     */
446    list_for_each_entry(struct anv_image, image,
447                        &device->image_private_objects, link) {
448       struct anv_bo *private_bo =
449          image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo;
450       result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0);
451       if (result != VK_SUCCESS)
452          return result;
453    }
454 
455    struct list_head *batch_bo = &cmd_buffers[0]->batch_bos;
456    struct anv_batch_bo *first_batch_bo =
457       list_first_entry(batch_bo, struct anv_batch_bo, link);
458 
459    /* The kernel requires that the last entry in the validation list be the
460     * batch buffer to execute.  We can simply swap the element
461     * corresponding to the first batch_bo in the chain with the last
462     * element in the list.
463     */
464    if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
465       uint32_t idx = first_batch_bo->bo->exec_obj_index;
466       uint32_t last_idx = execbuf->bo_count - 1;
467 
468       struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
469       assert(execbuf->bos[idx] == first_batch_bo->bo);
470 
471       execbuf->objects[idx] = execbuf->objects[last_idx];
472       execbuf->bos[idx] = execbuf->bos[last_idx];
473       execbuf->bos[idx]->exec_obj_index = idx;
474 
475       execbuf->objects[last_idx] = tmp_obj;
476       execbuf->bos[last_idx] = first_batch_bo->bo;
477       first_batch_bo->bo->exec_obj_index = last_idx;
478    }
479 
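   /*
    * On integrated GPUs whose memory is not coherent with the CPU caches,
    * flush the CPU-written batch contents so the GPU reads up-to-date data.
    */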
480 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
481    if (device->physical->memory.need_flush &&
482        anv_bo_needs_host_cache_flush(device->batch_bo_pool.bo_alloc_flags))
483       anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
484 #endif
485 
486    assert(!cmd_buffers[0]->is_companion_rcs_cmd_buffer || device->physical->has_vm_control);
487    uint64_t exec_flags = 0;
488    uint32_t context_id;
489    get_context_and_exec_flags(queue, cmd_buffers[0]->is_companion_rcs_cmd_buffer,
490                               &exec_flags, &context_id);
491 
492    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
493       .buffers_ptr = (uintptr_t) execbuf->objects,
494       .buffer_count = execbuf->bo_count,
495       .batch_start_offset = 0,
496       .batch_len = 0,
497       .cliprects_ptr = 0,
498       .num_cliprects = 0,
499       .DR1 = 0,
500       .DR4 = 0,
501       .flags = I915_EXEC_NO_RELOC |
502                I915_EXEC_HANDLE_LUT |
503                exec_flags,
504       .rsvd1 = context_id,
505       .rsvd2 = 0,
506    };
507 
508    return VK_SUCCESS;
509 }
510 
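/*
 * Build an execbuf that only runs the device's trivial batch.  This is used
 * when a submission carries no command buffers, so its waits and signals can
 * still be attached to a kernel submission.
 */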
511 static VkResult
512 setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
513 {
514    struct anv_device *device = queue->device;
515    VkResult result = anv_execbuf_add_bo(device, execbuf,
516                                         device->trivial_batch_bo,
517                                         NULL, 0);
518    if (result != VK_SUCCESS)
519       return result;
520 
521    uint64_t exec_flags = 0;
522    uint32_t context_id;
523    get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
524 
525    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
526       .buffers_ptr = (uintptr_t) execbuf->objects,
527       .buffer_count = execbuf->bo_count,
528       .batch_start_offset = 0,
529       .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
530       .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
531       .rsvd1 = context_id,
532       .rsvd2 = 0,
533    };
534 
535    return VK_SUCCESS;
536 }
537 
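/*
 * Build the execbuf for a utrace submission: the workaround BO plus the
 * trace batch BOs (flushed on integrated GPUs), with submit->sync signaled
 * on completion and the first batch BO swapped to the end of the list, as
 * the kernel requires.
 */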
538 static VkResult
539 setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
540                      struct anv_utrace_submit *submit)
541 {
542    struct anv_device *device = queue->device;
543 
544    /* Always add the workaround BO as it includes a driver identifier for the
545     * error_state.
546     */
547    VkResult result = anv_execbuf_add_bo(device, execbuf,
548                                         device->workaround_bo,
549                                         NULL, 0);
550    if (result != VK_SUCCESS)
551       return result;
552 
553    util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, _bo) {
554       struct anv_bo *bo = *_bo;
555 
556       result = anv_execbuf_add_bo(device, execbuf, bo,
557                                   &submit->relocs, 0);
558       if (result != VK_SUCCESS)
559          return result;
560 
561 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
562       if (device->physical->memory.need_flush &&
563           anv_bo_needs_host_cache_flush(bo->alloc_flags))
564          intel_flush_range(bo->map, bo->size);
565 #endif
566    }
567 
568    result = anv_execbuf_add_sync(device, execbuf, submit->sync,
569                                  true /* is_signal */, 0 /* value */);
570    if (result != VK_SUCCESS)
571       return result;
572 
573    struct anv_bo *batch_bo =
574       *util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
575    if (batch_bo->exec_obj_index != execbuf->bo_count - 1) {
576       uint32_t idx = batch_bo->exec_obj_index;
577       uint32_t last_idx = execbuf->bo_count - 1;
578 
579       struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
580       assert(execbuf->bos[idx] == batch_bo);
581 
582       execbuf->objects[idx] = execbuf->objects[last_idx];
583       execbuf->bos[idx] = execbuf->bos[last_idx];
584       execbuf->bos[idx]->exec_obj_index = idx;
585 
586       execbuf->objects[last_idx] = tmp_obj;
587       execbuf->bos[last_idx] = batch_bo;
588       batch_bo->exec_obj_index = last_idx;
589    }
590 
591    uint64_t exec_flags = 0;
592    uint32_t context_id;
593    get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
594 
595    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
596       .buffers_ptr = (uintptr_t) execbuf->objects,
597       .buffer_count = execbuf->bo_count,
598       .batch_start_offset = 0,
599       .batch_len = submit->batch.next - submit->batch.start,
600       .flags = I915_EXEC_NO_RELOC |
601                I915_EXEC_HANDLE_LUT |
602                I915_EXEC_FENCE_ARRAY |
603                exec_flags,
604       .rsvd1 = context_id,
605       .rsvd2 = 0,
606       .num_cliprects = execbuf->syncobj_count,
607       .cliprects_ptr = (uintptr_t)execbuf->syncobjs,
608    };
609 
610    return VK_SUCCESS;
611 }
612 
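/*
 * Thin wrapper around the execbuffer2 ioctl: the _WR variant is required
 * when I915_EXEC_FENCE_OUT asks the kernel to return an output fence, and
 * transient ENOMEM failures are retried.
 */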
613 static int
614 anv_gem_execbuffer(struct anv_device *device,
615                    struct drm_i915_gem_execbuffer2 *execbuf)
616 {
617    int ret;
618    const unsigned long request = (execbuf->flags & I915_EXEC_FENCE_OUT) ?
619       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR :
620       DRM_IOCTL_I915_GEM_EXECBUFFER2;
621 
622    do {
623       ret = intel_ioctl(device->fd, request, execbuf);
624    } while (ret && errno == ENOMEM);
625 
626    return ret;
627 }
628 
629 static VkResult
630 anv_queue_exec_utrace_locked(struct anv_queue *queue,
631                              struct anv_utrace_submit *submit)
632 {
633    assert(util_dynarray_num_elements(&submit->batch_bos,
634                                      struct anv_bo *) > 0);
635 
636    struct anv_device *device = queue->device;
637    struct anv_execbuf execbuf = {
638       .alloc = &device->vk.alloc,
639       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
640    };
641 
642    VkResult result = setup_utrace_execbuf(&execbuf, queue, submit);
643    if (result != VK_SUCCESS)
644       goto error;
645 
646    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
647 
648    int ret = queue->device->info->no_hw ? 0 :
649       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
650    if (ret)
651       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
652 
653  error:
654    anv_execbuf_finish(&execbuf);
655 
656    return result;
657 }
658 
659 static void
660 anv_i915_debug_submit(const struct anv_execbuf *execbuf)
661 {
662    uint32_t total_size_kb = 0, total_vram_only_size_kb = 0;
663    for (uint32_t i = 0; i < execbuf->bo_count; i++) {
664       const struct anv_bo *bo = execbuf->bos[i];
665       total_size_kb += bo->size / 1024;
666       if (anv_bo_is_vram_only(bo))
667          total_vram_only_size_kb += bo->size / 1024;
668    }
669 
670    fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n",
671            execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len,
672            (float)total_size_kb / 1024.0f,
673            (float)total_vram_only_size_kb / 1024.0f);
674    for (uint32_t i = 0; i < execbuf->bo_count; i++) {
675       const struct anv_bo *bo = execbuf->bos[i];
676 
677       fprintf(stderr, "   BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
678               "KB handle=%05u capture=%u vram_only=%u name=%s\n",
679               bo->offset, bo->offset + bo->size - 1, bo->size / 1024,
680               bo->gem_handle, (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
681               anv_bo_is_vram_only(bo), bo->name);
682    }
683 }
684 
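/*
 * Attach the collected syncobjs to the execbuf: timeline values require the
 * DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES extension, otherwise the
 * legacy I915_EXEC_FENCE_ARRAY path (which reuses the cliprects fields) is
 * enough.
 */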
685 static void
686 setup_execbuf_fence_params(struct anv_execbuf *execbuf)
687 {
688    if (execbuf->syncobj_values) {
689       execbuf->timeline_fences.fence_count = execbuf->syncobj_count;
690       execbuf->timeline_fences.handles_ptr = (uintptr_t)execbuf->syncobjs;
691       execbuf->timeline_fences.values_ptr = (uintptr_t)execbuf->syncobj_values;
692       anv_execbuf_add_ext(execbuf,
693                           DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
694                           &execbuf->timeline_fences.base);
695    } else if (execbuf->syncobjs) {
696       execbuf->execbuf.flags |= I915_EXEC_FENCE_ARRAY;
697       execbuf->execbuf.num_cliprects = execbuf->syncobj_count;
698       execbuf->execbuf.cliprects_ptr = (uintptr_t)execbuf->syncobjs;
699    }
700 }
701 
702 static VkResult
703 i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
704                                      struct anv_cmd_buffer *companion_rcs_cmd_buffer,
705                                      uint32_t wait_count,
706                                      const struct vk_sync_wait *waits)
707 {
708    struct anv_device *device = queue->device;
709    struct anv_execbuf execbuf = {
710       .alloc = &queue->device->vk.alloc,
711       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
712    };
713 
714    /* Always add the workaround BO as it includes a driver identifier for the
715     * error_state.
716     */
717    VkResult result =
718       anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
719    if (result != VK_SUCCESS)
720       goto error;
721 
722    for (uint32_t i = 0; i < wait_count; i++) {
723       result = anv_execbuf_add_sync(device, &execbuf,
724                                     waits[i].sync,
725                                     false /* is_signal */,
726                                     waits[i].wait_value);
727       if (result != VK_SUCCESS)
728          goto error;
729    }
730 
731    if (queue->companion_sync) {
732       result = anv_execbuf_add_sync(device, &execbuf,
733                                     queue->companion_sync,
734                                     true /* is_signal */, 0);
735       if (result != VK_SUCCESS)
736          goto error;
737    }
738 
739    result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
740                                           &companion_rcs_cmd_buffer, 1);
741    if (result != VK_SUCCESS)
742       goto error;
743 
744    if (INTEL_DEBUG(DEBUG_SUBMIT))
745       anv_i915_debug_submit(&execbuf);
746 
747    anv_cmd_buffer_exec_batch_debug(queue, 1, &companion_rcs_cmd_buffer, NULL, 0);
748 
749    setup_execbuf_fence_params(&execbuf);
750 
751    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
752 
753    int ret = queue->device->info->no_hw ? 0 :
754       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
755    if (ret) {
756       anv_i915_debug_submit(&execbuf);
757       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
758    }
759 
760  error:
761    anv_execbuf_finish(&execbuf);
762    return result;
763 }
764 
765 VkResult
766 i915_queue_exec_locked(struct anv_queue *queue,
767                        uint32_t wait_count,
768                        const struct vk_sync_wait *waits,
769                        uint32_t cmd_buffer_count,
770                        struct anv_cmd_buffer **cmd_buffers,
771                        uint32_t signal_count,
772                        const struct vk_sync_signal *signals,
773                        struct anv_query_pool *perf_query_pool,
774                        uint32_t perf_query_pass,
775                        struct anv_utrace_submit *utrace_submit)
776 {
777    struct anv_device *device = queue->device;
778    struct anv_execbuf execbuf = {
779       .alloc = &queue->device->vk.alloc,
780       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
781       .perf_query_pass = perf_query_pass,
782    };
783    VkResult result;
784 
785    if (utrace_submit &&
786        util_dynarray_num_elements(&utrace_submit->batch_bos,
787                                   struct anv_bo *) == 0) {
788       result = anv_execbuf_add_sync(device, &execbuf,
789                                     utrace_submit->sync,
790                                     true /* is_signal */,
791                                     0);
792       if (result != VK_SUCCESS)
793          goto error;
794 
795       /* The utrace submission has no batch buffer of its own, so there is nothing else to submit for it. */
796       utrace_submit = NULL;
797    }
798 
799    /* Always add the workaround BO as it includes a driver identifier for the
800     * error_state.
801     */
802    result =
803       anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
804    if (result != VK_SUCCESS)
805       goto error;
806 
807    for (uint32_t i = 0; i < wait_count; i++) {
808       result = anv_execbuf_add_sync(device, &execbuf,
809                                     waits[i].sync,
810                                     false /* is_signal */,
811                                     waits[i].wait_value);
812       if (result != VK_SUCCESS)
813          goto error;
814    }
815 
816    for (uint32_t i = 0; i < signal_count; i++) {
817       result = anv_execbuf_add_sync(device, &execbuf,
818                                     signals[i].sync,
819                                     true /* is_signal */,
820                                     signals[i].signal_value);
821       if (result != VK_SUCCESS)
822          goto error;
823    }
824 
825    if (queue->sync) {
826       result = anv_execbuf_add_sync(device, &execbuf,
827                                     queue->sync,
828                                     true /* is_signal */,
829                                     0 /* signal_value */);
830       if (result != VK_SUCCESS)
831          goto error;
832    }
833 
834    if (cmd_buffer_count) {
835       result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers,
836                                              cmd_buffer_count);
837    } else {
838       result = setup_empty_execbuf(&execbuf, queue);
839    }
840 
841    if (result != VK_SUCCESS)
842       goto error;
843 
844    const bool has_perf_query =
845       perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
846 
847    if (INTEL_DEBUG(DEBUG_SUBMIT))
848       anv_i915_debug_submit(&execbuf);
849 
850    anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
851                                    perf_query_pool, perf_query_pass);
852 
853    setup_execbuf_fence_params(&execbuf);
854 
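   /*
    * For a KHR_performance_query pass: reprogram the OA metrics set through
    * the i915-perf stream if needed, then submit a small pre-batch taken
    * from the query pool BO (at this pass's preamble offset) ahead of the
    * real workload.
    */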
855    if (has_perf_query) {
856       assert(perf_query_pass < perf_query_pool->n_passes);
857       struct intel_perf_query_info *query_info =
858          perf_query_pool->pass_query[perf_query_pass];
859 
860       /* Some performance queries only use the pipeline statistics HW and don't
861        * need OA, so there is no need to reconfigure it.
862        */
863       if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
864           (query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
865            query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
866          int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
867                                (void *)(uintptr_t) query_info->oa_metrics_set_id);
868          if (ret < 0) {
869             result = vk_device_set_lost(&device->vk,
870                                         "i915-perf config failed: %s",
871                                         strerror(errno));
872          }
873       }
874 
875       struct anv_bo *pass_batch_bo = perf_query_pool->bo;
876 
877       struct drm_i915_gem_exec_object2 query_pass_object = {
878          .handle = pass_batch_bo->gem_handle,
879          .offset = pass_batch_bo->offset,
880          .flags  = pass_batch_bo->flags,
881       };
882 
883       uint64_t exec_flags = 0;
884       uint32_t context_id;
885       get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
886 
887       struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
888          .buffers_ptr = (uintptr_t) &query_pass_object,
889          .buffer_count = 1,
890          .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
891                                                               perf_query_pass),
892          .flags = I915_EXEC_HANDLE_LUT | exec_flags,
893          .rsvd1 = context_id,
894       };
895 
896       int ret = queue->device->info->no_hw ? 0 :
897          anv_gem_execbuffer(queue->device, &query_pass_execbuf);
898       if (ret)
899          result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
900    }
901 
902    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
903 
904    int ret = queue->device->info->no_hw ? 0 :
905       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
906    if (ret) {
907       anv_i915_debug_submit(&execbuf);
908       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
909    }
910 
911    if (cmd_buffer_count != 0 && cmd_buffers[0]->companion_rcs_cmd_buffer) {
912       struct anv_cmd_buffer *companion_rcs_cmd_buffer =
913          cmd_buffers[0]->companion_rcs_cmd_buffer;
914       assert(companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer);
915       assert(cmd_buffer_count == 1);
916       result = i915_companion_rcs_queue_exec_locked(queue,
917                                                     cmd_buffers[0]->companion_rcs_cmd_buffer, wait_count,
918                                                     waits);
919    }
920 
921    result = anv_queue_post_submit(queue, result);
922 
923  error:
924    anv_execbuf_finish(&execbuf);
925 
926    if (result == VK_SUCCESS && utrace_submit)
927       result = anv_queue_exec_utrace_locked(queue, utrace_submit);
928 
929    return result;
930 }
931 
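/*
 * Submit a single standalone batch BO and block until the kernel reports
 * that it has completed.
 */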
932 VkResult
933 i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
934                           uint32_t batch_bo_size, bool is_companion_rcs_batch)
935 {
936    struct anv_device *device = queue->device;
937    struct anv_execbuf execbuf = {
938       .alloc = &queue->device->vk.alloc,
939       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
940    };
941 
942    VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
943    if (result != VK_SUCCESS)
944       goto fail;
945 
946    assert(!is_companion_rcs_batch || device->physical->has_vm_control);
947    uint64_t exec_flags = 0;
948    uint32_t context_id;
949    get_context_and_exec_flags(queue, is_companion_rcs_batch, &exec_flags,
950                               &context_id);
951 
952    execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
953       .buffers_ptr = (uintptr_t) execbuf.objects,
954       .buffer_count = execbuf.bo_count,
955       .batch_start_offset = 0,
956       .batch_len = batch_bo_size,
957       .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
958       .rsvd1 = context_id,
959       .rsvd2 = 0,
960    };
961 
962    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
963 
964    if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
965       result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
966       goto fail;
967    }
968 
969    result = anv_device_wait(device, batch_bo, INT64_MAX);
970    if (result != VK_SUCCESS)
971       result = vk_device_set_lost(&device->vk,
972                                   "anv_device_wait failed: %m");
973 
974 fail:
975    anv_execbuf_finish(&execbuf);
976    return result;
977 }
978 
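/*
 * Submit a TR-TT page-table update batch: besides the usual waits and
 * signals, this signals the TR-TT timeline syncobj, marks the page table
 * BOs as written, and (when the queue has a sync) waits for completion
 * before returning.
 */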
979 VkResult
980 i915_execute_trtt_batch(struct anv_sparse_submission *submit,
981                         struct anv_trtt_batch_bo *trtt_bbo)
982 {
983    struct anv_queue *queue = submit->queue;
984    struct anv_device *device = queue->device;
985    struct anv_trtt *trtt = &device->trtt;
986    struct anv_execbuf execbuf = {
987       .alloc = &device->vk.alloc,
988       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
989    };
990    VkResult result;
991 
992    for (uint32_t i = 0; i < submit->wait_count; i++) {
993       result = anv_execbuf_add_sync(device, &execbuf, submit->waits[i].sync,
994                                     false /* is_signal */,
995                                     submit->waits[i].wait_value);
996       if (result != VK_SUCCESS)
997          goto out;
998    }
999 
1000    for (uint32_t i = 0; i < submit->signal_count; i++) {
1001       result = anv_execbuf_add_sync(device, &execbuf, submit->signals[i].sync,
1002                                     true /* is_signal */,
1003                                     submit->signals[i].signal_value);
1004       if (result != VK_SUCCESS)
1005          goto out;
1006    }
1007 
1008    result = anv_execbuf_add_syncobj(device, &execbuf, trtt->timeline_handle,
1009                                     I915_EXEC_FENCE_SIGNAL,
1010                                     trtt_bbo->timeline_val);
1011    if (result != VK_SUCCESS)
1012       goto out;
1013 
1014 
1015    result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL,
1016                                0);
1017    if (result != VK_SUCCESS)
1018       goto out;
1019 
1020    for (int i = 0; i < trtt->num_page_table_bos; i++) {
1021       result = anv_execbuf_add_bo(device, &execbuf, trtt->page_table_bos[i],
1022                                   NULL, EXEC_OBJECT_WRITE);
1023       if (result != VK_SUCCESS)
1024          goto out;
1025    }
1026 
1027    if (queue->sync) {
1028       result = anv_execbuf_add_sync(device, &execbuf, queue->sync,
1029                                     true /* is_signal */,
1030                                     0 /* signal_value */);
1031       if (result != VK_SUCCESS)
1032          goto out;
1033    }
1034 
1035    result = anv_execbuf_add_bo(device, &execbuf, trtt_bbo->bo, NULL, 0);
1036    if (result != VK_SUCCESS)
1037       goto out;
1038 
1039    if (INTEL_DEBUG(DEBUG_SUBMIT))
1040       anv_i915_debug_submit(&execbuf);
1041 
1042    uint64_t exec_flags = 0;
1043    uint32_t context_id;
1044    get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
1045 
1046    execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
1047       .buffers_ptr = (uintptr_t) execbuf.objects,
1048       .buffer_count = execbuf.bo_count,
1049       .batch_start_offset = 0,
1050       .batch_len = trtt_bbo->size,
1051       .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | exec_flags,
1052       .rsvd1 = context_id,
1053       .rsvd2 = 0,
1054    };
1055    setup_execbuf_fence_params(&execbuf);
1056 
1057    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
1058 
1059    int ret = queue->device->info->no_hw ? 0 :
1060       anv_gem_execbuffer(device, &execbuf.execbuf);
1061    if (ret) {
1062       result = vk_device_set_lost(&device->vk,
1063                                   "trtt anv_gem_execbuffer failed: %m");
1064       goto out;
1065    }
1066 
1067    if (queue->sync) {
1068       result = vk_sync_wait(&device->vk, queue->sync, 0,
1069                             VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
1070       if (result != VK_SUCCESS) {
1071          result = vk_queue_set_lost(&queue->vk, "trtt sync wait failed");
1072          goto out;
1073       }
1074    }
1075 
1076 out:
1077    anv_execbuf_finish(&execbuf);
1078    return result;
1079 }
1080 
1081 VkResult
1082 i915_queue_exec_trace(struct anv_queue *queue,
1083                       struct anv_utrace_submit *submit)
1084 {
1085    assert(util_dynarray_num_elements(&submit->batch_bos,
1086                                      struct anv_bo *) > 0);
1087 
1088    return anv_queue_exec_utrace_locked(queue, submit);
1089 }
1090