/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

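/**
 * Set the GEM read/write domains for a buffer via
 * DRM_IOCTL_I915_GEM_SET_DOMAIN.
 */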
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

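/**
 * Allocate a new GEM buffer object.
 *
 * Uses the legacy GEM_CREATE ioctl on platforms without memory class/instance
 * support, and GEM_CREATE_EXT with the memory regions, protected content, and
 * PAT extensions otherwise.  Returns the GEM handle, or 0 on failure.
 */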
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      /* For lmem + smem placements, the NEEDS_CPU_ACCESS flag will avoid a
       * page fault when the CPU tries to access the BO.
       * Although it's counterintuitive, we cannot set this flag for
       * IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR because i915 does not
       * accept that flag for lmem only placements.
       * When lmem only BOs are accessed by the CPU, i915 will fault and
       * automatically migrate the BO to the lmem portion that is CPU
       * accessible.
       * The CPU_VISIBLE heap is still valuable for other reasons however
       * (e.g., it tells the functions which calculate the iris_mmap_mode
       * that it can be mapped).
       */
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap,
                                                       alloc_flags & BO_ALLOC_SCANOUT)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

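/**
 * Tell the kernel whether the BO's backing pages may be discarded when idle
 * (I915_MADV_WILLNEED vs. I915_MADV_DONTNEED).  Returns true if the pages
 * are still retained after the call.
 */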
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                                  I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

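/**
 * Set the BO's caching mode to cached or uncached via
 * DRM_IOCTL_I915_GEM_SET_CACHING.
 */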
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

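/**
 * Map a BO through the GEM_MMAP_OFFSET interface: pick the mmap caching mode
 * (fixed on discrete platforms, selectable on integrated ones), ask the
 * kernel for a fake offset, then mmap() that offset on the DRM fd.
 */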
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC]    = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC]    = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB]    = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

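/**
 * Map a BO using the legacy GEM_MMAP ioctl.  Only valid on integrated
 * platforms (no VRAM) with WB or WC mmap modes.
 */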
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

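/**
 * Map a BO for CPU access, preferring GEM_MMAP_OFFSET when the kernel
 * supports it and falling back to the legacy GEM_MMAP path otherwise.
 */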
static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

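/**
 * Query DRM_IOCTL_I915_GET_RESET_STATS for this context and translate the
 * result into a pipe_reset_status (guilty, innocent, or no reset).
 */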
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

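   /* Build the validation list, de-duplicating BOs by GEM handle: if a handle
    * is already present, just OR in EXEC_OBJECT_WRITE as needed; otherwise
    * append a new entry with the BO's pinned address and flags.
    */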
   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags  = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet. The binding operation happens at
    * submission when we supply BO handle & offset in the execbuffer list.
    */
   return true;
}

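/* Likewise a no-op: without VM_BIND support there is nothing to unbind. */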
static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

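/**
 * Release the GEM handle backing a BO via DRM_IOCTL_GEM_CLOSE.
 */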
static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

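/**
 * Wrap an existing CPU allocation in a GEM userptr object.  On kernels
 * without I915_USERPTR_PROBE, validate the pages with a set_domain call and
 * close the handle if that fails.  Returns the GEM handle, or 0 on failure.
 */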
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
               .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

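/**
 * Return the KMD backend vtable wiring the iris entrypoints to their i915
 * implementations.
 */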
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}