/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

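/* Set the read and write domains of a GEM object via
 * DRM_IOCTL_I915_GEM_SET_DOMAIN.  Returns the ioctl result (0 on success).
 */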
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

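/* Allocate a new GEM buffer object.
 *
 * On kernels without memory class/instance support this falls back to the
 * legacy DRM_IOCTL_I915_GEM_CREATE path; otherwise it uses GEM_CREATE_EXT
 * with the requested memory regions and, where applicable, the
 * protected-content and PAT-index extensions.  Returns the GEM handle, or 0
 * on failure.
 */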
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

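/* Tell the kernel whether the BO's backing pages may be reclaimed
 * (DONTNEED) or must be kept (WILLNEED).  Returns whether the backing
 * storage is still present after the DRM_IOCTL_I915_GEM_MADVISE call.
 */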
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                                  I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

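/* Select CPU caching (CACHED or NONE) for the BO's backing pages via
 * DRM_IOCTL_I915_GEM_SET_CACHING.
 */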
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

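/* Map a BO into the CPU address space using the GEM mmap-offset interface:
 * query a fake offset with DRM_IOCTL_I915_GEM_MMAP_OFFSET, then mmap() the
 * DRM fd at that offset.  Returns NULL on failure.
 */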
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC]    = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC]    = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB]    = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

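/* Map a BO on kernels that lack the mmap-offset interface, using the legacy
 * DRM_IOCTL_I915_GEM_MMAP ioctl (system memory only, WB or WC).
 */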
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

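/* Map a real BO for CPU access, preferring the mmap-offset path when the
 * kernel supports it.
 */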
static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

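/* Query DRM_IOCTL_I915_GET_RESET_STATS for this batch's hardware context and
 * translate the result into a pipe_reset_status: guilty if one of our
 * batches was executing when a reset occurred, innocent if one was merely
 * pending.
 */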
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags  = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet. The binding operation happens at
    * submission when we supply BO handle & offset in the execbuffer list.
    */
   return true;
}

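/* As with i915_gem_vm_bind(), there is nothing to do here: i915 manages the
 * VMA implicitly, so unbinding always succeeds.
 */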
static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

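/* Release the GEM handle for a BO via DRM_IOCTL_GEM_CLOSE. */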
static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

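/* Wrap an existing CPU allocation in a GEM handle with
 * DRM_IOCTL_I915_GEM_USERPTR.  On kernels without I915_USERPTR_PROBE, the
 * pages are validated up front with a set_domain() call and the handle is
 * closed again if that fails.  Returns the GEM handle, or 0 on failure.
 */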
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
               .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

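/* Return the iris_kmd_backend vtable for the i915 kernel driver.  The static
 * functions above are only reachable through this table; callers dispatch
 * through it rather than calling them directly.
 */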
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}