/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

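/* Thin wrapper around DRM_IOCTL_I915_GEM_SET_DOMAIN, used both to fault in
 * pages for freshly created BOs and to validate userptr buffers below.
 */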
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

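/* Allocate a GEM buffer object. Kernels without memory class/instance
 * support only know the legacy DRM_IOCTL_I915_GEM_CREATE; newer ones take
 * GEM_CREATE_EXT with a chain of extensions selecting memory regions,
 * protected (PXP) content and a PAT entry.
 */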
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
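   /* Each create_ext extension is linked into create.extensions via
    * intel_i915_gem_add_ext(); the extension structs live on the stack and
    * must stay in scope until the ioctl below.
    */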
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

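/* Tell the kernel whether a BO's backing pages may be reclaimed under memory
 * pressure. Returns false if the kernel has already discarded the backing
 * store, in which case the buffer's contents are gone and it cannot be
 * reused as-is.
 */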
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                         I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

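/* Switch a BO between the cached (LLC/WB) and uncached caching modes via the
 * legacy SET_CACHING ioctl.
 */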
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

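/* Map a BO using the two-step MMAP_OFFSET path: ask the kernel for a fake
 * offset into the DRM file, then mmap() that offset to get a CPU pointer.
 */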
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time. Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get. For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped. The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC] = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC] = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB] = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

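/* Map a BO through the older GEM_MMAP ioctl, which returns a CPU address
 * directly. Only used on integrated platforms that lack MMAP_OFFSET support
 * (see the asserts below).
 */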
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

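/* Query the kernel's per-context reset statistics and translate them into
 * the pipe_reset_status values expected by the robustness extensions.
 */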
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing. Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing. In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

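   /* Build the validation list for execbuf. index_for_handle maps a GEM
    * handle to its slot (memset to -1 above, i.e. "not seen yet"), so a BO
    * that appears multiple times in exec_bos gets a single entry with the
    * WRITE flag merged in.
    */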
   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet. The binding operation happens at
    * submission when we supply BO handle & offset in the execbuffer list.
    */
   return true;
}

static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

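/* Wrap an existing CPU allocation in a GEM handle via GEM_USERPTR. When the
 * kernel supports I915_USERPTR_PROBE, the pages are validated up front;
 * otherwise we fall back to a set_domain() call to make sure the memory is
 * usable before it ends up in a batch.
 */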
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
            .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

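/* Export the i915 vtable consumed by the KMD-agnostic iris code. Callers
 * dispatch through the returned struct, e.g. (sketch):
 *
 *    const struct iris_kmd_backend *kmd = i915_get_backend();
 *    void *map = kmd->gem_mmap(bufmgr, bo);
 */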
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}