/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

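/* Set the CPU read/write domains on a GEM object via
 * DRM_IOCTL_I915_GEM_SET_DOMAIN.  Returns the ioctl result (0 on success).
 */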
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

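/* Allocate a new buffer object (BO) from the kernel.
 *
 * On kernels without memory-region support this uses the legacy GEM_CREATE
 * ioctl; otherwise GEM_CREATE_EXT is used so the memory placements, the
 * protected-content flag, and the PAT index can be supplied.  Returns the
 * GEM handle, or 0 on failure.
 */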
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      /* For lmem + smem placements, the NEEDS_CPU_ACCESS flag will avoid a
       * page fault when the CPU tries to access the BO.
       * Although it's counterintuitive, we cannot set this flag for
       * IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR because i915 does not
       * accept that flag for lmem-only placements.
       * When lmem-only BOs are accessed by the CPU, i915 will fault and
       * automatically migrate the BO to the lmem portion that is CPU
       * accessible.
       * The CPU_VISIBLE heap is still valuable for other reasons, however
       * (e.g., it tells the functions that calculate the iris_mmap_mode
       * that the BO can be mapped).
       */
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap,
                                                       alloc_flags & BO_ALLOC_SCANOUT)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

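/* Advise the kernel that a BO's backing pages may be discarded under memory
 * pressure (DONTNEED) or are needed again (WILLNEED).  Returns true if the
 * backing pages are still resident.
 */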
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                         I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

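/* Set whether a BO's pages are CPU-cached (snooped) or uncached, using
 * DRM_IOCTL_I915_GEM_SET_CACHING.
 */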
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

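/* Map a BO with the mmap_offset interface: ask the kernel for a fake offset
 * matching the desired caching mode, then mmap() that offset on the DRM fd.
 */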
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC] = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC] = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB] = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

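/* Map a BO with the legacy GEM_MMAP ioctl, which hands back a CPU pointer
 * directly.  Only valid on system-memory-only platforms.
 */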
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

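/* Map a real BO into the CPU address space, choosing between the
 * mmap_offset and legacy paths based on kernel support.
 */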
static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

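/* Query the kernel's per-context reset statistics and translate them into
 * a pipe_reset_status for the state tracker.
 */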
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock.  Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address, which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet.  The binding operation happens at
    * submission, when we supply the BO handle & offset in the execbuffer list.
    */
   return true;
}

static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

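/* Release the GEM handle backing a BO with DRM_IOCTL_GEM_CLOSE. */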
static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

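/* Wrap an existing CPU allocation in a GEM userptr object.  If the kernel
 * cannot probe the pages at creation time, validate them with a set_domain
 * call and close the handle on failure.  Returns the handle, or 0 on error.
 */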
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try to use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
            .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

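/* Return the iris KMD backend vtable for the i915 kernel driver. */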
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}