1 /*
2  * Copyright 2008 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jerome Glisse <glisse@freedesktop.org>
26  */
27 
28 #include <linux/file.h>
29 #include <linux/pagemap.h>
30 #include <linux/sync_file.h>
31 #include <linux/dma-buf.h>
32 
33 #include <drm/amdgpu_drm.h>
34 #include <drm/drm_syncobj.h>
35 #include "amdgpu.h"
36 #include "amdgpu_trace.h"
37 #include "amdgpu_gmc.h"
38 #include "amdgpu_gem.h"
39 #include "amdgpu_ras.h"
40 
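/* Map the user fence chunk: look up the GEM handle, take a reference on the
 * backing BO for the user fence and remember its offset.  The BO must be
 * exactly one page, the offset must leave room for the 8 byte fence value,
 * and userptr BOs are rejected.
 */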
41 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
42 				      struct drm_amdgpu_cs_chunk_fence *data,
43 				      uint32_t *offset)
44 {
45 	struct drm_gem_object *gobj;
46 	struct amdgpu_bo *bo;
47 	unsigned long size;
48 
49 	gobj = drm_gem_object_lookup(p->filp, data->handle);
50 	if (gobj == NULL)
51 		return -EINVAL;
52 
53 	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
54 	p->uf_entry.priority = 0;
55 	p->uf_entry.tv.bo = &bo->tbo;
56 	/* One for TTM and one for the CS job */
57 	p->uf_entry.tv.num_shared = 2;
58 
59 	drm_gem_object_put(gobj);
60 
61 	size = amdgpu_bo_size(bo);
62 	if (size != PAGE_SIZE || data->offset > (size - 8))
63 		return -EINVAL;
64 
65 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
66 		return -EINVAL;
67 
68 	*offset = data->offset;
69 	return 0;
70 }
71 
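/* Create the parser's BO list from the handles passed in an
 * AMDGPU_CHUNK_ID_BO_HANDLES chunk instead of a pre-created list handle.
 */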
72 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
73 				      struct drm_amdgpu_bo_list_in *data)
74 {
75 	int r;
76 	struct drm_amdgpu_bo_list_entry *info = NULL;
77 
78 	r = amdgpu_bo_create_list_entry_array(data, &info);
79 	if (r)
80 		return r;
81 
82 	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
83 				  &p->bo_list);
84 	if (r)
85 		goto error_free;
86 
87 	kvfree(info);
88 	return 0;
89 
90 error_free:
91 	if (info)
92 		kvfree(info);
93 
94 	return r;
95 }
96 
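/* Copy the chunk array from userspace, validate the individual chunks and
 * allocate the job.  User fence and BO handle chunks are processed here;
 * dependency and syncobj chunks are only copied and handled later.
 */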
97 static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
98 {
99 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
100 	struct amdgpu_vm *vm = &fpriv->vm;
101 	uint64_t *chunk_array_user;
102 	uint64_t *chunk_array;
103 	unsigned size, num_ibs = 0;
104 	uint32_t uf_offset = 0;
105 	int i;
106 	int ret;
107 
108 	if (cs->in.num_chunks == 0)
109 		return -EINVAL;
110 
111 	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
112 	if (!chunk_array)
113 		return -ENOMEM;
114 
115 	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
116 	if (!p->ctx) {
117 		ret = -EINVAL;
118 		goto free_chunk;
119 	}
120 
121 	mutex_lock(&p->ctx->lock);
122 
123 	/* skip guilty context job */
124 	if (atomic_read(&p->ctx->guilty) == 1) {
125 		ret = -ECANCELED;
126 		goto free_chunk;
127 	}
128 
129 	/* get chunks */
130 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
131 	if (copy_from_user(chunk_array, chunk_array_user,
132 			   sizeof(uint64_t)*cs->in.num_chunks)) {
133 		ret = -EFAULT;
134 		goto free_chunk;
135 	}
136 
137 	p->nchunks = cs->in.num_chunks;
138 	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
139 			    GFP_KERNEL);
140 	if (!p->chunks) {
141 		ret = -ENOMEM;
142 		goto free_chunk;
143 	}
144 
145 	for (i = 0; i < p->nchunks; i++) {
146 		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
147 		struct drm_amdgpu_cs_chunk user_chunk;
148 		uint32_t __user *cdata;
149 
150 		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
151 		if (copy_from_user(&user_chunk, chunk_ptr,
152 				       sizeof(struct drm_amdgpu_cs_chunk))) {
153 			ret = -EFAULT;
154 			i--;
155 			goto free_partial_kdata;
156 		}
157 		p->chunks[i].chunk_id = user_chunk.chunk_id;
158 		p->chunks[i].length_dw = user_chunk.length_dw;
159 
160 		size = p->chunks[i].length_dw;
161 		cdata = u64_to_user_ptr(user_chunk.chunk_data);
162 
163 		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
164 		if (p->chunks[i].kdata == NULL) {
165 			ret = -ENOMEM;
166 			i--;
167 			goto free_partial_kdata;
168 		}
169 		size *= sizeof(uint32_t);
170 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
171 			ret = -EFAULT;
172 			goto free_partial_kdata;
173 		}
174 
175 		switch (p->chunks[i].chunk_id) {
176 		case AMDGPU_CHUNK_ID_IB:
177 			++num_ibs;
178 			break;
179 
180 		case AMDGPU_CHUNK_ID_FENCE:
181 			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
182 			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
183 				ret = -EINVAL;
184 				goto free_partial_kdata;
185 			}
186 
187 			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
188 							 &uf_offset);
189 			if (ret)
190 				goto free_partial_kdata;
191 
192 			break;
193 
194 		case AMDGPU_CHUNK_ID_BO_HANDLES:
195 			size = sizeof(struct drm_amdgpu_bo_list_in);
196 			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
197 				ret = -EINVAL;
198 				goto free_partial_kdata;
199 			}
200 
201 			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
202 			if (ret)
203 				goto free_partial_kdata;
204 
205 			break;
206 
207 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
208 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
209 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
210 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
211 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
212 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
213 			break;
214 
215 		default:
216 			ret = -EINVAL;
217 			goto free_partial_kdata;
218 		}
219 	}
220 
221 	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
222 	if (ret)
223 		goto free_all_kdata;
224 
225 	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
226 		ret = -ECANCELED;
227 		goto free_all_kdata;
228 	}
229 
230 	if (p->uf_entry.tv.bo)
231 		p->job->uf_addr = uf_offset;
232 	kfree(chunk_array);
233 
234 	/* Use this opportunity to fill in task info for the vm */
235 	amdgpu_vm_set_task_info(vm);
236 
237 	return 0;
238 
239 free_all_kdata:
240 	i = p->nchunks - 1;
241 free_partial_kdata:
242 	for (; i >= 0; i--)
243 		kvfree(p->chunks[i].kdata);
244 	kfree(p->chunks);
245 	p->chunks = NULL;
246 	p->nchunks = 0;
247 free_chunk:
248 	kfree(chunk_array);
249 
250 	return ret;
251 }
252 
253 /* Convert microseconds to bytes. */
254 static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
255 {
256 	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
257 		return 0;
258 
259 	/* Since accum_us is incremented by a million per second, just
260 	 * multiply it by the number of MB/s to get the number of bytes.
261 	 */
262 	return us << adev->mm_stats.log2_max_MBps;
263 }
264 
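/* For example, with log2_max_MBps == 6 the maximum rate is 2^6 = 64 MB/s,
 * so one second of accumulated time (1000000 us) converts to
 * 1000000 << 6 = 64000000 bytes.  bytes_to_us() below is the inverse
 * mapping, used e.g. in amdgpu_cs_report_moved_bytes() to pay back the
 * bytes that were actually moved.
 */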
265 static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
266 {
267 	if (!adev->mm_stats.log2_max_MBps)
268 		return 0;
269 
270 	return bytes >> adev->mm_stats.log2_max_MBps;
271 }
272 
273 /* Returns how many bytes TTM can move right now. If no bytes can be moved,
274  * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
275  * which means it can go over the threshold once. If that happens, the driver
276  * will be in debt and no other buffer migrations can be done until that debt
277  * is repaid.
278  *
279  * This approach allows moving a buffer of any size (it's important to allow
280  * that).
281  *
282  * The currency is simply time in microseconds and it increases as the clock
283  * ticks. The accumulated microseconds (us) are converted to bytes and
284  * returned.
285  */
286 static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
287 					      u64 *max_bytes,
288 					      u64 *max_vis_bytes)
289 {
290 	s64 time_us, increment_us;
291 	u64 free_vram, total_vram, used_vram;
292 	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
293 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
294 	 * throttling.
295 	 *
296 	 * It means that in order to get full max MBps, at least 5 IBs per
297 	 * second must be submitted and not more than 200ms apart from each
298 	 * other.
299 	 */
300 	const s64 us_upper_bound = 200000;
301 
302 	if (!adev->mm_stats.log2_max_MBps) {
303 		*max_bytes = 0;
304 		*max_vis_bytes = 0;
305 		return;
306 	}
307 
308 	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
309 	used_vram = amdgpu_vram_mgr_usage(vram_man);
310 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
311 
312 	spin_lock(&adev->mm_stats.lock);
313 
314 	/* Increase the amount of accumulated us. */
315 	time_us = ktime_to_us(ktime_get());
316 	increment_us = time_us - adev->mm_stats.last_update_us;
317 	adev->mm_stats.last_update_us = time_us;
318 	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
319                                       us_upper_bound);
320 
321 	/* This prevents the short period of low performance when the VRAM
322 	 * usage is low and the driver is in debt or doesn't have enough
323 	 * accumulated us to fill VRAM quickly.
324 	 *
325 	 * The situation can occur in these cases:
326 	 * - a lot of VRAM is freed by userspace
327 	 * - the presence of a big buffer causes a lot of evictions
328 	 *   (solution: split buffers into smaller ones)
329 	 *
330 	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
331 	 * accum_us to a positive number.
332 	 */
333 	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
334 		s64 min_us;
335 
336 		/* Be more aggressive on dGPUs. Try to fill a portion of free
337 		 * VRAM now.
338 		 */
339 		if (!(adev->flags & AMD_IS_APU))
340 			min_us = bytes_to_us(adev, free_vram / 4);
341 		else
342 			min_us = 0; /* Reset accum_us on APUs. */
343 
344 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
345 	}
346 
347 	/* This is set to 0 if the driver is in debt to disallow (optional)
348 	 * buffer moves.
349 	 */
350 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
351 
352 	/* Do the same for visible VRAM if half of it is free */
353 	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
354 		u64 total_vis_vram = adev->gmc.visible_vram_size;
355 		u64 used_vis_vram =
356 		  amdgpu_vram_mgr_vis_usage(vram_man);
357 
358 		if (used_vis_vram < total_vis_vram) {
359 			u64 free_vis_vram = total_vis_vram - used_vis_vram;
360 			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
361 							  increment_us, us_upper_bound);
362 
363 			if (free_vis_vram >= total_vis_vram / 2)
364 				adev->mm_stats.accum_us_vis =
365 					max(bytes_to_us(adev, free_vis_vram / 2),
366 					    adev->mm_stats.accum_us_vis);
367 		}
368 
369 		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
370 	} else {
371 		*max_vis_bytes = 0;
372 	}
373 
374 	spin_unlock(&adev->mm_stats.lock);
375 }
376 
377 /* Report how many bytes have really been moved for the last command
378  * submission. This can result in a debt that can stop buffer migrations
379  * temporarily.
380  */
381 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
382 				  u64 num_vis_bytes)
383 {
384 	spin_lock(&adev->mm_stats.lock);
385 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
386 	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
387 	spin_unlock(&adev->mm_stats.lock);
388 }
389 
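/* Validate a single BO for the CS: prefer the BO's preferred placement while
 * the move budget lasts, fall back to the allowed domains once the budget (or
 * the CPU visible VRAM budget) is used up, and retry in the allowed domains
 * on -ENOMEM.  Moved bytes are charged against the parser's counters.
 */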
390 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
391 				 struct amdgpu_bo *bo)
392 {
393 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
394 	struct ttm_operation_ctx ctx = {
395 		.interruptible = true,
396 		.no_wait_gpu = false,
397 		.resv = bo->tbo.base.resv,
398 		.flags = 0
399 	};
400 	uint32_t domain;
401 	int r;
402 
403 	if (bo->pin_count)
404 		return 0;
405 
406 	/* Don't move this buffer if we have depleted our allowance
407 	 * to move it. Don't move anything if the threshold is zero.
408 	 */
409 	if (p->bytes_moved < p->bytes_moved_threshold &&
410 	    (!bo->tbo.base.dma_buf ||
411 	    list_empty(&bo->tbo.base.dma_buf->attachments))) {
412 		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
413 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
414 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
415 			 * visible VRAM if we've depleted our allowance to do
416 			 * that.
417 			 */
418 			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
419 				domain = bo->preferred_domains;
420 			else
421 				domain = bo->allowed_domains;
422 		} else {
423 			domain = bo->preferred_domains;
424 		}
425 	} else {
426 		domain = bo->allowed_domains;
427 	}
428 
429 retry:
430 	amdgpu_bo_placement_from_domain(bo, domain);
431 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
432 
433 	p->bytes_moved += ctx.bytes_moved;
434 	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
435 	    amdgpu_bo_in_cpu_visible_vram(bo))
436 		p->bytes_moved_vis += ctx.bytes_moved;
437 
438 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
439 		domain = bo->allowed_domains;
440 		goto retry;
441 	}
442 
443 	return r;
444 }
445 
446 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
447 {
448 	struct amdgpu_cs_parser *p = param;
449 	int r;
450 
451 	r = amdgpu_cs_bo_validate(p, bo);
452 	if (r)
453 		return r;
454 
455 	if (bo->shadow)
456 		r = amdgpu_cs_bo_validate(p, bo->shadow);
457 
458 	return r;
459 }
460 
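/* Validate every BO on the given list.  Userptr BOs that belong to another
 * process are rejected, and invalidated userptr BOs are bounced through the
 * CPU domain so that the new backing pages get bound before validation.
 */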
461 static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
462 			    struct list_head *validated)
463 {
464 	struct ttm_operation_ctx ctx = { true, false };
465 	struct amdgpu_bo_list_entry *lobj;
466 	int r;
467 
468 	list_for_each_entry(lobj, validated, tv.head) {
469 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
470 		struct mm_struct *usermm;
471 
472 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
473 		if (usermm && usermm != current->mm)
474 			return -EPERM;
475 
476 		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
477 		    lobj->user_invalidated && lobj->user_pages) {
478 			amdgpu_bo_placement_from_domain(bo,
479 							AMDGPU_GEM_DOMAIN_CPU);
480 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
481 			if (r)
482 				return r;
483 
484 			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
485 						     lobj->user_pages);
486 		}
487 
488 		r = amdgpu_cs_validate(p, bo);
489 		if (r)
490 			return r;
491 
492 		kvfree(lobj->user_pages);
493 		lobj->user_pages = NULL;
494 	}
495 	return 0;
496 }
497 
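/* Collect and reserve all BOs for the submission: the BO list, the VM page
 * directory and the optional user fence BO; get the userptr backing pages,
 * validate everything against the current move budget and fill in the
 * GDS/GWS/OA job parameters.
 */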
498 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
499 				union drm_amdgpu_cs *cs)
500 {
501 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
502 	struct amdgpu_vm *vm = &fpriv->vm;
503 	struct amdgpu_bo_list_entry *e;
504 	struct list_head duplicates;
505 	struct amdgpu_bo *gds;
506 	struct amdgpu_bo *gws;
507 	struct amdgpu_bo *oa;
508 	int r;
509 
510 	INIT_LIST_HEAD(&p->validated);
511 
512 	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
513 	if (cs->in.bo_list_handle) {
514 		if (p->bo_list)
515 			return -EINVAL;
516 
517 		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
518 				       &p->bo_list);
519 		if (r)
520 			return r;
521 	} else if (!p->bo_list) {
522 		/* Create an empty bo_list when no handle is provided */
523 		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
524 					  &p->bo_list);
525 		if (r)
526 			return r;
527 	}
528 
529 	/* One for TTM and one for the CS job */
530 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
531 		e->tv.num_shared = 2;
532 
533 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
534 
535 	INIT_LIST_HEAD(&duplicates);
536 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
537 
538 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
539 		list_add(&p->uf_entry.tv.head, &p->validated);
540 
541 	/* Get the userptr backing pages. If the pages were updated after being
542 	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
543 	 * will do amdgpu_ttm_backend_bind() to flush and invalidate the new pages.
544 	 */
545 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
546 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
547 		bool userpage_invalidated = false;
548 		int i;
549 
550 		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
551 					sizeof(struct page *),
552 					GFP_KERNEL | __GFP_ZERO);
553 		if (!e->user_pages) {
554 			DRM_ERROR("calloc failure\n");
555 			return -ENOMEM;
556 		}
557 
558 		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
559 		if (r) {
560 			kvfree(e->user_pages);
561 			e->user_pages = NULL;
562 			return r;
563 		}
564 
565 		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
566 			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
567 				userpage_invalidated = true;
568 				break;
569 			}
570 		}
571 		e->user_invalidated = userpage_invalidated;
572 	}
573 
574 	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
575 				   &duplicates);
576 	if (unlikely(r != 0)) {
577 		if (r != -ERESTARTSYS)
578 			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
579 		goto out;
580 	}
581 
582 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
583 					  &p->bytes_moved_vis_threshold);
584 	p->bytes_moved = 0;
585 	p->bytes_moved_vis = 0;
586 
587 	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
588 				      amdgpu_cs_validate, p);
589 	if (r) {
590 		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
591 		goto error_validate;
592 	}
593 
594 	r = amdgpu_cs_list_validate(p, &duplicates);
595 	if (r)
596 		goto error_validate;
597 
598 	r = amdgpu_cs_list_validate(p, &p->validated);
599 	if (r)
600 		goto error_validate;
601 
602 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
603 				     p->bytes_moved_vis);
604 
605 	gds = p->bo_list->gds_obj;
606 	gws = p->bo_list->gws_obj;
607 	oa = p->bo_list->oa_obj;
608 
609 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
610 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
611 
612 		/* Make sure we use the exclusive slot for shared BOs */
613 		if (bo->prime_shared_count)
614 			e->tv.num_shared = 0;
615 		e->bo_va = amdgpu_vm_bo_find(vm, bo);
616 	}
617 
618 	if (gds) {
619 		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
620 		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
621 	}
622 	if (gws) {
623 		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
624 		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
625 	}
626 	if (oa) {
627 		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
628 		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
629 	}
630 
631 	if (!r && p->uf_entry.tv.bo) {
632 		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
633 
634 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
635 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
636 	}
637 
638 error_validate:
639 	if (r)
640 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
641 out:
642 	return r;
643 }
644 
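/* Add the reservation object fences of every validated BO to the job's sync
 * object.  BOs created with explicit sync only honour explicitly added
 * fences (AMDGPU_SYNC_EXPLICIT); everything else syncs to fences from other
 * owners (AMDGPU_SYNC_NE_OWNER).
 */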
645 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
646 {
647 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
648 	struct amdgpu_bo_list_entry *e;
649 	int r;
650 
651 	list_for_each_entry(e, &p->validated, tv.head) {
652 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
653 		struct dma_resv *resv = bo->tbo.base.resv;
654 		enum amdgpu_sync_mode sync_mode;
655 
656 		sync_mode = amdgpu_bo_explicit_sync(bo) ?
657 			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
658 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
659 				     &fpriv->vm);
660 		if (r)
661 			return r;
662 	}
663 	return 0;
664 }
665 
666 /**
667  * amdgpu_cs_parser_fini() - clean parser states
668  * @parser:	parser structure holding parsing context.
669  * @error:	error number
670  *
671  * If an error is set and backoff is requested, back off the buffer
672  * reservations; in any case, free the memory used by the parsing context.
673  **/
674 static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
675 				  bool backoff)
676 {
677 	unsigned i;
678 
679 	if (error && backoff)
680 		ttm_eu_backoff_reservation(&parser->ticket,
681 					   &parser->validated);
682 
683 	for (i = 0; i < parser->num_post_deps; i++) {
684 		drm_syncobj_put(parser->post_deps[i].syncobj);
685 		kfree(parser->post_deps[i].chain);
686 	}
687 	kfree(parser->post_deps);
688 
689 	dma_fence_put(parser->fence);
690 
691 	if (parser->ctx) {
692 		mutex_unlock(&parser->ctx->lock);
693 		amdgpu_ctx_put(parser->ctx);
694 	}
695 	if (parser->bo_list)
696 		amdgpu_bo_list_put(parser->bo_list);
697 
698 	for (i = 0; i < parser->nchunks; i++)
699 		kvfree(parser->chunks[i].kdata);
700 	kfree(parser->chunks);
701 	if (parser->job)
702 		amdgpu_job_free(parser->job);
703 	if (parser->uf_entry.tv.bo) {
704 		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
705 
706 		amdgpu_bo_unref(&uf);
707 	}
708 }
709 
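/* Handle everything the VM needs before the submission can run: parse or
 * patch IBs for rings that require VM emulation (UVD/VCE), update the page
 * tables for all mapped BOs, and add the resulting page table update fences
 * to the job before syncing the rings.
 */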
710 static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
711 {
712 	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
713 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
714 	struct amdgpu_device *adev = p->adev;
715 	struct amdgpu_vm *vm = &fpriv->vm;
716 	struct amdgpu_bo_list_entry *e;
717 	struct amdgpu_bo_va *bo_va;
718 	struct amdgpu_bo *bo;
719 	int r;
720 
721 	/* Only for UVD/VCE VM emulation */
722 	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
723 		unsigned i, j;
724 
725 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
726 			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
727 			struct amdgpu_bo_va_mapping *m;
728 			struct amdgpu_bo *aobj = NULL;
729 			struct amdgpu_cs_chunk *chunk;
730 			uint64_t offset, va_start;
731 			struct amdgpu_ib *ib;
732 			uint8_t *kptr;
733 
734 			chunk = &p->chunks[i];
735 			ib = &p->job->ibs[j];
736 			chunk_ib = chunk->kdata;
737 
738 			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
739 				continue;
740 
741 			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
742 			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
743 			if (r) {
744 				DRM_ERROR("IB va_start is invalid\n");
745 				return r;
746 			}
747 
748 			if ((va_start + chunk_ib->ib_bytes) >
749 			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
750 				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
751 				return -EINVAL;
752 			}
753 
754 			/* the IB should be reserved at this point */
755 			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
756 			if (r) {
757 				return r;
758 			}
759 
760 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
761 			kptr += va_start - offset;
762 
763 			if (ring->funcs->parse_cs) {
764 				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
765 				amdgpu_bo_kunmap(aobj);
766 
767 				r = amdgpu_ring_parse_cs(ring, p, j);
768 				if (r)
769 					return r;
770 			} else {
771 				ib->ptr = (uint32_t *)kptr;
772 				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
773 				amdgpu_bo_kunmap(aobj);
774 				if (r)
775 					return r;
776 			}
777 
778 			j++;
779 		}
780 	}
781 
782 	if (!p->job->vm)
783 		return amdgpu_cs_sync_rings(p);
784 
785 
786 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
787 	if (r)
788 		return r;
789 
790 	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
791 	if (r)
792 		return r;
793 
794 	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
795 	if (r)
796 		return r;
797 
798 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
799 		bo_va = fpriv->csa_va;
800 		BUG_ON(!bo_va);
801 		r = amdgpu_vm_bo_update(adev, bo_va, false);
802 		if (r)
803 			return r;
804 
805 		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
806 		if (r)
807 			return r;
808 	}
809 
810 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
811 		/* ignore duplicates */
812 		bo = ttm_to_amdgpu_bo(e->tv.bo);
813 		if (!bo)
814 			continue;
815 
816 		bo_va = e->bo_va;
817 		if (bo_va == NULL)
818 			continue;
819 
820 		r = amdgpu_vm_bo_update(adev, bo_va, false);
821 		if (r)
822 			return r;
823 
824 		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
825 		if (r)
826 			return r;
827 	}
828 
829 	r = amdgpu_vm_handle_moved(adev, vm);
830 	if (r)
831 		return r;
832 
833 	r = amdgpu_vm_update_pdes(adev, vm, false);
834 	if (r)
835 		return r;
836 
837 	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
838 	if (r)
839 		return r;
840 
841 	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
842 
843 	if (amdgpu_vm_debug) {
844 		/* Invalidate all BOs to test for userspace bugs */
845 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
846 			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
847 
848 			/* ignore duplicates */
849 			if (!bo)
850 				continue;
851 
852 			amdgpu_vm_bo_invalidate(adev, bo, false);
853 		}
854 	}
855 
856 	return amdgpu_cs_sync_rings(p);
857 }
858 
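/* Walk the IB chunks, pick the scheduler entity for the submission and
 * allocate the IBs of the job.  All IBs must target the same entity, and on
 * GFX at most one preemptible IB each is allowed for CE and DE.
 */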
859 static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
860 			     struct amdgpu_cs_parser *parser)
861 {
862 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
863 	struct amdgpu_vm *vm = &fpriv->vm;
864 	int r, ce_preempt = 0, de_preempt = 0;
865 	struct amdgpu_ring *ring;
866 	int i, j;
867 
868 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
869 		struct amdgpu_cs_chunk *chunk;
870 		struct amdgpu_ib *ib;
871 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
872 		struct drm_sched_entity *entity;
873 
874 		chunk = &parser->chunks[i];
875 		ib = &parser->job->ibs[j];
876 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
877 
878 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
879 			continue;
880 
881 		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
882 		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
883 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
884 				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
885 					ce_preempt++;
886 				else
887 					de_preempt++;
888 			}
889 
890 			/* each GFX command submission allows at most one preemptible IB each for CE and DE */
891 			if (ce_preempt > 1 || de_preempt > 1)
892 				return -EINVAL;
893 		}
894 
895 		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
896 					  chunk_ib->ip_instance, chunk_ib->ring,
897 					  &entity);
898 		if (r)
899 			return r;
900 
901 		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
902 			parser->job->preamble_status |=
903 				AMDGPU_PREAMBLE_IB_PRESENT;
904 
905 		if (parser->entity && parser->entity != entity)
906 			return -EINVAL;
907 
908 		/* Return if there is no run queue associated with this entity,
909 		 * possibly because of a disabled HW IP. */
910 		if (entity->rq == NULL)
911 			return -EINVAL;
912 
913 		parser->entity = entity;
914 
915 		ring = to_amdgpu_ring(entity->rq->sched);
916 		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
917 				   chunk_ib->ib_bytes : 0,
918 				   AMDGPU_IB_POOL_DELAYED, ib);
919 		if (r) {
920 			DRM_ERROR("Failed to get ib !\n");
921 			return r;
922 		}
923 
924 		ib->gpu_addr = chunk_ib->va_start;
925 		ib->length_dw = chunk_ib->ib_bytes / 4;
926 		ib->flags = chunk_ib->flags;
927 
928 		j++;
929 	}
930 
931 	/* MM engine doesn't support user fences */
932 	ring = to_amdgpu_ring(parser->entity->rq->sched);
933 	if (parser->job->uf_addr && ring->funcs->no_user_fence)
934 		return -EINVAL;
935 
936 	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
937 }
938 
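/* Add the fences of a DEPENDENCIES or SCHEDULED_DEPENDENCIES chunk to the
 * job's sync object; for scheduled dependencies the scheduled fence is
 * waited for instead of the finished one.
 */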
939 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
940 				       struct amdgpu_cs_chunk *chunk)
941 {
942 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
943 	unsigned num_deps;
944 	int i, r;
945 	struct drm_amdgpu_cs_chunk_dep *deps;
946 
947 	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
948 	num_deps = chunk->length_dw * 4 /
949 		sizeof(struct drm_amdgpu_cs_chunk_dep);
950 
951 	for (i = 0; i < num_deps; ++i) {
952 		struct amdgpu_ctx *ctx;
953 		struct drm_sched_entity *entity;
954 		struct dma_fence *fence;
955 
956 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
957 		if (ctx == NULL)
958 			return -EINVAL;
959 
960 		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
961 					  deps[i].ip_instance,
962 					  deps[i].ring, &entity);
963 		if (r) {
964 			amdgpu_ctx_put(ctx);
965 			return r;
966 		}
967 
968 		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
969 		amdgpu_ctx_put(ctx);
970 
971 		if (IS_ERR(fence))
972 			return PTR_ERR(fence);
973 		else if (!fence)
974 			continue;
975 
976 		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
977 			struct drm_sched_fence *s_fence;
978 			struct dma_fence *old = fence;
979 
980 			s_fence = to_drm_sched_fence(fence);
981 			fence = dma_fence_get(&s_fence->scheduled);
982 			dma_fence_put(old);
983 		}
984 
985 		r = amdgpu_sync_fence(&p->job->sync, fence);
986 		dma_fence_put(fence);
987 		if (r)
988 			return r;
989 	}
990 	return 0;
991 }
992 
993 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
994 						 uint32_t handle, u64 point,
995 						 u64 flags)
996 {
997 	struct dma_fence *fence;
998 	int r;
999 
1000 	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1001 	if (r) {
1002 		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1003 			  handle, point, r);
1004 		return r;
1005 	}
1006 
1007 	r = amdgpu_sync_fence(&p->job->sync, fence);
1008 	dma_fence_put(fence);
1009 
1010 	return r;
1011 }
1012 
1013 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1014 					    struct amdgpu_cs_chunk *chunk)
1015 {
1016 	struct drm_amdgpu_cs_chunk_sem *deps;
1017 	unsigned num_deps;
1018 	int i, r;
1019 
1020 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1021 	num_deps = chunk->length_dw * 4 /
1022 		sizeof(struct drm_amdgpu_cs_chunk_sem);
1023 	for (i = 0; i < num_deps; ++i) {
1024 		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1025 							  0, 0);
1026 		if (r)
1027 			return r;
1028 	}
1029 
1030 	return 0;
1031 }
1032 
1033 
1034 static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1035 						     struct amdgpu_cs_chunk *chunk)
1036 {
1037 	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1038 	unsigned num_deps;
1039 	int i, r;
1040 
1041 	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1042 	num_deps = chunk->length_dw * 4 /
1043 		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1044 	for (i = 0; i < num_deps; ++i) {
1045 		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1046 							  syncobj_deps[i].handle,
1047 							  syncobj_deps[i].point,
1048 							  syncobj_deps[i].flags);
1049 		if (r)
1050 			return r;
1051 	}
1052 
1053 	return 0;
1054 }
1055 
1056 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1057 					     struct amdgpu_cs_chunk *chunk)
1058 {
1059 	struct drm_amdgpu_cs_chunk_sem *deps;
1060 	unsigned num_deps;
1061 	int i;
1062 
1063 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1064 	num_deps = chunk->length_dw * 4 /
1065 		sizeof(struct drm_amdgpu_cs_chunk_sem);
1066 
1067 	if (p->post_deps)
1068 		return -EINVAL;
1069 
1070 	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1071 				     GFP_KERNEL);
1072 	p->num_post_deps = 0;
1073 
1074 	if (!p->post_deps)
1075 		return -ENOMEM;
1076 
1077 
1078 	for (i = 0; i < num_deps; ++i) {
1079 		p->post_deps[i].syncobj =
1080 			drm_syncobj_find(p->filp, deps[i].handle);
1081 		if (!p->post_deps[i].syncobj)
1082 			return -EINVAL;
1083 		p->post_deps[i].chain = NULL;
1084 		p->post_deps[i].point = 0;
1085 		p->num_post_deps++;
1086 	}
1087 
1088 	return 0;
1089 }
1090 
1091 
1092 static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
1093 						      struct amdgpu_cs_chunk *chunk)
1094 {
1095 	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1096 	unsigned num_deps;
1097 	int i;
1098 
1099 	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1100 	num_deps = chunk->length_dw * 4 /
1101 		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1102 
1103 	if (p->post_deps)
1104 		return -EINVAL;
1105 
1106 	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1107 				     GFP_KERNEL);
1108 	p->num_post_deps = 0;
1109 
1110 	if (!p->post_deps)
1111 		return -ENOMEM;
1112 
1113 	for (i = 0; i < num_deps; ++i) {
1114 		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1115 
1116 		dep->chain = NULL;
1117 		if (syncobj_deps[i].point) {
1118 			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
1119 			if (!dep->chain)
1120 				return -ENOMEM;
1121 		}
1122 
1123 		dep->syncobj = drm_syncobj_find(p->filp,
1124 						syncobj_deps[i].handle);
1125 		if (!dep->syncobj) {
1126 			kfree(dep->chain);
1127 			return -EINVAL;
1128 		}
1129 		dep->point = syncobj_deps[i].point;
1130 		p->num_post_deps++;
1131 	}
1132 
1133 	return 0;
1134 }
1135 
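/* Dispatch all dependency related chunks (fence dependencies, syncobjs and
 * timeline syncobjs) to their respective handlers.
 */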
1136 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1137 				  struct amdgpu_cs_parser *p)
1138 {
1139 	int i, r;
1140 
1141 	for (i = 0; i < p->nchunks; ++i) {
1142 		struct amdgpu_cs_chunk *chunk;
1143 
1144 		chunk = &p->chunks[i];
1145 
1146 		switch (chunk->chunk_id) {
1147 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
1148 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1149 			r = amdgpu_cs_process_fence_dep(p, chunk);
1150 			if (r)
1151 				return r;
1152 			break;
1153 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1154 			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1155 			if (r)
1156 				return r;
1157 			break;
1158 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1159 			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1160 			if (r)
1161 				return r;
1162 			break;
1163 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1164 			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1165 			if (r)
1166 				return r;
1167 			break;
1168 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1169 			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1170 			if (r)
1171 				return r;
1172 			break;
1173 		}
1174 	}
1175 
1176 	return 0;
1177 }
1178 
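/* Install the submission's fence in all syncobjs requested via
 * SYNCOBJ_OUT / SYNCOBJ_TIMELINE_SIGNAL chunks.
 */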
1179 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1180 {
1181 	int i;
1182 
1183 	for (i = 0; i < p->num_post_deps; ++i) {
1184 		if (p->post_deps[i].chain && p->post_deps[i].point) {
1185 			drm_syncobj_add_point(p->post_deps[i].syncobj,
1186 					      p->post_deps[i].chain,
1187 					      p->fence, p->post_deps[i].point);
1188 			p->post_deps[i].chain = NULL;
1189 		} else {
1190 			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1191 						  p->fence);
1192 		}
1193 	}
1194 }
1195 
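/* Turn the parsed CS into a scheduler job: initialize the job, re-check under
 * the notifier lock that no userptr pages were invalidated, install the fence
 * on all BOs, push the job and report the sequence number back to userspace.
 */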
1196 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1197 			    union drm_amdgpu_cs *cs)
1198 {
1199 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1200 	struct drm_sched_entity *entity = p->entity;
1201 	struct amdgpu_bo_list_entry *e;
1202 	struct amdgpu_job *job;
1203 	uint64_t seq;
1204 	int r;
1205 
1206 	job = p->job;
1207 	p->job = NULL;
1208 
1209 	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
1210 	if (r)
1211 		goto error_unlock;
1212 
1213 	/* No memory allocation is allowed while holding the notifier lock.
1214 	 * The lock is held until amdgpu_cs_submit is finished and fence is
1215 	 * added to BOs.
1216 	 */
1217 	mutex_lock(&p->adev->notifier_lock);
1218 
1219 	/* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
1220 	 * -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
1221 	 */
1222 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1223 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1224 
1225 		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1226 	}
1227 	if (r) {
1228 		r = -EAGAIN;
1229 		goto error_abort;
1230 	}
1231 
1232 	p->fence = dma_fence_get(&job->base.s_fence->finished);
1233 
1234 	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
1235 	amdgpu_cs_post_dependencies(p);
1236 
1237 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1238 	    !p->ctx->preamble_presented) {
1239 		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1240 		p->ctx->preamble_presented = true;
1241 	}
1242 
1243 	cs->out.handle = seq;
1244 	job->uf_sequence = seq;
1245 
1246 	amdgpu_job_free_resources(job);
1247 
1248 	trace_amdgpu_cs_ioctl(job);
1249 	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1250 	drm_sched_entity_push_job(&job->base, entity);
1251 
1252 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1253 
1254 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1255 	mutex_unlock(&p->adev->notifier_lock);
1256 
1257 	return 0;
1258 
1259 error_abort:
1260 	drm_sched_job_cleanup(&job->base);
1261 	mutex_unlock(&p->adev->notifier_lock);
1262 
1263 error_unlock:
1264 	amdgpu_job_free(job);
1265 	return r;
1266 }
1267 
1268 static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
1269 {
1270 	int i;
1271 
1272 	if (!trace_amdgpu_cs_enabled())
1273 		return;
1274 
1275 	for (i = 0; i < parser->job->num_ibs; i++)
1276 		trace_amdgpu_cs(parser, i);
1277 }
1278 
1279 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1280 {
1281 	struct amdgpu_device *adev = drm_to_adev(dev);
1282 	union drm_amdgpu_cs *cs = data;
1283 	struct amdgpu_cs_parser parser = {};
1284 	bool reserved_buffers = false;
1285 	int r;
1286 
1287 	if (amdgpu_ras_intr_triggered())
1288 		return -EHWPOISON;
1289 
1290 	if (!adev->accel_working)
1291 		return -EBUSY;
1292 
1293 	parser.adev = adev;
1294 	parser.filp = filp;
1295 
1296 	r = amdgpu_cs_parser_init(&parser, data);
1297 	if (r) {
1298 		if (printk_ratelimit())
1299 			DRM_ERROR("Failed to initialize parser %d!\n", r);
1300 		goto out;
1301 	}
1302 
1303 	r = amdgpu_cs_ib_fill(adev, &parser);
1304 	if (r)
1305 		goto out;
1306 
1307 	r = amdgpu_cs_dependencies(adev, &parser);
1308 	if (r) {
1309 		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1310 		goto out;
1311 	}
1312 
1313 	r = amdgpu_cs_parser_bos(&parser, data);
1314 	if (r) {
1315 		if (r == -ENOMEM)
1316 			DRM_ERROR("Not enough memory for command submission!\n");
1317 		else if (r != -ERESTARTSYS && r != -EAGAIN)
1318 			DRM_ERROR("Failed to process the buffer list %d!\n", r);
1319 		goto out;
1320 	}
1321 
1322 	reserved_buffers = true;
1323 
1324 	trace_amdgpu_cs_ibs(&parser);
1325 
1326 	r = amdgpu_cs_vm_handling(&parser);
1327 	if (r)
1328 		goto out;
1329 
1330 	r = amdgpu_cs_submit(&parser, cs);
1331 
1332 out:
1333 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1334 
1335 	return r;
1336 }
1337 
1338 /**
1339  * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1340  *
1341  * @dev: drm device
1342  * @data: data from userspace
1343  * @filp: file private
1344  *
1345  * Wait for the command submission identified by handle to finish.
1346  */
1347 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1348 			 struct drm_file *filp)
1349 {
1350 	union drm_amdgpu_wait_cs *wait = data;
1351 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1352 	struct drm_sched_entity *entity;
1353 	struct amdgpu_ctx *ctx;
1354 	struct dma_fence *fence;
1355 	long r;
1356 
1357 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1358 	if (ctx == NULL)
1359 		return -EINVAL;
1360 
1361 	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1362 				  wait->in.ring, &entity);
1363 	if (r) {
1364 		amdgpu_ctx_put(ctx);
1365 		return r;
1366 	}
1367 
1368 	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1369 	if (IS_ERR(fence))
1370 		r = PTR_ERR(fence);
1371 	else if (fence) {
1372 		r = dma_fence_wait_timeout(fence, true, timeout);
1373 		if (r > 0 && fence->error)
1374 			r = fence->error;
1375 		dma_fence_put(fence);
1376 	} else
1377 		r = 1;
1378 
1379 	amdgpu_ctx_put(ctx);
1380 	if (r < 0)
1381 		return r;
1382 
1383 	memset(wait, 0, sizeof(*wait));
1384 	wait->out.status = (r == 0);
1385 
1386 	return 0;
1387 }
1388 
1389 /**
1390  * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1391  *
1392  * @adev: amdgpu device
1393  * @filp: file private
1394  * @user: drm_amdgpu_fence copied from user space
1395  */
1396 static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1397 					     struct drm_file *filp,
1398 					     struct drm_amdgpu_fence *user)
1399 {
1400 	struct drm_sched_entity *entity;
1401 	struct amdgpu_ctx *ctx;
1402 	struct dma_fence *fence;
1403 	int r;
1404 
1405 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1406 	if (ctx == NULL)
1407 		return ERR_PTR(-EINVAL);
1408 
1409 	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1410 				  user->ring, &entity);
1411 	if (r) {
1412 		amdgpu_ctx_put(ctx);
1413 		return ERR_PTR(r);
1414 	}
1415 
1416 	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1417 	amdgpu_ctx_put(ctx);
1418 
1419 	return fence;
1420 }
1421 
1422 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1423 				    struct drm_file *filp)
1424 {
1425 	struct amdgpu_device *adev = drm_to_adev(dev);
1426 	union drm_amdgpu_fence_to_handle *info = data;
1427 	struct dma_fence *fence;
1428 	struct drm_syncobj *syncobj;
1429 	struct sync_file *sync_file;
1430 	int fd, r;
1431 
1432 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1433 	if (IS_ERR(fence))
1434 		return PTR_ERR(fence);
1435 
1436 	if (!fence)
1437 		fence = dma_fence_get_stub();
1438 
1439 	switch (info->in.what) {
1440 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1441 		r = drm_syncobj_create(&syncobj, 0, fence);
1442 		dma_fence_put(fence);
1443 		if (r)
1444 			return r;
1445 		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1446 		drm_syncobj_put(syncobj);
1447 		return r;
1448 
1449 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1450 		r = drm_syncobj_create(&syncobj, 0, fence);
1451 		dma_fence_put(fence);
1452 		if (r)
1453 			return r;
1454 		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1455 		drm_syncobj_put(syncobj);
1456 		return r;
1457 
1458 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1459 		fd = get_unused_fd_flags(O_CLOEXEC);
1460 		if (fd < 0) {
1461 			dma_fence_put(fence);
1462 			return fd;
1463 		}
1464 
1465 		sync_file = sync_file_create(fence);
1466 		dma_fence_put(fence);
1467 		if (!sync_file) {
1468 			put_unused_fd(fd);
1469 			return -ENOMEM;
1470 		}
1471 
1472 		fd_install(fd, sync_file->file);
1473 		info->out.handle = fd;
1474 		return 0;
1475 
1476 	default:
1477 		dma_fence_put(fence);
1478 		return -EINVAL;
1479 	}
1480 }
1481 
1482 /**
1483  * amdgpu_cs_wait_all_fences - wait on all fences to signal
1484  *
1485  * @adev: amdgpu device
1486  * @filp: file private
1487  * @wait: wait parameters
1488  * @fences: array of drm_amdgpu_fence
1489  */
1490 static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1491 				     struct drm_file *filp,
1492 				     union drm_amdgpu_wait_fences *wait,
1493 				     struct drm_amdgpu_fence *fences)
1494 {
1495 	uint32_t fence_count = wait->in.fence_count;
1496 	unsigned int i;
1497 	long r = 1;
1498 
1499 	for (i = 0; i < fence_count; i++) {
1500 		struct dma_fence *fence;
1501 		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1502 
1503 		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1504 		if (IS_ERR(fence))
1505 			return PTR_ERR(fence);
1506 		else if (!fence)
1507 			continue;
1508 
1509 		r = dma_fence_wait_timeout(fence, true, timeout);
1510 		if (r > 0 && fence->error)
1511 			r = fence->error;
1512 
1513 		dma_fence_put(fence);
1514 		if (r < 0)
1515 			return r;
1516 
1517 		if (r == 0)
1518 			break;
1519 	}
1520 
1521 	memset(wait, 0, sizeof(*wait));
1522 	wait->out.status = (r > 0);
1523 
1524 	return 0;
1525 }
1526 
1527 /**
1528  * amdgpu_cs_wait_any_fence - wait on any fence to signal
1529  *
1530  * @adev: amdgpu device
1531  * @filp: file private
1532  * @wait: wait parameters
1533  * @fences: array of drm_amdgpu_fence
1534  */
1535 static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1536 				    struct drm_file *filp,
1537 				    union drm_amdgpu_wait_fences *wait,
1538 				    struct drm_amdgpu_fence *fences)
1539 {
1540 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1541 	uint32_t fence_count = wait->in.fence_count;
1542 	uint32_t first = ~0;
1543 	struct dma_fence **array;
1544 	unsigned int i;
1545 	long r;
1546 
1547 	/* Prepare the fence array */
1548 	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1549 
1550 	if (array == NULL)
1551 		return -ENOMEM;
1552 
1553 	for (i = 0; i < fence_count; i++) {
1554 		struct dma_fence *fence;
1555 
1556 		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1557 		if (IS_ERR(fence)) {
1558 			r = PTR_ERR(fence);
1559 			goto err_free_fence_array;
1560 		} else if (fence) {
1561 			array[i] = fence;
1562 		} else { /* NULL, the fence has already been signaled */
1563 			r = 1;
1564 			first = i;
1565 			goto out;
1566 		}
1567 	}
1568 
1569 	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1570 				       &first);
1571 	if (r < 0)
1572 		goto err_free_fence_array;
1573 
1574 out:
1575 	memset(wait, 0, sizeof(*wait));
1576 	wait->out.status = (r > 0);
1577 	wait->out.first_signaled = first;
1578 
1579 	if (first < fence_count && array[first])
1580 		r = array[first]->error;
1581 	else
1582 		r = 0;
1583 
1584 err_free_fence_array:
1585 	for (i = 0; i < fence_count; i++)
1586 		dma_fence_put(array[i]);
1587 	kfree(array);
1588 
1589 	return r;
1590 }
1591 
1592 /**
1593  * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1594  *
1595  * @dev: drm device
1596  * @data: data from userspace
1597  * @filp: file private
1598  */
1599 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1600 				struct drm_file *filp)
1601 {
1602 	struct amdgpu_device *adev = drm_to_adev(dev);
1603 	union drm_amdgpu_wait_fences *wait = data;
1604 	uint32_t fence_count = wait->in.fence_count;
1605 	struct drm_amdgpu_fence *fences_user;
1606 	struct drm_amdgpu_fence *fences;
1607 	int r;
1608 
1609 	/* Get the fences from userspace */
1610 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1611 			GFP_KERNEL);
1612 	if (fences == NULL)
1613 		return -ENOMEM;
1614 
1615 	fences_user = u64_to_user_ptr(wait->in.fences);
1616 	if (copy_from_user(fences, fences_user,
1617 		sizeof(struct drm_amdgpu_fence) * fence_count)) {
1618 		r = -EFAULT;
1619 		goto err_free_fences;
1620 	}
1621 
1622 	if (wait->in.wait_all)
1623 		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1624 	else
1625 		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1626 
1627 err_free_fences:
1628 	kfree(fences);
1629 
1630 	return r;
1631 }
1632 
1633 /**
1634  * amdgpu_cs_find_mapping - find the BO/VA mapping for a VM address
1635  *
1636  * @parser: command submission parser context
1637  * @addr: VM address
1638  * @bo: resulting BO of the mapping found
1639  *
1640  * Search the buffer objects in the command submission context for a certain
1641  * virtual memory address. Returns 0 and fills in @bo and @map when the
1642  * mapping is found, a negative error code otherwise.
1643  */
1644 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1645 			   uint64_t addr, struct amdgpu_bo **bo,
1646 			   struct amdgpu_bo_va_mapping **map)
1647 {
1648 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1649 	struct ttm_operation_ctx ctx = { false, false };
1650 	struct amdgpu_vm *vm = &fpriv->vm;
1651 	struct amdgpu_bo_va_mapping *mapping;
1652 	int r;
1653 
1654 	addr /= AMDGPU_GPU_PAGE_SIZE;
1655 
1656 	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1657 	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1658 		return -EINVAL;
1659 
1660 	*bo = mapping->bo_va->base.bo;
1661 	*map = mapping;
1662 
1663 	/* Double check that the BO is reserved by this CS */
1664 	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1665 		return -EINVAL;
1666 
1667 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1668 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1669 		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1670 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1671 		if (r)
1672 			return r;
1673 	}
1674 
1675 	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1676 }
1677