// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
}

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);

	return dev_name(hl_fence->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
	return true;
}

static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);

	kfree_rcu(hl_fence, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
	.get_driver_name = hl_fence_get_driver_name,
	.get_timeline_name = hl_fence_get_timeline_name,
	.enable_signaling = hl_fence_enable_signaling,
	.wait = dma_fence_default_wait,
	.release = hl_fence_release
};

static void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job	: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.ext_queue = job->ext_queue;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
	if (job->ext_queue) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	}

	return rc;
}

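/*
 * free_job - remove a job from its CS and release its resources
 *
 * @hdev	: pointer to the habanalabs device structure
 * @job	: pointer to the job to free
 *
 * Releases the job's userptr list and patched CB (external queues only),
 * removes the job from the CS job list and, for external-queue jobs, drops
 * the CS reference that was taken on the job's behalf.
 */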
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (job->ext_queue) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->ext_queue)
		cs_put(cs);

	kfree(job);
}

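/*
 * cs_do_release - final release of a command submission
 *
 * @ref	: pointer to the refcount kref embedded in the CS
 *
 * Called when the CS refcount drops to zero. Frees the remaining (internal)
 * jobs, updates the CI of internal queues, removes the CS from the H/W
 * queues mirror list, hands the TDR over to the next pending CS if needed,
 * and finally signals and releases the CS fence.
 */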
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs,
						refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Even though reaching here means that all external jobs have
	 * finished (each one of them took a refcount on the CS), we still
	 * need to go over the internal jobs and free them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		hdev->asic_funcs->hw_queues_lock(hdev);

		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

		spin_lock(&hdev->hw_queues_mirror_lock);
		/* remove CS from hw_queues mirror list */
		list_del_init(&cs->mirror_node);
		spin_unlock(&hdev->hw_queues_mirror_lock);

		/*
		 * Don't cancel TDR in case this CS was timed out because we
		 * might be running from the TDR context
		 */
		if ((!cs->timedout) &&
			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
			struct hl_cs *next;

			if (cs->tdr_active)
				cancel_delayed_work_sync(&cs->work_tdr);

			spin_lock(&hdev->hw_queues_mirror_lock);

			/* queue TDR for next CS */
			next = list_first_entry_or_null(
					&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node);

			if ((next) && (!next->tdr_active)) {
				next->tdr_active = true;
				schedule_delayed_work(&next->work_tdr,
							hdev->timeout_jiffies);
			}

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	}

	/*
	 * Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	if (cs->timedout)
		dma_fence_set_error(cs->fence, -ETIMEDOUT);
	else if (cs->aborted)
		dma_fence_set_error(cs->fence, -EIO);

	dma_fence_signal(cs->fence);
	dma_fence_put(cs->fence);

	kfree(cs);
}

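/*
 * cs_timedout - TDR handler for a command submission that got stuck
 *
 * @work	: pointer to the delayed work of the CS TDR
 *
 * Marks the CS as timed out so its TDR won't be cancelled, prints the
 * offending context and sequence number and, if reset_on_lockup is set,
 * resets the device.
 */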
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int ctx_asid, rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						 work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark that the CS timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;
	ctx_asid = cs->ctx->asid;

	/* TODO: add information about last signaled seq and last emitted seq */
	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
		ctx_asid, cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
}

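/*
 * allocate_cs - allocate and initialize a new command submission object
 *
 * @hdev	: pointer to the habanalabs device structure
 * @ctx	: pointer to the context of the submitting process
 * @cs_new	: [out] pointer to the newly allocated CS
 *
 * Allocates the CS and its fence, assigns the next sequence number of the
 * context and stores the fence in the context's pending array. Fails with
 * -EAGAIN if the slot for this sequence still holds an unsignaled fence,
 * i.e. there are too many in-flight submissions.
 */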
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			struct hl_cs **cs_new)
{
	struct hl_dma_fence *fence;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
	if (!fence) {
		rc = -ENOMEM;
		goto free_cs;
	}

	fence->hdev = hdev;
	spin_lock_init(&fence->lock);
	cs->fence = &fence->base_fence;

	spin_lock(&ctx->cs_lock);

	fence->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
			"Rejecting CS because of too many in-flight CS\n");
		rc = -EAGAIN;
		goto free_fence;
	}

	dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = fence->cs_seq;

	ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&fence->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&fence->base_fence);

	dma_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	kfree(fence);
free_cs:
	kfree(cs);
	return rc;
}

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);
}

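/*
 * hl_cs_rollback_all - abort all command submissions that are still in-flight
 *
 * @hdev	: pointer to the habanalabs device structure
 *
 * Flushes the completion workqueue and then goes over the H/W queues mirror
 * list, marking each leftover CS as aborted and rolling back its jobs so
 * that no CS objects are leaked.
 */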
void hl_cs_rollback_all(struct hl_device *hdev)
{
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	flush_workqueue(hdev->cq_wq);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
				mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

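/*
 * job_wq_completion - work function that frees a completed job
 *
 * @work	: pointer to the job's finish_work
 *
 * Attached only to external-queue jobs (see hl_cs_allocate_job). Frees the
 * job, which in turn may drop the last reference of its CS.
 */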
static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	free_job(hdev, job);
}

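/*
 * validate_queue_index - validate a CS chunk's queue and retrieve its CB
 *
 * @hdev	: pointer to the habanalabs device structure
 * @cb_mgr	: pointer to the CB manager of the submitting process
 * @chunk	: pointer to the user's CS chunk
 * @ext_queue	: [out] true if the chunk targets an external queue
 *
 * Checks that the queue index is valid and not restricted to the driver.
 * For external queues, looks up the CB by handle, validates its size and
 * increments its cs_cnt; for internal queues, the chunk's cb_handle is
 * returned as-is, cast to a pointer. Returns NULL on error.
 */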
static struct hl_cb *validate_queue_index(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk,
					bool *ext_queue)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;
	u32 cb_handle;
	struct hl_cb *cb;

	/* Assume external queue */
	*ext_queue = true;

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return NULL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return NULL;
	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
		*ext_queue = false;
		return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
	}

	/* Retrieve CB object */
	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

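/*
 * hl_cs_allocate_job - allocate a new job object
 *
 * @hdev	: pointer to the habanalabs device structure
 * @ext_queue	: true if the job will run on an external queue
 *
 * Allocates and initializes a job. For external-queue jobs, the userptr
 * list and the completion work are also initialized, because only those
 * jobs generate a completion.
 */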
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	job->ext_queue = ext_queue;

	if (job->ext_queue) {
		INIT_LIST_HEAD(&job->userptr_list);
		INIT_WORK(&job->finish_work, job_wq_completion);
	}

	return job;
}

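/*
 * _hl_cs_ioctl - build and submit a single command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @chunks	: user pointer to the array of CS chunks
 * @num_chunks	: number of chunks in the array
 * @cs_seq	: [out] sequence number assigned to the CS
 *
 * Copies the chunk array from user-space, allocates a CS and a job per
 * chunk, parses every job and finally schedules the CS on the H/W queues.
 * A CS must contain at least one job on an external queue.
 */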
static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
			u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool ext_queue_present = false;
	u32 size_to_copy;
	int rc, i, parse_cnt;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		bool ext_queue;

		cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
					&ext_queue);
		if (ext_queue) {
			ext_queue_present = true;
			if (!cb) {
				rc = -EINVAL;
				goto free_cs_object;
			}
		}

		job = hl_cs_allocate_job(hdev, ext_queue);
		if (!job) {
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (ext_queue)
				goto release_cb;
			else
				goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		if (job->ext_queue)
			job->job_cb_size = cb->size;
		else
			job->job_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, the CS is
		 * done and can be signaled to the user and all its resources
		 * can be freed. Only increment for JOBs on external queues,
		 * because only for those JOBs we get completion.
		 */
		if (job->ext_queue)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (!ext_queue_present) {
		dev_err(hdev->dev,
			"Reject CS %d.%llu because it has no external queue jobs\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to submit CS %d.%llu to H/W queues, error %d\n",
			cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

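/*
 * hl_cs_ioctl - command submission IOCTL handler
 *
 * @hpriv	: pointer to the private data of the fd
 * @data	: pointer to the IOCTL arguments (union hl_cs_args)
 *
 * If this is the first submission of the context (or a force-restore was
 * requested), performs the context-switch and restore phase and waits for
 * it to complete before submitting the execution CS. Other threads of the
 * same context wait until the context-switch phase is done.
 */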
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks;
	u32 num_chunks;
	u64 cs_seq = ULLONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		rc = -EBUSY;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not
				 * IDLE while we want to do context-switch
				 * (-EBUSY), we need to soft-reset because
				 * QMAN is probably stuck. However, we can't
				 * call to reset here directly because of
				 * deadlock, so need to do it at the very end
				 * of this function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (num_chunks == 0) {
			dev_dbg(hdev->dev,
			"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
						&cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks > 0) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
			if (ret <= 0) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

	chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;

	if (num_chunks == 0) {
		dev_err(hdev->dev,
			"Got execute CS with 0 chunks, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}

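/*
 * _hl_cs_wait_ioctl - wait for a command submission to complete
 *
 * @hdev	: pointer to the habanalabs device structure
 * @ctx	: pointer to the context that submitted the CS
 * @timeout_us	: timeout in microseconds (MAX_SCHEDULE_TIMEOUT waits forever)
 * @seq	: sequence number of the CS to wait on
 *
 * Waits on the CS fence that matches the given sequence number. Returns a
 * positive value on completion, 0 if the wait itself timed out, and a
 * negative error code if the CS timed out, was aborted, the wait was
 * interrupted or the sequence number is invalid.
 */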
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct dma_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else
		rc = 1;

	hl_ctx_put(ctx);

	return rc;
}

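/*
 * hl_cs_wait_ioctl - wait-for-CS IOCTL handler
 *
 * @hpriv	: pointer to the private data of the fd
 * @data	: pointer to the IOCTL arguments (union hl_wait_cs_args)
 *
 * Waits for the requested CS and translates the wait result into one of the
 * HL_WAIT_CS_STATUS_* values that are returned to the user.
 */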
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		dev_err_ratelimited(hdev->dev,
				"Error %ld on waiting for CS handle %llu\n",
				rc, seq);
		if (rc == -ERESTARTSYS) {
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}