// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/sched/signal.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>

#define HL_PLDM_PENDING_RESET_PER_SEC	(HL_PENDING_RESET_PER_SEC * 10)

bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
{
	if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
		return true;
	else
		return false;
}

enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else if (atomic_read(&hdev->in_reset))
		status = HL_DEVICE_STATUS_IN_RESET;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
}

static void hpriv_release(struct kref *ref)
{
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->restore_phase_mutex);

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	hdev->compute_ctx = NULL;
	mutex_unlock(&hdev->fpriv_list_lock);

	kfree(hpriv);
}

void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

void hl_hpriv_put(struct hl_fpriv *hpriv)
{
	kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when a process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;

	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);

	filp->private_data = NULL;

	hl_hpriv_put(hpriv);

	return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev;

	filp->private_data = NULL;

	hdev = hpriv->hdev;

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when a process does an mmap on a habanalabs device. Call the
 * device's mmap function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	unsigned long vm_pgoff;

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_CB:
		return hl_cb_mmap(hpriv, vma);
	}

	return -EINVAL;
}
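
/*
 * Example (illustrative sketch, not part of the driver): userspace selects
 * the mapping type through the high bits of the mmap offset. Assuming a CB
 * handle obtained from the command-buffer create IOCTL, which already
 * carries the HL_MMAP_TYPE_CB bits, a mapping request might look like:
 *
 *	// hypothetical userspace code; fd is an open habanalabs device node
 *	void *cb_va = mmap(NULL, cb_size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, cb_handle);
 *
 * hl_mmap() above dispatches on the type bits and hands hl_cb_mmap() the
 * offset with those bits stripped via HL_MMAP_OFFSET_VALUE_GET().
 */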

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
	kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for habanalabs's device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = hclass;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
				sizeof(*hdev->cq_wq),
				GFP_ATOMIC);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
	if (hdev->eq_wq == NULL) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
					sizeof(struct hl_device_idle_busy_ts),
					(GFP_KERNEL | __GFP_ZERO));
	if (!hdev->idle_busy_ts_arr) {
		rc = -ENOMEM;
		goto free_chip_info;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_idle_busy_ts_arr;

	hl_cb_mgr_init(&hdev->kernel_cb_mgr);

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	mutex_init(&hdev->mmu_cache_lock);
	INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
	spin_lock_init(&hdev->hw_queues_mirror_lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	mutex_init(&hdev->fpriv_list_lock);
	atomic_set(&hdev->in_reset, 0);

	return 0;

free_idle_busy_ts_arr:
	kfree(hdev->idle_busy_ts_arr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->mmu_cache_lock);
	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);

	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

	kfree(hdev->idle_busy_ts_arr);
	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->eq_wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}

static void set_freq_to_low_job(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_freq.work);

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->compute_ctx)
		hl_device_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);

	if (hl_device_disabled_or_in_reset(hdev))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	dev_err(hdev->dev, "Device heartbeat failed!\n");
	hl_device_reset(hdev, true, false);

	return;

reschedule:
	schedule_delayed_work(&hdev->work_heartbeat,
			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do things that either need the device H/W queues to be active or need
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
	int rc;

	if (hdev->asic_funcs->late_init) {
		rc = hdev->asic_funcs->late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"failed late initialization for the H/W\n");
			return rc;
		}
	}

	hdev->high_pll = hdev->asic_prop.high_pll;

	/* force setting to low frequency */
	hdev->curr_pll_profile = PLL_LOW;

	if (hdev->pm_mng_profile == PM_AUTO)
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
	else
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

	INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	if (hdev->heartbeat) {
		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
		schedule_delayed_work(&hdev->work_heartbeat,
				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
	}

	hdev->late_init_done = true;

	return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
	if (!hdev->late_init_done)
		return;

	cancel_delayed_work_sync(&hdev->work_freq);
	if (hdev->heartbeat)
		cancel_delayed_work_sync(&hdev->work_heartbeat);

	if (hdev->asic_funcs->late_fini)
		hdev->asic_funcs->late_fini(hdev);

	hdev->late_init_done = false;
}

uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
{
	struct hl_device_idle_busy_ts *ts;
	ktime_t zero_ktime, curr = ktime_get();
	u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
	s64 period_us, last_start_us, last_end_us, last_busy_time_us,
		total_busy_time_us = 0, total_busy_time_ms;

	zero_ktime = ktime_set(0, 0);
	period_us = period_ms * USEC_PER_MSEC;
	ts = &hdev->idle_busy_ts_arr[last_index];

	/* check the case that the device is currently idle */
	if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
			!ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {

		last_index--;
		/* Handle case idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

		ts = &hdev->idle_busy_ts_arr[last_index];
	}

	while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
		/* Check if we are in the last sample case, i.e. if the sample
		 * began before the sampling period. This could be a real
		 * sample or 0, so we need to handle both cases
		 */
		last_start_us = ktime_to_us(
				ktime_sub(curr, ts->idle_to_busy_ts));

		if (last_start_us > period_us) {

			/* First check two cases:
			 * 1. If the device is currently busy
			 * 2. If the device was idle during the whole sampling
			 *    period
			 */

			if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
				/* Check if the device is currently busy */
				if (ktime_compare(ts->idle_to_busy_ts,
						zero_ktime))
					return 100;

				/* We either didn't have any activity or we
				 * reached an entry which is 0. Either way,
				 * exit and return what was accumulated so far
				 */
				break;
			}

			/* If the sample has finished, check that it is relevant */
			last_end_us = ktime_to_us(
					ktime_sub(curr, ts->busy_to_idle_ts));

			if (last_end_us > period_us)
				break;

			/* It is relevant so add it but with adjustment */
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
			total_busy_time_us += last_busy_time_us -
					(last_start_us - period_us);
			break;
		}

		/* Check if the sample is finished or still open */
		if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
		else
			last_busy_time_us = ktime_to_us(
					ktime_sub(curr, ts->idle_to_busy_ts));

		total_busy_time_us += last_busy_time_us;

		last_index--;
		/* Handle case idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

		ts = &hdev->idle_busy_ts_arr[last_index];

		overlap_cnt++;
	}

	total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
						USEC_PER_MSEC);

	return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
}
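
/*
 * Worked example (illustrative): if the samples above accumulate
 * total_busy_time_us = 350000 over a period_ms = 1000 window, then
 * total_busy_time_ms = DIV_ROUND_UP_ULL(350000, 1000) = 350 and the
 * function returns DIV_ROUND_UP_ULL(350 * 100, 1000) = 35, i.e. the
 * device was busy for roughly 35% of the sampling period.
 */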

/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, therefore it is assumed that the calling function has protected
 * itself against the case of calling this function from multiple threads with
 * different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	if ((hdev->pm_mng_profile == PM_MANUAL) ||
			(hdev->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	hdev->asic_funcs->set_pll_profile(hdev, freq);

	hdev->curr_pll_profile = freq;

	return 1;
}
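
/*
 * Example (illustrative): since hl_device_set_frequency() is not protected
 * against concurrency, callers serialize it with an external lock, as
 * set_freq_to_low_job() above does:
 *
 *	mutex_lock(&hdev->fpriv_list_lock);
 *	if (!hdev->compute_ctx)
 *		hl_device_set_frequency(hdev, PLL_LOW);
 *	mutex_unlock(&hdev->fpriv_list_lock);
 */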

int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (!enable) {
		if (!hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to disable debug mode because device was not in debug mode\n");
			rc = -EFAULT;
			goto out;
		}

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->halt_coresight(hdev);

		hdev->in_debug = 0;

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->set_clock_gating(hdev);

		goto out;
	}

	if (hdev->in_debug) {
		dev_err(hdev->dev,
			"Failed to enable debug mode because device is already in debug mode\n");
		rc = -EFAULT;
		goto out;
	}

	hdev->asic_funcs->disable_clock_gating(hdev);
	hdev->in_debug = 1;

out:
	mutex_unlock(&hdev->debug_lock);

	return rc;
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	/* Block future CS/VM/JOB completion operations */
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	if (rc) {
		dev_err(hdev->dev, "Can't suspend while in reset\n");
		return -EIO;
	}

	/* This blocks all other operations that are not blocked by in_reset */
	hdev->disabled = true;

	/*
	 * Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush processes that are sending message to CPU */
	mutex_lock(&hdev->send_cpu_message_lock);
	mutex_unlock(&hdev->send_cpu_message_lock);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);

	return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
	int rc;

	pci_set_power_state(hdev->pdev, PCI_D0);
	pci_restore_state(hdev->pdev);
	rc = pci_enable_device_mem(hdev->pdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI device in resume\n");
		return rc;
	}

	pci_set_master(hdev->pdev);

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to resume device after suspend\n");
		goto disable_device;
	}

	hdev->disabled = false;
	atomic_set(&hdev->in_reset, 0);

	rc = hl_device_reset(hdev, true, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to reset device during resume\n");
		goto disable_device;
	}

	return 0;

disable_device:
	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return rc;
}

static int device_kill_open_processes(struct hl_device *hdev)
{
	u16 pending_total, pending_cnt;
	struct hl_fpriv	*hpriv;
	struct task_struct *task = NULL;

	if (hdev->pldm)
		pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
	else
		pending_total = HL_PENDING_RESET_PER_SEC;

	/* Giving time for user to close FD, and for processes that are inside
	 * hl_device_open to finish
	 */
	if (!list_empty(&hdev->fpriv_list))
		ssleep(1);

	mutex_lock(&hdev->fpriv_list_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		}
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/* We killed the open users, but because the driver cleans up after the
	 * user contexts are closed (e.g. mmu mappings), we need to wait again
	 * to make sure the cleaning phase is finished before continuing with
	 * the reset
	 */

	pending_cnt = pending_total;

	while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
		dev_info(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
}

static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work, reset_work);
	struct hl_device *hdev = device_reset_work->hdev;

	hl_device_reset(hdev, true, true);

	kfree(device_reset_work);
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 * @from_hard_reset_thread: is the caller the hard-reset thread
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
			bool from_hard_reset_thread)
{
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev,
			"Can't reset before initialization is done\n");
		return 0;
	}

	if ((!hard_reset) && (!hdev->supports_soft_reset)) {
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
		hard_reset = true;
	}

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. We only need to perform this if we didn't
	 * get here from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (rc)
			return 0;

		if (hard_reset) {
			/* Disable PCI access from device F/W so it won't send
			 * us additional interrupts. We disable MSI/MSI-X at
			 * the halt_engines function and we can't have the F/W
			 * sending us interrupts after that. We need to disable
			 * the access here because if the device is marked as
			 * disabled, the message won't be sent. Also, in case
			 * of heartbeat, the device CPU is marked as disabled
			 * so this message won't be sent
			 */
			if (hl_fw_send_pci_access_msg(hdev,
					CPUCP_PACKET_DISABLE_PCI_ACCESS))
				dev_warn(hdev->dev,
					"Failed to disable PCI access by F/W\n");
		}

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		/* Flush anyone that is inside the critical section of enqueue
		 * jobs to the H/W
		 */
		hdev->asic_funcs->hw_queues_lock(hdev);
		hdev->asic_funcs->hw_queues_unlock(hdev);

		/* Flush anyone that is inside device open */
		mutex_lock(&hdev->fpriv_list_lock);
		mutex_unlock(&hdev->fpriv_list_lock);

		dev_err(hdev->dev, "Going to RESET device!\n");
	}

again:
	if ((hard_reset) && (!from_hard_reset_thread)) {
		struct hl_device_reset_work *device_reset_work;

		hdev->hard_reset_pending = true;

		device_reset_work = kzalloc(sizeof(*device_reset_work),
						GFP_ATOMIC);
		if (!device_reset_work) {
			rc = -ENOMEM;
			goto out_err;
		}

		/*
		 * Because the reset function can't run from interrupt or
		 * from heartbeat work, we need to call the reset function
		 * from a dedicated work
		 */
		INIT_WORK(&device_reset_work->reset_work,
				device_hard_reset_pending);
		device_reset_work->hdev = hdev;
		schedule_work(&device_reset_work->reset_work);

		return 0;
	}

	if (hard_reset) {
		device_late_fini(hdev);

		/*
		 * Now that the heartbeat thread is closed, flush processes
		 * which are sending messages to CPU
		 */
		mutex_lock(&hdev->send_cpu_message_lock);
		mutex_unlock(&hdev->send_cpu_message_lock);
	}

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev);
		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset);

	if (hard_reset) {
		/* Release kernel context */
		if (hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;
		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	hdev->idle_busy_ts_idx = 0;
	hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
	hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);

	if (hdev->cs_active_cnt)
		dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
			hdev->cs_active_cnt);

	mutex_lock(&hdev->fpriv_list_lock);

	/* Make sure the context switch phase will run again */
	if (hdev->compute_ctx) {
		atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
		hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->hard_reset_pending = false;

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->compute_ctx = NULL;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled, because part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->soft_reset_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after soft reset\n");
			goto out_err;
		}
	}

	atomic_set(&hdev->in_reset, 0);

	if (hard_reset)
		hdev->hard_reset_cnt++;
	else
		hdev->soft_reset_cnt++;

	dev_warn(hdev->dev, "Successfully finished resetting the device\n");

	return 0;

out_err:
	hdev->disabled = true;

	if (hard_reset) {
		dev_err(hdev->dev,
			"Failed to reset! Device is NOT usable\n");
		hdev->hard_reset_cnt++;
	} else {
		dev_err(hdev->dev,
			"Failed to do soft-reset, trying hard reset\n");
		hdev->soft_reset_cnt++;
		hard_reset = true;
		goto again;
	}

	atomic_set(&hdev->in_reset, 0);

	return rc;
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto early_fini;

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queue are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
				sizeof(*hdev->completion_queue),
				GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto cq_fini;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->compute_ctx = NULL;

	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto mmu_fini;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	hl_debugfs_add_device(hdev);

	if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->halt_engines(hdev, true);
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	/*
	 * From this point, in case of an error, add char devices and create
	 * sysfs nodes as part of the error flow, to allow debugging.
	 */
	add_cdev_sysfs_on_err = true;

	/* Device is now enabled, because part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / 1024 / 1024 / 1024);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	hl_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	hdev->init_done = true;

	return 0;

release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	int i, rc;
	ktime_t timeout;

	dev_info(hdev->dev, "Removing device\n");

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in the middle of reset,
	 * wait until the reset function is finished. The reset function is
	 * designed to always finish. However, in Gaudi, because of all the
	 * network ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(),
				HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	while (rc) {
		usleep_range(50, 200);
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (ktime_compare(ktime_get(), timeout) > 0) {
			WARN(1, "Failed to remove device because reset function did not finish\n");
			return;
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked as disabled,
	 * the message won't be sent. Also, in case of heartbeat, the device
	 * CPU is marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

	/* Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush anyone that is inside device open */
	mutex_lock(&hdev->fpriv_list_lock);
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	device_late_fini(hdev);

	hl_debugfs_remove_device(hdev);

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, true);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	rc = device_kill_open_processes(hdev);
	if (rc)
		dev_crit(hdev->dev, "Failed to kill all open processes\n");

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true);

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}
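
/*
 * Example (illustrative sketch): a read-modify-write sequence built
 * directly on these helpers. Driver code normally goes through its
 * RREG32/WREG32 wrappers; "some_bit" here is a hypothetical field used
 * only for illustration:
 *
 *	u32 val = hl_rreg(hdev, reg);
 *
 *	val |= some_bit;
 *	hl_wreg(hdev, reg, val);
 */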