1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #define pr_fmt(fmt) "habanalabs: " fmt
9
10 #include <uapi/misc/habanalabs.h>
11 #include "habanalabs.h"
12
13 #include <linux/pci.h>
14 #include <linux/hwmon.h>
15
16 #include <trace/events/habanalabs.h>
17
18 #define HL_RESET_DELAY_USEC 10000 /* 10ms */
19
20 enum dma_alloc_type {
21 DMA_ALLOC_COHERENT,
22 DMA_ALLOC_CPU_ACCESSIBLE,
23 DMA_ALLOC_POOL,
24 };
25
26 #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788
27
28 /*
29 * hl_set_dram_bar - sets the BAR to allow later access to an address
30 *
31 * @hdev: pointer to habanalabs device structure.
32 * @addr: the address the caller wants to access.
33 * @region: the PCI region.
34 *
35 * @return: the old BAR base address on success, U64_MAX for failure.
36 * The caller should set it back to the old address after use.
37 *
38 * In case the bar space does not cover the whole address space,
39 * the bar base address should be set to allow access to a given address.
40 * This function can also be called if the bar doesn't need to be set;
41 * in that case it just won't change the base.
42 */
43 static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
44 {
45 struct asic_fixed_properties *prop = &hdev->asic_prop;
46 u64 bar_base_addr, old_base;
47
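/* Align the address down to the DRAM BAR size to get the BAR base that covers it */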
48 if (is_power_of_2(prop->dram_pci_bar_size))
49 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
50 else
51 bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
52 prop->dram_pci_bar_size;
53
54 old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
55
56 /* in case of success we need to update the new BAR base */
57 if (old_base != U64_MAX)
58 region->region_base = bar_base_addr;
59
60 return old_base;
61 }
62
63 static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
64 enum debugfs_access_type acc_type, enum pci_region region_type)
65 {
66 struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
67 void __iomem *acc_addr;
68 u64 old_base = 0, rc;
69
70 if (region_type == PCI_REGION_DRAM) {
71 old_base = hl_set_dram_bar(hdev, addr, region);
72 if (old_base == U64_MAX)
73 return -EIO;
74 }
75
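/* Translate the device address to a host virtual address inside the mapped BAR */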
76 acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
77 region->offset_in_bar;
78 switch (acc_type) {
79 case DEBUGFS_READ8:
80 *val = readb(acc_addr);
81 break;
82 case DEBUGFS_WRITE8:
83 writeb(*val, acc_addr);
84 break;
85 case DEBUGFS_READ32:
86 *val = readl(acc_addr);
87 break;
88 case DEBUGFS_WRITE32:
89 writel(*val, acc_addr);
90 break;
91 case DEBUGFS_READ64:
92 *val = readq(acc_addr);
93 break;
94 case DEBUGFS_WRITE64:
95 writeq(*val, acc_addr);
96 break;
97 }
98
99 if (region_type == PCI_REGION_DRAM) {
100 rc = hl_set_dram_bar(hdev, old_base, region);
101 if (rc == U64_MAX)
102 return -EIO;
103 }
104
105 return 0;
106 }
107
108 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
109 gfp_t flag, enum dma_alloc_type alloc_type,
110 const char *caller)
111 {
112 void *ptr = NULL;
113
114 switch (alloc_type) {
115 case DMA_ALLOC_COHERENT:
116 ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
117 break;
118 case DMA_ALLOC_CPU_ACCESSIBLE:
119 ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
120 break;
121 case DMA_ALLOC_POOL:
122 ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
123 break;
124 }
125
126 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
127 trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
128 caller);
129
130 return ptr;
131 }
132
133 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
134 dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
135 const char *caller)
136 {
137 switch (alloc_type) {
138 case DMA_ALLOC_COHERENT:
139 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
140 break;
141 case DMA_ALLOC_CPU_ACCESSIBLE:
142 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr);
143 break;
144 case DMA_ALLOC_POOL:
145 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
146 break;
147 }
148
149 trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
150 }
151
152 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
153 gfp_t flag, const char *caller)
154 {
155 return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller);
156 }
157
158 void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
159 dma_addr_t dma_handle, const char *caller)
160 {
161 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
162 }
163
164 void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
165 dma_addr_t *dma_handle, const char *caller)
166 {
167 return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
168 }
169
170 void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
171 const char *caller)
172 {
173 hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
174 }
175
176 void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
177 dma_addr_t *dma_handle, const char *caller)
178 {
179 return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller);
180 }
181
182 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
183 const char *caller)
184 {
185 hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
186 }
187
188 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
189 {
190 struct asic_fixed_properties *prop = &hdev->asic_prop;
191 struct scatterlist *sg;
192 int rc, i;
193
194 rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
195 if (rc)
196 return rc;
197
198 /* Shift to the device's base physical address of host memory if necessary */
199 if (prop->device_dma_offset_for_host_access)
200 for_each_sgtable_dma_sg(sgt, sg, i)
201 sg->dma_address += prop->device_dma_offset_for_host_access;
202
203 return 0;
204 }
205
206 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
207 {
208 struct asic_fixed_properties *prop = &hdev->asic_prop;
209 struct scatterlist *sg;
210 int i;
211
212 /* Cancel the device's base physical address of host memory if necessary */
213 if (prop->device_dma_offset_for_host_access)
214 for_each_sgtable_dma_sg(sgt, sg, i)
215 sg->dma_address -= prop->device_dma_offset_for_host_access;
216
217 dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
218 }
219
220 /*
221 * hl_access_cfg_region - access the config region
222 *
223 * @hdev: pointer to habanalabs device structure
224 * @addr: the address to access
225 * @val: the value to write, or the variable to store the read value into
226 * @acc_type: the type of access (read/write 64/32)
227 */
228 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
229 enum debugfs_access_type acc_type)
230 {
231 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
232 u32 val_h, val_l;
233
234 if (!IS_ALIGNED(addr, sizeof(u32))) {
235 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
236 return -EINVAL;
237 }
238
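/* Config space is accessed via 32-bit register I/O; 64-bit accesses are split into two 32-bit ones */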
239 switch (acc_type) {
240 case DEBUGFS_READ32:
241 *val = RREG32(addr - cfg_region->region_base);
242 break;
243 case DEBUGFS_WRITE32:
244 WREG32(addr - cfg_region->region_base, *val);
245 break;
246 case DEBUGFS_READ64:
247 val_l = RREG32(addr - cfg_region->region_base);
248 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);
249
250 *val = (((u64) val_h) << 32) | val_l;
251 break;
252 case DEBUGFS_WRITE64:
253 WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
254 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
255 break;
256 default:
257 dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
258 return -EOPNOTSUPP;
259 }
260
261 return 0;
262 }
263
264 /*
265 * hl_access_dev_mem - access device memory
266 *
267 * @hdev: pointer to habanalabs device structure
268 * @region_type: the type of the region the address belongs to
269 * @addr: the address to access
270 * @val: the value to write, or the variable to store the read value into
271 * @acc_type: the type of access (r/w, 32/64)
272 */
273 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
274 u64 addr, u64 *val, enum debugfs_access_type acc_type)
275 {
276 switch (region_type) {
277 case PCI_REGION_CFG:
278 return hl_access_cfg_region(hdev, addr, val, acc_type);
279 case PCI_REGION_SRAM:
280 case PCI_REGION_DRAM:
281 return hl_access_sram_dram_region(hdev, addr, val, acc_type,
282 region_type);
283 default:
284 return -EFAULT;
285 }
286
287 return 0;
288 }
289
290 void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
291 {
292 va_list args;
293 int str_size;
294
295 va_start(args, fmt);
296 /* Calculate formatted string length. vsnprintf() returns the length excluding the
297 * terminating NUL, hence increment the result by 1
298 */
299 str_size = vsnprintf(NULL, 0, fmt, args) + 1;
300 va_end(args);
301
302 if ((e->actual_size + str_size) < e->allocated_buf_size) {
303 va_start(args, fmt);
304 vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
305 va_end(args);
306 }
307
308 /* Need to update the size even when not updating destination buffer to get the exact size
309 * of all input strings
310 */
311 e->actual_size += str_size;
312 }
313
314 enum hl_device_status hl_device_status(struct hl_device *hdev)
315 {
316 enum hl_device_status status;
317
318 if (hdev->reset_info.in_reset) {
319 if (hdev->reset_info.in_compute_reset)
320 status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
321 else
322 status = HL_DEVICE_STATUS_IN_RESET;
323 } else if (hdev->reset_info.needs_reset) {
324 status = HL_DEVICE_STATUS_NEEDS_RESET;
325 } else if (hdev->disabled) {
326 status = HL_DEVICE_STATUS_MALFUNCTION;
327 } else if (!hdev->init_done) {
328 status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
329 } else {
330 status = HL_DEVICE_STATUS_OPERATIONAL;
331 }
332
333 return status;
334 }
335
336 bool hl_device_operational(struct hl_device *hdev,
337 enum hl_device_status *status)
338 {
339 enum hl_device_status current_status;
340
341 current_status = hl_device_status(hdev);
342 if (status)
343 *status = current_status;
344
345 switch (current_status) {
346 case HL_DEVICE_STATUS_IN_RESET:
347 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
348 case HL_DEVICE_STATUS_MALFUNCTION:
349 case HL_DEVICE_STATUS_NEEDS_RESET:
350 return false;
351 case HL_DEVICE_STATUS_OPERATIONAL:
352 case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
353 default:
354 return true;
355 }
356 }
357
358 static void hpriv_release(struct kref *ref)
359 {
360 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
361 bool device_is_idle = true;
362 struct hl_fpriv *hpriv;
363 struct hl_device *hdev;
364
365 hpriv = container_of(ref, struct hl_fpriv, refcount);
366
367 hdev = hpriv->hdev;
368
369 hdev->asic_funcs->send_device_activity(hdev, false);
370
371 put_pid(hpriv->taskpid);
372
373 hl_debugfs_remove_file(hpriv);
374
375 mutex_destroy(&hpriv->ctx_lock);
376 mutex_destroy(&hpriv->restore_phase_mutex);
377
378 if ((!hdev->pldm) && (hdev->pdev) &&
379 (!hdev->asic_funcs->is_device_idle(hdev,
380 idle_mask,
381 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
382 dev_err(hdev->dev,
383 "device not idle after user context is closed (0x%llx_%llx)\n",
384 idle_mask[1], idle_mask[0]);
385
386 device_is_idle = false;
387 }
388
389 /* We need to remove the user from the list to make sure the reset process won't
390 * try to kill the user process. Because, if we got here, it means there are no
391 * more driver/device resources that the user process is occupying so there is
392 * no need to kill it
393 *
394 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
395 * a race between the release and opening the device again. We don't want to let
396 * a user open the device while a reset is about to happen.
397 */
398 mutex_lock(&hdev->fpriv_list_lock);
399 list_del(&hpriv->dev_node);
400 mutex_unlock(&hdev->fpriv_list_lock);
401
402 if (!device_is_idle || hdev->reset_upon_device_release) {
403 hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
404 } else {
405 int rc = hdev->asic_funcs->scrub_device_mem(hdev);
406
407 if (rc)
408 dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
409 }
410
411 /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
412 * thread, we don't care because in_reset is marked, so if a user tries to open
413 * the device it will fail, even if compute_ctx is false.
414 */
415 mutex_lock(&hdev->fpriv_list_lock);
416 hdev->is_compute_ctx_active = false;
417 mutex_unlock(&hdev->fpriv_list_lock);
418
419 hdev->compute_ctx_in_release = 0;
420
421 /* release the eventfd */
422 if (hpriv->notifier_event.eventfd)
423 eventfd_ctx_put(hpriv->notifier_event.eventfd);
424
425 mutex_destroy(&hpriv->notifier_event.lock);
426
427 kfree(hpriv);
428 }
429
430 void hl_hpriv_get(struct hl_fpriv *hpriv)
431 {
432 kref_get(&hpriv->refcount);
433 }
434
435 int hl_hpriv_put(struct hl_fpriv *hpriv)
436 {
437 return kref_put(&hpriv->refcount, hpriv_release);
438 }
439
440 /*
441 * hl_device_release - release function for habanalabs device
442 *
443 * @inode: pointer to inode structure
444 * @filp: pointer to file structure
445 *
446 * Called when a process closes a habanalabs device
447 */
448 static int hl_device_release(struct inode *inode, struct file *filp)
449 {
450 struct hl_fpriv *hpriv = filp->private_data;
451 struct hl_device *hdev = hpriv->hdev;
452
453 filp->private_data = NULL;
454
455 if (!hdev) {
456 pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
457 put_pid(hpriv->taskpid);
458 return 0;
459 }
460
461 /* Each pending user interrupt holds the user's context, hence we
462 * must release them all before calling hl_ctx_mgr_fini().
463 */
464 hl_release_pending_user_interrupts(hpriv->hdev);
465
466 hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
467 hl_mem_mgr_fini(&hpriv->mem_mgr);
468
469 hdev->compute_ctx_in_release = 1;
470
471 if (!hl_hpriv_put(hpriv))
472 dev_notice(hdev->dev,
473 "User process closed FD but device still in use\n");
474
475 hdev->last_open_session_duration_jif =
476 jiffies - hdev->last_successful_open_jif;
477
478 return 0;
479 }
480
481 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
482 {
483 struct hl_fpriv *hpriv = filp->private_data;
484 struct hl_device *hdev = hpriv->hdev;
485
486 filp->private_data = NULL;
487
488 if (!hdev) {
489 pr_err("Closing FD after device was removed\n");
490 goto out;
491 }
492
493 mutex_lock(&hdev->fpriv_ctrl_list_lock);
494 list_del(&hpriv->dev_node);
495 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
496 out:
497 /* release the eventfd */
498 if (hpriv->notifier_event.eventfd)
499 eventfd_ctx_put(hpriv->notifier_event.eventfd);
500
501 mutex_destroy(&hpriv->notifier_event.lock);
502 put_pid(hpriv->taskpid);
503
504 kfree(hpriv);
505
506 return 0;
507 }
508
509 /*
510 * hl_mmap - mmap function for habanalabs device
511 *
512 * @*filp: pointer to file structure
513 * @*vma: pointer to vm_area_struct of the process
514 *
515 * Called when a process does an mmap on a habanalabs device. Call the relevant mmap
516 * function at the end of the common code.
517 */
518 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
519 {
520 struct hl_fpriv *hpriv = filp->private_data;
521 struct hl_device *hdev = hpriv->hdev;
522 unsigned long vm_pgoff;
523
524 if (!hdev) {
525 pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
526 return -ENODEV;
527 }
528
529 vm_pgoff = vma->vm_pgoff;
530
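/* The high bits of the mmap offset encode the type of object being mapped */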
531 switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
532 case HL_MMAP_TYPE_BLOCK:
533 vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
534 return hl_hw_block_mmap(hpriv, vma);
535
536 case HL_MMAP_TYPE_CB:
537 case HL_MMAP_TYPE_TS_BUFF:
538 return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
539 }
540 return -EINVAL;
541 }
542
543 static const struct file_operations hl_ops = {
544 .owner = THIS_MODULE,
545 .open = hl_device_open,
546 .release = hl_device_release,
547 .mmap = hl_mmap,
548 .unlocked_ioctl = hl_ioctl,
549 .compat_ioctl = hl_ioctl
550 };
551
552 static const struct file_operations hl_ctrl_ops = {
553 .owner = THIS_MODULE,
554 .open = hl_device_open_ctrl,
555 .release = hl_device_release_ctrl,
556 .unlocked_ioctl = hl_ioctl_control,
557 .compat_ioctl = hl_ioctl_control
558 };
559
560 static void device_release_func(struct device *dev)
561 {
562 kfree(dev);
563 }
564
565 /*
566 * device_init_cdev - Initialize cdev and device for habanalabs device
567 *
568 * @hdev: pointer to habanalabs device structure
569 * @hclass: pointer to the class object of the device
570 * @minor: minor number of the specific device
571 * @fops: file operations to install for this device
572 * @name: name of the device as it will appear in the filesystem
573 * @cdev: pointer to the char device object that will be initialized
574 * @dev: pointer to the device object that will be initialized
575 *
576 * Initialize a cdev and a Linux device for the habanalabs device.
577 */
578 static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
579 int minor, const struct file_operations *fops,
580 char *name, struct cdev *cdev,
581 struct device **dev)
582 {
583 cdev_init(cdev, fops);
584 cdev->owner = THIS_MODULE;
585
586 *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
587 if (!*dev)
588 return -ENOMEM;
589
590 device_initialize(*dev);
591 (*dev)->devt = MKDEV(hdev->major, minor);
592 (*dev)->class = hclass;
593 (*dev)->release = device_release_func;
594 dev_set_drvdata(*dev, hdev);
595 dev_set_name(*dev, "%s", name);
596
597 return 0;
598 }
599
600 static int device_cdev_sysfs_add(struct hl_device *hdev)
601 {
602 int rc;
603
604 rc = cdev_device_add(&hdev->cdev, hdev->dev);
605 if (rc) {
606 dev_err(hdev->dev,
607 "failed to add a char device to the system\n");
608 return rc;
609 }
610
611 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
612 if (rc) {
613 dev_err(hdev->dev,
614 "failed to add a control char device to the system\n");
615 goto delete_cdev_device;
616 }
617
618 /* hl_sysfs_init() must be done after adding the device to the system */
619 rc = hl_sysfs_init(hdev);
620 if (rc) {
621 dev_err(hdev->dev, "failed to initialize sysfs\n");
622 goto delete_ctrl_cdev_device;
623 }
624
625 hdev->cdev_sysfs_created = true;
626
627 return 0;
628
629 delete_ctrl_cdev_device:
630 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
631 delete_cdev_device:
632 cdev_device_del(&hdev->cdev, hdev->dev);
633 return rc;
634 }
635
636 static void device_cdev_sysfs_del(struct hl_device *hdev)
637 {
638 if (!hdev->cdev_sysfs_created)
639 goto put_devices;
640
641 hl_sysfs_fini(hdev);
642 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
643 cdev_device_del(&hdev->cdev, hdev->dev);
644
645 put_devices:
646 put_device(hdev->dev);
647 put_device(hdev->dev_ctrl);
648 }
649
650 static void device_hard_reset_pending(struct work_struct *work)
651 {
652 struct hl_device_reset_work *device_reset_work =
653 container_of(work, struct hl_device_reset_work, reset_work.work);
654 struct hl_device *hdev = device_reset_work->hdev;
655 u32 flags;
656 int rc;
657
658 flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
659
660 rc = hl_device_reset(hdev, flags);
661 if ((rc == -EBUSY) && !hdev->device_fini_pending) {
662 dev_info(hdev->dev,
663 "Could not reset device. will try again in %u seconds",
664 HL_PENDING_RESET_PER_SEC);
665
666 queue_delayed_work(device_reset_work->wq,
667 &device_reset_work->reset_work,
668 msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
669 }
670 }
671
672 /*
673 * device_early_init - do some early initialization for the habanalabs device
674 *
675 * @hdev: pointer to habanalabs device structure
676 *
677 * Install the relevant function pointers and call the early_init function,
678 * if such a function exists
679 */
680 static int device_early_init(struct hl_device *hdev)
681 {
682 int i, rc;
683 char workq_name[32];
684
685 switch (hdev->asic_type) {
686 case ASIC_GOYA:
687 goya_set_asic_funcs(hdev);
688 strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
689 break;
690 case ASIC_GAUDI:
691 gaudi_set_asic_funcs(hdev);
692 strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
693 break;
694 case ASIC_GAUDI_SEC:
695 gaudi_set_asic_funcs(hdev);
696 strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
697 break;
698 case ASIC_GAUDI2:
699 gaudi2_set_asic_funcs(hdev);
700 strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
701 break;
702 case ASIC_GAUDI2_SEC:
703 gaudi2_set_asic_funcs(hdev);
704 strscpy(hdev->asic_name, "GAUDI2 SEC", sizeof(hdev->asic_name));
705 break;
706 default:
707 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
708 hdev->asic_type);
709 return -EINVAL;
710 }
711
712 rc = hdev->asic_funcs->early_init(hdev);
713 if (rc)
714 return rc;
715
716 rc = hl_asid_init(hdev);
717 if (rc)
718 goto early_fini;
719
720 if (hdev->asic_prop.completion_queues_count) {
721 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
722 sizeof(struct workqueue_struct *),
723 GFP_KERNEL);
724 if (!hdev->cq_wq) {
725 rc = -ENOMEM;
726 goto asid_fini;
727 }
728 }
729
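/* Allocate one single-threaded workqueue per completion queue (named "hl-free-jobs-N") */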
730 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
731 snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
732 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
733 if (hdev->cq_wq[i] == NULL) {
734 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
735 rc = -ENOMEM;
736 goto free_cq_wq;
737 }
738 }
739
740 hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
741 if (hdev->eq_wq == NULL) {
742 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
743 rc = -ENOMEM;
744 goto free_cq_wq;
745 }
746
747 hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
748 if (!hdev->cs_cmplt_wq) {
749 dev_err(hdev->dev,
750 "Failed to allocate CS completions workqueue\n");
751 rc = -ENOMEM;
752 goto free_eq_wq;
753 }
754
755 hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
756 if (!hdev->ts_free_obj_wq) {
757 dev_err(hdev->dev,
758 "Failed to allocate Timestamp registration free workqueue\n");
759 rc = -ENOMEM;
760 goto free_cs_cmplt_wq;
761 }
762
763 hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
764 if (!hdev->pf_wq) {
765 dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
766 rc = -ENOMEM;
767 goto free_ts_free_wq;
768 }
769
770 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
771 GFP_KERNEL);
772 if (!hdev->hl_chip_info) {
773 rc = -ENOMEM;
774 goto free_pf_wq;
775 }
776
777 rc = hl_mmu_if_set_funcs(hdev);
778 if (rc)
779 goto free_chip_info;
780
781 hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);
782
783 hdev->device_reset_work.wq =
784 create_singlethread_workqueue("hl_device_reset");
785 if (!hdev->device_reset_work.wq) {
786 rc = -ENOMEM;
787 dev_err(hdev->dev, "Failed to create device reset WQ\n");
788 goto free_cb_mgr;
789 }
790
791 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
792 device_hard_reset_pending);
793 hdev->device_reset_work.hdev = hdev;
794 hdev->device_fini_pending = 0;
795
796 mutex_init(&hdev->send_cpu_message_lock);
797 mutex_init(&hdev->debug_lock);
798 INIT_LIST_HEAD(&hdev->cs_mirror_list);
799 spin_lock_init(&hdev->cs_mirror_lock);
800 spin_lock_init(&hdev->reset_info.lock);
801 INIT_LIST_HEAD(&hdev->fpriv_list);
802 INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
803 mutex_init(&hdev->fpriv_list_lock);
804 mutex_init(&hdev->fpriv_ctrl_list_lock);
805 mutex_init(&hdev->clk_throttling.lock);
806
807 return 0;
808
809 free_cb_mgr:
810 hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
811 free_chip_info:
812 kfree(hdev->hl_chip_info);
813 free_pf_wq:
814 destroy_workqueue(hdev->pf_wq);
815 free_ts_free_wq:
816 destroy_workqueue(hdev->ts_free_obj_wq);
817 free_cs_cmplt_wq:
818 destroy_workqueue(hdev->cs_cmplt_wq);
819 free_eq_wq:
820 destroy_workqueue(hdev->eq_wq);
821 free_cq_wq:
822 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
823 if (hdev->cq_wq[i])
824 destroy_workqueue(hdev->cq_wq[i]);
825 kfree(hdev->cq_wq);
826 asid_fini:
827 hl_asid_fini(hdev);
828 early_fini:
829 if (hdev->asic_funcs->early_fini)
830 hdev->asic_funcs->early_fini(hdev);
831
832 return rc;
833 }
834
835 /*
836 * device_early_fini - finalize all that was done in device_early_init
837 *
838 * @hdev: pointer to habanalabs device structure
839 *
840 */
841 static void device_early_fini(struct hl_device *hdev)
842 {
843 int i;
844
845 mutex_destroy(&hdev->debug_lock);
846 mutex_destroy(&hdev->send_cpu_message_lock);
847
848 mutex_destroy(&hdev->fpriv_list_lock);
849 mutex_destroy(&hdev->fpriv_ctrl_list_lock);
850
851 mutex_destroy(&hdev->clk_throttling.lock);
852
853 hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
854
855 kfree(hdev->hl_chip_info);
856
857 destroy_workqueue(hdev->pf_wq);
858 destroy_workqueue(hdev->ts_free_obj_wq);
859 destroy_workqueue(hdev->cs_cmplt_wq);
860 destroy_workqueue(hdev->eq_wq);
861 destroy_workqueue(hdev->device_reset_work.wq);
862
863 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
864 destroy_workqueue(hdev->cq_wq[i]);
865 kfree(hdev->cq_wq);
866
867 hl_asid_fini(hdev);
868
869 if (hdev->asic_funcs->early_fini)
870 hdev->asic_funcs->early_fini(hdev);
871 }
872
873 static bool is_pci_link_healthy(struct hl_device *hdev)
874 {
875 u16 vendor_id;
876
877 if (!hdev->pdev)
878 return false;
879
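/* If the PCI link is down, the config-space read won't return the expected vendor ID */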
880 pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
881
882 return (vendor_id == PCI_VENDOR_ID_HABANALABS);
883 }
884
885 static void hl_device_heartbeat(struct work_struct *work)
886 {
887 struct hl_device *hdev = container_of(work, struct hl_device,
888 work_heartbeat.work);
889
890 if (!hl_device_operational(hdev, NULL))
891 goto reschedule;
892
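/* send_heartbeat() returns 0 when the F/W responded to the heartbeat packet */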
893 if (!hdev->asic_funcs->send_heartbeat(hdev))
894 goto reschedule;
895
896 if (hl_device_operational(hdev, NULL))
897 dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n",
898 is_pci_link_healthy(hdev) ? "healthy" : "broken");
899
900 hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
901
902 return;
903
904 reschedule:
905 /*
906 * prev_reset_trigger tracks consecutive fatal h/w errors until first
907 * heartbeat immediately post reset.
908 * If control reached here, then at least one heartbeat work has been
909 * scheduled since last reset/init cycle.
910 * So if the device is not already in reset cycle, reset the flag
911 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
912 * status for at least one heartbeat. From this point driver restarts
913 * tracking future consecutive fatal errors.
914 */
915 if (!hdev->reset_info.in_reset)
916 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
917
918 schedule_delayed_work(&hdev->work_heartbeat,
919 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
920 }
921
922 /*
923 * device_late_init - do late initialization for the habanalabs device
924 *
925 * @hdev: pointer to habanalabs device structure
926 *
927 * Do stuff that either needs the device H/W queues to be active or needs
928 * to happen after all the rest of the initialization is finished
929 */
930 static int device_late_init(struct hl_device *hdev)
931 {
932 int rc;
933
934 if (hdev->asic_funcs->late_init) {
935 rc = hdev->asic_funcs->late_init(hdev);
936 if (rc) {
937 dev_err(hdev->dev,
938 "failed late initialization for the H/W\n");
939 return rc;
940 }
941 }
942
943 hdev->high_pll = hdev->asic_prop.high_pll;
944
945 if (hdev->heartbeat) {
946 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
947 schedule_delayed_work(&hdev->work_heartbeat,
948 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
949 }
950
951 hdev->late_init_done = true;
952
953 return 0;
954 }
955
956 /*
957 * device_late_fini - finalize all that was done in device_late_init
958 *
959 * @hdev: pointer to habanalabs device structure
960 *
961 */
962 static void device_late_fini(struct hl_device *hdev)
963 {
964 if (!hdev->late_init_done)
965 return;
966
967 if (hdev->heartbeat)
968 cancel_delayed_work_sync(&hdev->work_heartbeat);
969
970 if (hdev->asic_funcs->late_fini)
971 hdev->asic_funcs->late_fini(hdev);
972
973 hdev->late_init_done = false;
974 }
975
976 int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
977 {
978 u64 max_power, curr_power, dc_power, dividend;
979 int rc;
980
981 max_power = hdev->max_power;
982 dc_power = hdev->asic_prop.dc_power_default;
983 rc = hl_fw_cpucp_power_get(hdev, &curr_power);
984
985 if (rc)
986 return rc;
987
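/* Utilization is the current draw above DC (idle) power, as a percentage of the DC-to-max power range */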
988 curr_power = clamp(curr_power, dc_power, max_power);
989
990 dividend = (curr_power - dc_power) * 100;
991 *utilization = (u32) div_u64(dividend, (max_power - dc_power));
992
993 return 0;
994 }
995
996 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
997 {
998 int rc = 0;
999
1000 mutex_lock(&hdev->debug_lock);
1001
1002 if (!enable) {
1003 if (!hdev->in_debug) {
1004 dev_err(hdev->dev,
1005 "Failed to disable debug mode because device was not in debug mode\n");
1006 rc = -EFAULT;
1007 goto out;
1008 }
1009
1010 if (!hdev->reset_info.hard_reset_pending)
1011 hdev->asic_funcs->halt_coresight(hdev, ctx);
1012
1013 hdev->in_debug = 0;
1014
1015 goto out;
1016 }
1017
1018 if (hdev->in_debug) {
1019 dev_err(hdev->dev,
1020 "Failed to enable debug mode because device is already in debug mode\n");
1021 rc = -EFAULT;
1022 goto out;
1023 }
1024
1025 hdev->in_debug = 1;
1026
1027 out:
1028 mutex_unlock(&hdev->debug_lock);
1029
1030 return rc;
1031 }
1032
1033 static void take_release_locks(struct hl_device *hdev)
1034 {
1035 /* Flush anyone that is inside the critical section of enqueue
1036 * jobs to the H/W
1037 */
1038 hdev->asic_funcs->hw_queues_lock(hdev);
1039 hdev->asic_funcs->hw_queues_unlock(hdev);
1040
1041 /* Flush processes that are sending message to CPU */
1042 mutex_lock(&hdev->send_cpu_message_lock);
1043 mutex_unlock(&hdev->send_cpu_message_lock);
1044
1045 /* Flush anyone that is inside device open */
1046 mutex_lock(&hdev->fpriv_list_lock);
1047 mutex_unlock(&hdev->fpriv_list_lock);
1048 mutex_lock(&hdev->fpriv_ctrl_list_lock);
1049 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
1050 }
1051
1052 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
1053 bool skip_wq_flush)
1054 {
1055 if (hard_reset)
1056 device_late_fini(hdev);
1057
1058 /*
1059 * Halt the engines and disable interrupts so we won't get any more
1060 * completions from H/W and we won't have any accesses from the
1061 * H/W to the host machine
1062 */
1063 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
1064
1065 /* Go over all the queues, release all CS and their jobs */
1066 hl_cs_rollback_all(hdev, skip_wq_flush);
1067
1068 /* flush the MMU prefetch workqueue */
1069 flush_workqueue(hdev->pf_wq);
1070
1071 /* Release all pending user interrupts, each pending user interrupt
1072 * holds a reference to user context
1073 */
1074 hl_release_pending_user_interrupts(hdev);
1075 }
1076
1077 /*
1078 * hl_device_suspend - initiate device suspend
1079 *
1080 * @hdev: pointer to habanalabs device structure
1081 *
1082 * Puts the hw in the suspend state (all asics).
1083 * Returns 0 for success or an error on failure.
1084 * Called at driver suspend.
1085 */
1086 int hl_device_suspend(struct hl_device *hdev)
1087 {
1088 int rc;
1089
1090 pci_save_state(hdev->pdev);
1091
1092 /* Block future CS/VM/JOB completion operations */
1093 spin_lock(&hdev->reset_info.lock);
1094 if (hdev->reset_info.in_reset) {
1095 spin_unlock(&hdev->reset_info.lock);
1096 dev_err(hdev->dev, "Can't suspend while in reset\n");
1097 return -EIO;
1098 }
1099 hdev->reset_info.in_reset = 1;
1100 spin_unlock(&hdev->reset_info.lock);
1101
1102 /* This blocks all other stuff that is not blocked by in_reset */
1103 hdev->disabled = true;
1104
1105 take_release_locks(hdev);
1106
1107 rc = hdev->asic_funcs->suspend(hdev);
1108 if (rc)
1109 dev_err(hdev->dev,
1110 "Failed to disable PCI access of device CPU\n");
1111
1112 /* Shut down the device */
1113 pci_disable_device(hdev->pdev);
1114 pci_set_power_state(hdev->pdev, PCI_D3hot);
1115
1116 return 0;
1117 }
1118
1119 /*
1120 * hl_device_resume - initiate device resume
1121 *
1122 * @hdev: pointer to habanalabs device structure
1123 *
1124 * Bring the hw back to operating state (all asics).
1125 * Returns 0 for success or an error on failure.
1126 * Called at driver resume.
1127 */
1128 int hl_device_resume(struct hl_device *hdev)
1129 {
1130 int rc;
1131
1132 pci_set_power_state(hdev->pdev, PCI_D0);
1133 pci_restore_state(hdev->pdev);
1134 rc = pci_enable_device_mem(hdev->pdev);
1135 if (rc) {
1136 dev_err(hdev->dev,
1137 "Failed to enable PCI device in resume\n");
1138 return rc;
1139 }
1140
1141 pci_set_master(hdev->pdev);
1142
1143 rc = hdev->asic_funcs->resume(hdev);
1144 if (rc) {
1145 dev_err(hdev->dev, "Failed to resume device after suspend\n");
1146 goto disable_device;
1147 }
1148
1149
1150 /* 'in_reset' was set to true during suspend, now we must clear it in order
1151 * for hard reset to be performed
1152 */
1153 spin_lock(&hdev->reset_info.lock);
1154 hdev->reset_info.in_reset = 0;
1155 spin_unlock(&hdev->reset_info.lock);
1156
1157 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
1158 if (rc) {
1159 dev_err(hdev->dev, "Failed to reset device during resume\n");
1160 goto disable_device;
1161 }
1162
1163 return 0;
1164
1165 disable_device:
1166 pci_clear_master(hdev->pdev);
1167 pci_disable_device(hdev->pdev);
1168
1169 return rc;
1170 }
1171
1172 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
1173 {
1174 struct task_struct *task = NULL;
1175 struct list_head *fd_list;
1176 struct hl_fpriv *hpriv;
1177 struct mutex *fd_lock;
1178 u32 pending_cnt;
1179
1180 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
1181 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1182
1183 /* Giving time for user to close FD, and for processes that are inside
1184 * hl_device_open to finish
1185 */
1186 if (!list_empty(fd_list))
1187 ssleep(1);
1188
1189 if (timeout) {
1190 pending_cnt = timeout;
1191 } else {
1192 if (hdev->process_kill_trial_cnt) {
1193 /* Processes have been already killed */
1194 pending_cnt = 1;
1195 goto wait_for_processes;
1196 } else {
1197 /* Wait a small period after process kill */
1198 pending_cnt = HL_PENDING_RESET_PER_SEC;
1199 }
1200 }
1201
1202 mutex_lock(fd_lock);
1203
1204 /* This section must be protected because we are dereferencing
1205 * pointers that are freed if the process exits
1206 */
1207 list_for_each_entry(hpriv, fd_list, dev_node) {
1208 task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
1209 if (task) {
1210 dev_info(hdev->dev, "Killing user process pid=%d\n",
1211 task_pid_nr(task));
1212 send_sig(SIGKILL, task, 1);
1213 usleep_range(1000, 10000);
1214
1215 put_task_struct(task);
1216 } else {
1217 /*
1218 * If we got here, it means that process was killed from outside the driver
1219 * right after it started looping on fd_list and before get_pid_task, thus
1220 * we don't need to kill it.
1221 */
1222 dev_dbg(hdev->dev,
1223 "Can't get task struct for user process, assuming process was killed from outside the driver\n");
1224 }
1225 }
1226
1227 mutex_unlock(fd_lock);
1228
1229 /*
1230 * We killed the open users, but that doesn't mean they are closed.
1231 * It could be that they are running a long cleanup phase in the driver
1232 * e.g. MMU unmappings, or running other long teardown flow even before
1233 * our cleanup.
1234 * Therefore we need to wait again to make sure they are closed before
1235 * continuing with the reset.
1236 */
1237
1238 wait_for_processes:
1239 while ((!list_empty(fd_list)) && (pending_cnt)) {
1240 dev_dbg(hdev->dev,
1241 "Waiting for all unmap operations to finish before hard reset\n");
1242
1243 pending_cnt--;
1244
1245 ssleep(1);
1246 }
1247
1248 /* All processes exited successfully */
1249 if (list_empty(fd_list))
1250 return 0;
1251
1252 /* Give up waiting for processes to exit */
1253 if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
1254 return -ETIME;
1255
1256 hdev->process_kill_trial_cnt++;
1257
1258 return -EBUSY;
1259 }
1260
1261 static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
1262 {
1263 struct list_head *fd_list;
1264 struct hl_fpriv *hpriv;
1265 struct mutex *fd_lock;
1266
1267 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
1268 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1269
1270 mutex_lock(fd_lock);
1271 list_for_each_entry(hpriv, fd_list, dev_node)
1272 hpriv->hdev = NULL;
1273 mutex_unlock(fd_lock);
1274 }
1275
1276 static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
1277 {
1278 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
1279
1280 /*
1281 * 'reset cause' is being updated here, because getting here
1282 * means that it's the 1st time and the last time we're here
1283 * ('in_reset' makes sure of it). This makes sure that
1284 * 'reset_cause' will continue holding its 1st recorded reason!
1285 */
1286 if (flags & HL_DRV_RESET_HEARTBEAT) {
1287 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
1288 cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
1289 } else if (flags & HL_DRV_RESET_TDR) {
1290 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
1291 cur_reset_trigger = HL_DRV_RESET_TDR;
1292 } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
1293 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
1294 cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
1295 } else {
1296 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
1297 }
1298
1299 /*
1300 * If the reset cause is the same twice in a row, reset_trigger_repeated
1301 * is set. If this reset is due to a fatal FW error, the
1302 * device is set to an unstable state.
1303 */
1304 if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
1305 hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
1306 hdev->reset_info.reset_trigger_repeated = 0;
1307 } else {
1308 hdev->reset_info.reset_trigger_repeated = 1;
1309 }
1310
1311 /* If reset is due to heartbeat, device CPU is not responsive,
1312 * in which case there is no point in sending it a PCI disable message.
1313 *
1314 * If F/W is performing the reset, no need to send it a message to disable
1315 * PCI access
1316 */
1317 if ((flags & HL_DRV_RESET_HARD) &&
1318 !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
1319 /* Disable PCI access from device F/W so it won't send
1320 * us additional interrupts. We disable MSI/MSI-X at
1321 * the halt_engines function and we can't have the F/W
1322 * sending us interrupts after that. We need to disable
1323 * the access here because if the device is marked
1324 * disabled, the message won't be sent. Also, in case
1325 * of heartbeat, the device CPU is marked as disabled
1326 * so this message won't be sent
1327 */
1328 if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
1329 dev_warn(hdev->dev,
1330 "Failed to disable PCI access by F/W\n");
1331 }
1332 }
1333
1334 /*
1335 * hl_device_reset - reset the device
1336 *
1337 * @hdev: pointer to habanalabs device structure
1338 * @flags: reset flags.
1339 *
1340 * Block future CS and wait for pending CS to be enqueued
1341 * Call ASIC H/W fini
1342 * Flush all completions
1343 * Re-initialize all internal data structures
1344 * Call ASIC H/W init, late_init
1345 * Test queues
1346 * Enable device
1347 *
1348 * Returns 0 for success or an error on failure.
1349 */
1350 int hl_device_reset(struct hl_device *hdev, u32 flags)
1351 {
1352 bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
1353 reset_upon_device_release = false, schedule_hard_reset = false,
1354 skip_wq_flush, delay_reset;
1355 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
1356 struct hl_ctx *ctx;
1357 int i, rc;
1358
1359 if (!hdev->init_done) {
1360 dev_err(hdev->dev, "Can't reset before initialization is done\n");
1361 return 0;
1362 }
1363
1364 hard_reset = !!(flags & HL_DRV_RESET_HARD);
1365 from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
1366 fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
1367 skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
1368 delay_reset = !!(flags & HL_DRV_RESET_DELAY);
1369
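/* If the ASIC doesn't support compute (soft) reset, escalate to a hard reset */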
1370 if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
1371 hard_instead_soft = true;
1372 hard_reset = true;
1373 }
1374
1375 if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) {
1376 if (hard_reset) {
1377 dev_crit(hdev->dev,
1378 "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
1379 return -EINVAL;
1380 }
1381
1382 reset_upon_device_release = true;
1383
1384 goto do_reset;
1385 }
1386
1387 if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
1388 hard_instead_soft = true;
1389 hard_reset = true;
1390 }
1391
1392 if (hard_instead_soft)
1393 dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n");
1394
1395 do_reset:
1396 /* Re-entry of reset thread */
1397 if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
1398 goto kill_processes;
1399
1400 /*
1401 * Prevent concurrency in this function - only one reset should be
1402 * done at any given time. Only need to perform this if we didn't
1403 * get from the dedicated hard reset thread
1404 */
1405 if (!from_hard_reset_thread) {
1406 /* Block future CS/VM/JOB completion operations */
1407 spin_lock(&hdev->reset_info.lock);
1408 if (hdev->reset_info.in_reset) {
1409 /* We only allow scheduling of a hard reset during compute reset */
1410 if (hard_reset && hdev->reset_info.in_compute_reset)
1411 hdev->reset_info.hard_reset_schedule_flags = flags;
1412 spin_unlock(&hdev->reset_info.lock);
1413 return 0;
1414 }
1415
1416 /* This still allows the completion of some KDMA ops
1417 * Update this before in_reset because in_compute_reset implies we are in reset
1418 */
1419 hdev->reset_info.in_compute_reset = !hard_reset;
1420
1421 hdev->reset_info.in_reset = 1;
1422
1423 spin_unlock(&hdev->reset_info.lock);
1424
1425 if (delay_reset)
1426 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
1427
1428 handle_reset_trigger(hdev, flags);
1429
1430 /* This also blocks future CS/VM/JOB completion operations */
1431 hdev->disabled = true;
1432
1433 take_release_locks(hdev);
1434
1435 if (hard_reset)
1436 dev_info(hdev->dev, "Going to reset device\n");
1437 else if (reset_upon_device_release)
1438 dev_dbg(hdev->dev, "Going to reset device after release by user\n");
1439 else
1440 dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
1441 }
1442
1443 again:
1444 if ((hard_reset) && (!from_hard_reset_thread)) {
1445 hdev->reset_info.hard_reset_pending = true;
1446
1447 hdev->process_kill_trial_cnt = 0;
1448
1449 hdev->device_reset_work.flags = flags;
1450
1451 /*
1452 * Because the reset function can't run from heartbeat work,
1453 * we need to call the reset function from a dedicated work.
1454 */
1455 queue_delayed_work(hdev->device_reset_work.wq,
1456 &hdev->device_reset_work.reset_work, 0);
1457
1458 return 0;
1459 }
1460
1461 cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
1462
1463 kill_processes:
1464 if (hard_reset) {
1465 /* Kill processes here after CS rollback. This is because the
1466 * process can't really exit until all its CSs are done, which
1467 * is what we do in cs rollback
1468 */
1469 rc = device_kill_open_processes(hdev, 0, false);
1470
1471 if (rc == -EBUSY) {
1472 if (hdev->device_fini_pending) {
1473 dev_crit(hdev->dev,
1474 "%s Failed to kill all open processes, stopping hard reset\n",
1475 dev_name(&(hdev)->pdev->dev));
1476 goto out_err;
1477 }
1478
1479 /* signal reset thread to reschedule */
1480 return rc;
1481 }
1482
1483 if (rc) {
1484 dev_crit(hdev->dev,
1485 "%s Failed to kill all open processes, stopping hard reset\n",
1486 dev_name(&(hdev)->pdev->dev));
1487 goto out_err;
1488 }
1489
1490 /* Flush the Event queue workers to make sure no other thread is
1491 * reading or writing to registers during the reset
1492 */
1493 flush_workqueue(hdev->eq_wq);
1494 }
1495
1496 /* Reset the H/W. It will be in idle state after this returns */
1497 hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
1498
1499 if (hard_reset) {
1500 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
1501
1502 /* Release kernel context */
1503 if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
1504 hdev->kernel_ctx = NULL;
1505
1506 hl_vm_fini(hdev);
1507 hl_mmu_fini(hdev);
1508 hl_eq_reset(hdev, &hdev->event_queue);
1509 }
1510
1511 /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
1512 hl_hw_queue_reset(hdev, hard_reset);
1513 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1514 hl_cq_reset(hdev, &hdev->completion_queue[i]);
1515
1516 /* Make sure the context switch phase will run again */
1517 ctx = hl_get_compute_ctx(hdev);
1518 if (ctx) {
1519 atomic_set(&ctx->thread_ctx_switch_token, 1);
1520 ctx->thread_ctx_switch_wait_token = 0;
1521 hl_ctx_put(ctx);
1522 }
1523
1524 /* Finished tear-down, starting to re-initialize */
1525
1526 if (hard_reset) {
1527 hdev->device_cpu_disabled = false;
1528 hdev->reset_info.hard_reset_pending = false;
1529
1530 if (hdev->reset_info.reset_trigger_repeated &&
1531 (hdev->reset_info.prev_reset_trigger ==
1532 HL_DRV_RESET_FW_FATAL_ERR)) {
1533 /* if there are 2 back-to-back resets from FW,
1534 * ensure the driver puts the device in an unusable state
1535 */
1536 dev_crit(hdev->dev,
1537 "%s Consecutive FW fatal errors received, stopping hard reset\n",
1538 dev_name(&(hdev)->pdev->dev));
1539 rc = -EIO;
1540 goto out_err;
1541 }
1542
1543 if (hdev->kernel_ctx) {
1544 dev_crit(hdev->dev,
1545 "%s kernel ctx was alive during hard reset, something is terribly wrong\n",
1546 dev_name(&(hdev)->pdev->dev));
1547 rc = -EBUSY;
1548 goto out_err;
1549 }
1550
1551 rc = hl_mmu_init(hdev);
1552 if (rc) {
1553 dev_err(hdev->dev,
1554 "Failed to initialize MMU S/W after hard reset\n");
1555 goto out_err;
1556 }
1557
1558 /* Allocate the kernel context */
1559 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
1560 GFP_KERNEL);
1561 if (!hdev->kernel_ctx) {
1562 rc = -ENOMEM;
1563 hl_mmu_fini(hdev);
1564 goto out_err;
1565 }
1566
1567 hdev->is_compute_ctx_active = false;
1568
1569 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1570 if (rc) {
1571 dev_err(hdev->dev,
1572 "failed to init kernel ctx in hard reset\n");
1573 kfree(hdev->kernel_ctx);
1574 hdev->kernel_ctx = NULL;
1575 hl_mmu_fini(hdev);
1576 goto out_err;
1577 }
1578 }
1579
1580 /* Device is now enabled as part of the initialization requires
1581 * communication with the device firmware to get information that
1582 * is required for the initialization itself
1583 */
1584 hdev->disabled = false;
1585
1586 /* F/W security enabled indication might be updated after hard-reset */
1587 if (hard_reset) {
1588 rc = hl_fw_read_preboot_status(hdev);
1589 if (rc)
1590 goto out_err;
1591 }
1592
1593 rc = hdev->asic_funcs->hw_init(hdev);
1594 if (rc) {
1595 dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
1596 goto out_err;
1597 }
1598
1599 /* If device is not idle fail the reset process */
1600 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
1601 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
1602 dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
1603 idle_mask[1], idle_mask[0]);
1604 rc = -EIO;
1605 goto out_err;
1606 }
1607
1608 /* Check that the communication with the device is working */
1609 rc = hdev->asic_funcs->test_queues(hdev);
1610 if (rc) {
1611 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
1612 goto out_err;
1613 }
1614
1615 if (hard_reset) {
1616 rc = device_late_init(hdev);
1617 if (rc) {
1618 dev_err(hdev->dev, "Failed late init after hard reset\n");
1619 goto out_err;
1620 }
1621
1622 rc = hl_vm_init(hdev);
1623 if (rc) {
1624 dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
1625 goto out_err;
1626 }
1627
1628 if (!hdev->asic_prop.fw_security_enabled)
1629 hl_fw_set_max_power(hdev);
1630 } else {
1631 rc = hdev->asic_funcs->compute_reset_late_init(hdev);
1632 if (rc) {
1633 if (reset_upon_device_release)
1634 dev_err(hdev->dev,
1635 "Failed late init in reset after device release\n");
1636 else
1637 dev_err(hdev->dev, "Failed late init after compute reset\n");
1638 goto out_err;
1639 }
1640 }
1641
1642 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1643 if (rc) {
1644 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc);
1645 return rc;
1646 }
1647
1648 spin_lock(&hdev->reset_info.lock);
1649 hdev->reset_info.in_compute_reset = 0;
1650
1651 /* Schedule hard reset only if requested and if not already in hard reset.
1652 * We keep 'in_reset' enabled, so no other reset can go in during the hard
1653 * reset schedule
1654 */
1655 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
1656 schedule_hard_reset = true;
1657 else
1658 hdev->reset_info.in_reset = 0;
1659
1660 spin_unlock(&hdev->reset_info.lock);
1661
1662 hdev->reset_info.needs_reset = false;
1663
1664 if (hard_reset)
1665 dev_info(hdev->dev,
1666 "Successfully finished resetting the %s device\n",
1667 dev_name(&(hdev)->pdev->dev));
1668 else
1669 dev_dbg(hdev->dev,
1670 "Successfully finished resetting the %s device\n",
1671 dev_name(&(hdev)->pdev->dev));
1672
1673 if (hard_reset) {
1674 hdev->reset_info.hard_reset_cnt++;
1675
1676 /* After reset is done, we are ready to receive events from
1677 * the F/W. We can't do it before because we will ignore events
1678 * and if those events are fatal, we won't know about it and
1679 * the device will be operational although it shouldn't be
1680 */
1681 hdev->asic_funcs->enable_events_from_fw(hdev);
1682 } else if (!reset_upon_device_release) {
1683 hdev->reset_info.compute_reset_cnt++;
1684 }
1685
1686 if (schedule_hard_reset) {
1687 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
1688 flags = hdev->reset_info.hard_reset_schedule_flags;
1689 hdev->reset_info.hard_reset_schedule_flags = 0;
1690 hdev->disabled = true;
1691 hard_reset = true;
1692 handle_reset_trigger(hdev, flags);
1693 goto again;
1694 }
1695
1696 return 0;
1697
1698 out_err:
1699 hdev->disabled = true;
1700
1701 spin_lock(&hdev->reset_info.lock);
1702 hdev->reset_info.in_compute_reset = 0;
1703
1704 if (hard_reset) {
1705 dev_err(hdev->dev,
1706 "%s Failed to reset! Device is NOT usable\n",
1707 dev_name(&(hdev)->pdev->dev));
1708 hdev->reset_info.hard_reset_cnt++;
1709 } else if (reset_upon_device_release) {
1710 spin_unlock(&hdev->reset_info.lock);
1711 dev_err(hdev->dev, "Failed to reset device after user release\n");
1712 flags |= HL_DRV_RESET_HARD;
1713 flags &= ~HL_DRV_RESET_DEV_RELEASE;
1714 hard_reset = true;
1715 goto again;
1716 } else {
1717 spin_unlock(&hdev->reset_info.lock);
1718 dev_err(hdev->dev, "Failed to do compute reset\n");
1719 hdev->reset_info.compute_reset_cnt++;
1720 flags |= HL_DRV_RESET_HARD;
1721 hard_reset = true;
1722 goto again;
1723 }
1724
1725 hdev->reset_info.in_reset = 0;
1726
1727 spin_unlock(&hdev->reset_info.lock);
1728
1729 return rc;
1730 }
1731
1732 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
1733 {
1734 mutex_lock(&notifier_event->lock);
1735 notifier_event->events_mask |= event_mask;
1736
1737 if (notifier_event->eventfd)
1738 eventfd_signal(notifier_event->eventfd, 1);
1739
1740 mutex_unlock(&notifier_event->lock);
1741 }
1742
1743 /*
1744 * hl_notifier_event_send_all - notify all user processes via eventfd
1745 *
1746 * @hdev: pointer to habanalabs device structure
1747 * @event_mask: the occurred event/s
1749 */
1750 void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
1751 {
1752 struct hl_fpriv *hpriv;
1753
1754 mutex_lock(&hdev->fpriv_list_lock);
1755
1756 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
1757 hl_notifier_event_send(&hpriv->notifier_event, event_mask);
1758
1759 mutex_unlock(&hdev->fpriv_list_lock);
1760
1761 /* control device */
1762 mutex_lock(&hdev->fpriv_ctrl_list_lock);
1763
1764 list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
1765 hl_notifier_event_send(&hpriv->notifier_event, event_mask);
1766
1767 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
1768 }
1769
1770 /*
1771 * hl_device_init - main initialization function for habanalabs device
1772 *
1773 * @hdev: pointer to habanalabs device structure
1774 *
1775 * Allocate an id for the device, do early initialization and then call the
1776 * ASIC specific initialization functions. Finally, create the cdev and the
1777 * Linux device to expose it to the user
1778 */
1779 int hl_device_init(struct hl_device *hdev, struct class *hclass)
1780 {
1781 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
1782 char *name;
1783 bool add_cdev_sysfs_on_err = false;
1784
1785 hdev->cdev_idx = hdev->id / 2;
1786
1787 name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
1788 if (!name) {
1789 rc = -ENOMEM;
1790 goto out_disabled;
1791 }
1792
1793 /* Initialize cdev and device structures */
1794 rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
1795 &hdev->cdev, &hdev->dev);
1796
1797 kfree(name);
1798
1799 if (rc)
1800 goto out_disabled;
1801
1802 name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
1803 if (!name) {
1804 rc = -ENOMEM;
1805 goto free_dev;
1806 }
1807
1808 /* Initialize cdev and device structures for control device */
1809 rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
1810 name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
1811
1812 kfree(name);
1813
1814 if (rc)
1815 goto free_dev;
1816
1817 /* Initialize ASIC function pointers and perform early init */
1818 rc = device_early_init(hdev);
1819 if (rc)
1820 goto free_dev_ctrl;
1821
1822 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
1823 hdev->asic_prop.user_interrupt_count;
1824
1825 if (user_interrupt_cnt) {
1826 hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt),
1827 GFP_KERNEL);
1828 if (!hdev->user_interrupt) {
1829 rc = -ENOMEM;
1830 goto early_fini;
1831 }
1832 }
1833
1834 /*
1835 * Start calling ASIC initialization. First S/W then H/W and finally
1836 * late init
1837 */
1838 rc = hdev->asic_funcs->sw_init(hdev);
1839 if (rc)
1840 goto free_usr_intr_mem;
1841
1843 /* initialize completion structure for multi CS wait */
1844 hl_multi_cs_completion_init(hdev);
1845
1846 /*
1847 * Initialize the H/W queues. Must be done before hw_init, because
1848 * there the addresses of the kernel queue are being written to the
1849 * registers of the device
1850 */
1851 rc = hl_hw_queues_create(hdev);
1852 if (rc) {
1853 dev_err(hdev->dev, "failed to initialize kernel queues\n");
1854 goto sw_fini;
1855 }
1856
1857 cq_cnt = hdev->asic_prop.completion_queues_count;
1858
1859 /*
1860 * Initialize the completion queues. Must be done before hw_init,
1861 * because there the addresses of the completion queues are being
1862 * passed as arguments to request_irq
1863 */
1864 if (cq_cnt) {
1865 hdev->completion_queue = kcalloc(cq_cnt,
1866 sizeof(*hdev->completion_queue),
1867 GFP_KERNEL);
1868
1869 if (!hdev->completion_queue) {
1870 dev_err(hdev->dev,
1871 "failed to allocate completion queues\n");
1872 rc = -ENOMEM;
1873 goto hw_queues_destroy;
1874 }
1875 }
1876
1877 for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
1878 rc = hl_cq_init(hdev, &hdev->completion_queue[i],
1879 hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
1880 if (rc) {
1881 dev_err(hdev->dev,
1882 "failed to initialize completion queue\n");
1883 goto cq_fini;
1884 }
1885 hdev->completion_queue[i].cq_idx = i;
1886 }
1887
1888 hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
1889 sizeof(struct hl_cs *), GFP_KERNEL);
1890 if (!hdev->shadow_cs_queue) {
1891 rc = -ENOMEM;
1892 goto cq_fini;
1893 }
1894
1895 /*
1896 * Initialize the event queue. Must be done before hw_init,
1897 * because there the address of the event queue is being
1898 * passed as argument to request_irq
1899 */
1900 rc = hl_eq_init(hdev, &hdev->event_queue);
1901 if (rc) {
1902 dev_err(hdev->dev, "failed to initialize event queue\n");
1903 goto free_shadow_cs_queue;
1904 }
1905
1906 /* MMU S/W must be initialized before kernel context is created */
1907 rc = hl_mmu_init(hdev);
1908 if (rc) {
1909 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
1910 goto eq_fini;
1911 }
1912
1913 /* Allocate the kernel context */
1914 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
1915 if (!hdev->kernel_ctx) {
1916 rc = -ENOMEM;
1917 goto mmu_fini;
1918 }
1919
1920 hdev->is_compute_ctx_active = false;
1921
1922 hdev->asic_funcs->state_dump_init(hdev);
1923
1924 hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
1925 hl_debugfs_add_device(hdev);
1926
1927 /* debugfs nodes are created in hl_ctx_init so it must be called after
1928 * hl_debugfs_add_device.
1929 */
1930 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1931 if (rc) {
1932 dev_err(hdev->dev, "failed to initialize kernel context\n");
1933 kfree(hdev->kernel_ctx);
1934 goto remove_device_from_debugfs;
1935 }
1936
1937 rc = hl_cb_pool_init(hdev);
1938 if (rc) {
1939 dev_err(hdev->dev, "failed to initialize CB pool\n");
1940 goto release_ctx;
1941 }
1942
1943 rc = hl_dec_init(hdev);
1944 if (rc) {
1945 dev_err(hdev->dev, "Failed to initialize the decoder module\n");
1946 goto cb_pool_fini;
1947 }
1948
1949 /*
1950 * From this point, override rc (=0) in case of an error to allow
1951 * debugging (by adding char devices and creating sysfs nodes as part
1952 * of the error flow).
1953 */
1954 add_cdev_sysfs_on_err = true;
1955
1956 /* The device is enabled at this point because part of the
1957 * initialization requires communication with the device firmware
1958 * to get information that is needed for the initialization itself
1959 */
1960 hdev->disabled = false;
1961
1962 rc = hdev->asic_funcs->hw_init(hdev);
1963 if (rc) {
1964 dev_err(hdev->dev, "failed to initialize the H/W\n");
1965 rc = 0;
1966 goto out_disabled;
1967 }
1968
1969 /* Check that the communication with the device is working */
1970 rc = hdev->asic_funcs->test_queues(hdev);
1971 if (rc) {
1972 dev_err(hdev->dev, "Failed to detect if device is alive\n");
1973 rc = 0;
1974 goto out_disabled;
1975 }
1976
1977 rc = device_late_init(hdev);
1978 if (rc) {
1979 dev_err(hdev->dev, "Failed late initialization\n");
1980 rc = 0;
1981 goto out_disabled;
1982 }
1983
1984 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
1985 hdev->asic_name,
1986 hdev->asic_prop.dram_size / SZ_1G);
1987
1988 rc = hl_vm_init(hdev);
1989 if (rc) {
1990 dev_err(hdev->dev, "Failed to initialize memory module\n");
1991 rc = 0;
1992 goto out_disabled;
1993 }
1994
1995 /*
1996 * Expose devices and sysfs nodes to user.
1997 * From here there is no need to add char devices and create sysfs nodes
1998 * in case of an error.
1999 */
2000 add_cdev_sysfs_on_err = false;
2001 rc = device_cdev_sysfs_add(hdev);
2002 if (rc) {
2003 dev_err(hdev->dev,
2004 "Failed to add char devices and sysfs nodes\n");
2005 rc = 0;
2006 goto out_disabled;
2007 }
2008
2009 /* Need to call this again because the max power might change,
2010 * depending on card type for certain ASICs
2011 */
2012 if (hdev->asic_prop.set_max_power_on_device_init &&
2013 !hdev->asic_prop.fw_security_enabled)
2014 hl_fw_set_max_power(hdev);
2015
2016 /*
2017 * hl_hwmon_init() must be called after device_late_init(), because only
2018 * then do we get the information from the device about which
2019 * hwmon-related sensors the device supports.
2020 * Furthermore, it must be done after adding the device to the system.
2021 */
2022 rc = hl_hwmon_init(hdev);
2023 if (rc) {
2024 dev_err(hdev->dev, "Failed to initialize hwmon\n");
2025 rc = 0;
2026 goto out_disabled;
2027 }
2028
2029 dev_notice(hdev->dev,
2030 "Successfully added device %s to habanalabs driver\n",
2031 dev_name(&(hdev)->pdev->dev));
2032
2033 hdev->init_done = true;
2034
2035 /* After initialization is done, we are ready to receive events from
2036 * the F/W. We can't do it before because we will ignore events and if
2037 * those events are fatal, we won't know about it and the device will
2038 * be operational although it shouldn't be
2039 */
2040 hdev->asic_funcs->enable_events_from_fw(hdev);
2041
2042 return 0;
2043
2044 cb_pool_fini:
2045 hl_cb_pool_fini(hdev);
2046 release_ctx:
2047 if (hl_ctx_put(hdev->kernel_ctx) != 1)
2048 dev_err(hdev->dev,
2049 "kernel ctx is still alive on initialization failure\n");
2050 remove_device_from_debugfs:
2051 hl_debugfs_remove_device(hdev);
2052 mmu_fini:
2053 hl_mmu_fini(hdev);
2054 eq_fini:
2055 hl_eq_fini(hdev, &hdev->event_queue);
2056 free_shadow_cs_queue:
2057 kfree(hdev->shadow_cs_queue);
2058 cq_fini:
2059 for (i = 0 ; i < cq_ready_cnt ; i++)
2060 hl_cq_fini(hdev, &hdev->completion_queue[i]);
2061 kfree(hdev->completion_queue);
2062 hw_queues_destroy:
2063 hl_hw_queues_destroy(hdev);
2064 sw_fini:
2065 hdev->asic_funcs->sw_fini(hdev);
2066 free_usr_intr_mem:
2067 kfree(hdev->user_interrupt);
2068 early_fini:
2069 device_early_fini(hdev);
2070 free_dev_ctrl:
2071 put_device(hdev->dev_ctrl);
2072 free_dev:
2073 put_device(hdev->dev);
2074 out_disabled:
2075 hdev->disabled = true;
2076 if (add_cdev_sysfs_on_err)
2077 device_cdev_sysfs_add(hdev);
2078 if (hdev->pdev)
2079 dev_err(&hdev->pdev->dev,
2080 "Failed to initialize hl%d. Device %s is NOT usable!\n",
2081 hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));
2082 else
2083 pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
2084 hdev->cdev_idx);
2085
2086 return rc;
2087 }
2088
2089 /*
2090 * hl_device_fini - main tear-down function for habanalabs device
2091 *
2092 * @hdev: pointer to habanalabs device structure
2093 *
2094 * Destroy the device, call ASIC fini functions and release the id
2095 */
2096 void hl_device_fini(struct hl_device *hdev)
2097 {
2098 bool device_in_reset;
2099 ktime_t timeout;
2100 u64 reset_sec;
2101 int i, rc;
2102
2103 dev_info(hdev->dev, "Removing device\n");
2104
2105 hdev->device_fini_pending = 1;
2106 flush_delayed_work(&hdev->device_reset_work.reset_work);
2107
2108 if (hdev->pldm)
2109 reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
2110 else
2111 reset_sec = HL_HARD_RESET_MAX_TIMEOUT;
2112
2113 /*
2114 * This function competes with the reset function, so try to take the
2115 * reset atomic and, if we are already in the middle of a reset, wait
2116 * until the reset function is finished. The reset function is designed
2117 * to always finish. However, in Gaudi, because of all the network
2118 * ports, the hard reset could take between 10-30 seconds
2119 */
2120
2121 timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
2122
2123 spin_lock(&hdev->reset_info.lock);
2124 device_in_reset = !!hdev->reset_info.in_reset;
2125 if (!device_in_reset)
2126 hdev->reset_info.in_reset = 1;
2127 spin_unlock(&hdev->reset_info.lock);
2128
2129 while (device_in_reset) {
2130 usleep_range(50, 200);
2131
2132 spin_lock(&hdev->reset_info.lock);
2133 device_in_reset = !!hdev->reset_info.in_reset;
2134 if (!device_in_reset)
2135 hdev->reset_info.in_reset = 1;
2136 spin_unlock(&hdev->reset_info.lock);
2137
2138 if (ktime_compare(ktime_get(), timeout) > 0) {
2139 dev_crit(hdev->dev,
2140 "%s Failed to remove device because reset function did not finish\n",
2141 dev_name(&(hdev)->pdev->dev));
2142 return;
2143 }
2144 }
2145
2146 /* Disable PCI access from device F/W so it won't send us additional
2147 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
2148 * can't have the F/W sending us interrupts after that. We need to
2149 * disable the access here because if the device is marked as disabled,
2150 * the message won't be sent. Also, in case of heartbeat, the device CPU
2151 * is marked as disabled so this message won't be sent
2152 */
2153 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2154
2155 /* Mark device as disabled */
2156 hdev->disabled = true;
2157
2158 take_release_locks(hdev);
2159
2160 hdev->reset_info.hard_reset_pending = true;
2161
2162 hl_hwmon_fini(hdev);
2163
2164 cleanup_resources(hdev, true, false, false);
2165
2166 /* Kill processes here after CS rollback. This is because a process
2167 * can't really exit until all of its CSs are done, and CS rollback
2168 * makes sure of that
2169 */
2170 dev_info(hdev->dev,
2171 "Waiting for all processes to exit (timeout of %u seconds)",
2172 HL_PENDING_RESET_LONG_SEC);
2173
2174 rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
2175 if (rc) {
2176 dev_crit(hdev->dev, "Failed to kill all open processes\n");
2177 device_disable_open_processes(hdev, false);
2178 }
2179
2180 rc = device_kill_open_processes(hdev, 0, true);
2181 if (rc) {
2182 dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
2183 device_disable_open_processes(hdev, true);
2184 }
2185
2186 hl_cb_pool_fini(hdev);
2187
2188 /* Reset the H/W. It will be in idle state after this returns */
2189 hdev->asic_funcs->hw_fini(hdev, true, false);
2190
2191 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
2192
2193 /* Release kernel context */
2194 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
2195 dev_err(hdev->dev, "kernel ctx is still alive\n");
2196
2197 hl_debugfs_remove_device(hdev);
2198
2199 hl_dec_fini(hdev);
2200
2201 hl_vm_fini(hdev);
2202
2203 hl_mmu_fini(hdev);
2204
2205 hl_eq_fini(hdev, &hdev->event_queue);
2206
2207 kfree(hdev->shadow_cs_queue);
2208
2209 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2210 hl_cq_fini(hdev, &hdev->completion_queue[i]);
2211 kfree(hdev->completion_queue);
2212 kfree(hdev->user_interrupt);
2213
2214 hl_hw_queues_destroy(hdev);
2215
2216 /* Call ASIC S/W finalize function */
2217 hdev->asic_funcs->sw_fini(hdev);
2218
2219 device_early_fini(hdev);
2220
2221 /* Hide devices and sysfs nodes from user */
2222 device_cdev_sysfs_del(hdev);
2223
2224 pr_info("removed device successfully\n");
2225 }
2226
2227 /*
2228 * MMIO register access helper functions.
2229 */
2230
2231 /*
2232 * hl_rreg - Read an MMIO register
2233 *
2234 * @hdev: pointer to habanalabs device structure
2235 * @reg: MMIO register offset (in bytes)
2236 *
2237 * Returns the value of the MMIO register we are asked to read
2238 *
2239 */
2240 inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
2241 {
2242 return readl(hdev->rmmio + reg);
2243 }
2244
2245 /*
2246 * hl_wreg - Write to an MMIO register
2247 *
2248 * @hdev: pointer to habanalabs device structure
2249 * @reg: MMIO register offset (in bytes)
2250 * @val: 32-bit value
2251 *
2252 * Writes the 32-bit value into the MMIO register
2253 *
2254 */
2255 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
2256 {
2257 writel(val, hdev->rmmio + reg);
2258 }
2259
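/*
 * Illustrative sketch (not part of the driver): a read-modify-write of a
 * device register using the two helpers above. EXAMPLE_REG_OFFSET is a
 * hypothetical byte offset; inside the driver these helpers are normally
 * reached through RREG32()/WREG32() style macros rather than called
 * directly.
 *
 *	u32 val;
 *
 *	val = hl_rreg(hdev, EXAMPLE_REG_OFFSET);
 *	val |= BIT(0);
 *	hl_wreg(hdev, EXAMPLE_REG_OFFSET, val);
 */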