• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "habanalabs.h"
9 #include "../include/common/hl_boot_if.h"
10 
11 #include <linux/firmware.h>
12 #include <linux/genalloc.h>
13 #include <linux/io-64-nonatomic-lo-hi.h>
14 #include <linux/slab.h>
15 
16 #define FW_FILE_MAX_SIZE	0x1400000 /* maximum size of 20MB */
17 /**
18  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
19  *
20  * @hdev: pointer to hl_device structure.
21  * @fw_name: the firmware image name
22  * @dst: IO memory mapped address space to copy firmware to
23  *
24  * Copy fw code from firmware file to device memory.
25  *
26  * Return: 0 on success, non-zero for failure.
27  */
hl_fw_load_fw_to_device(struct hl_device * hdev,const char * fw_name,void __iomem * dst)28 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
29 				void __iomem *dst)
30 {
31 	const struct firmware *fw;
32 	const u64 *fw_data;
33 	size_t fw_size;
34 	int rc;
35 
36 	rc = request_firmware(&fw, fw_name, hdev->dev);
37 	if (rc) {
38 		dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
39 		goto out;
40 	}
41 
42 	fw_size = fw->size;
43 	if ((fw_size % 4) != 0) {
44 		dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
45 			fw_name, fw_size);
46 		rc = -EINVAL;
47 		goto out;
48 	}
49 
50 	dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
51 
52 	if (fw_size > FW_FILE_MAX_SIZE) {
53 		dev_err(hdev->dev,
54 			"FW file size %zu exceeds maximum of %u bytes\n",
55 			fw_size, FW_FILE_MAX_SIZE);
56 		rc = -EINVAL;
57 		goto out;
58 	}
59 
60 	fw_data = (const u64 *) fw->data;
61 
62 	memcpy_toio(dst, fw_data, fw_size);
63 
64 out:
65 	release_firmware(fw);
66 	return rc;
67 }
68 
/**
 * hl_fw_send_pci_access_msg() - Send an opcode-only packet to the device CPU.
 *
 * @hdev: pointer to hl_device structure.
 * @opcode: opcode to place in the opcode field of the packet's ctl word.
 *
 * All other packet fields are zero. No result is requested and a timeout
 * value of 0 is passed to the send callback.
 *
 * Return: the value returned by the ASIC send_cpu_message() callback.
 */
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
{
	struct cpucp_packet access_pkt = {};
	u32 ctl_word = opcode << CPUCP_PKT_CTL_OPCODE_SHIFT;

	access_pkt.ctl = cpu_to_le32(ctl_word);

	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &access_pkt,
						sizeof(access_pkt), 0, NULL);
}
78 
/**
 * hl_fw_send_cpu_message() - Send a packet to the device CPU and wait for it.
 *
 * @hdev: pointer to hl_device structure.
 * @hw_queue_id: H/W queue on which the packet is submitted.
 * @msg: packet to send; @len bytes are copied from it.
 * @len: size of the packet in bytes.
 * @timeout: timeout value handed to hl_poll_timeout_memory().
 * @result: optional (may be NULL); receives the packet's result field when
 *          the F/W reports success.
 *
 * The packet is copied into a buffer taken from the CPU-accessible DMA pool,
 * submitted on @hw_queue_id, and its fence field is polled for
 * CPUCP_PACKET_FENCE_VAL. Submission and polling are done under
 * send_cpu_message_lock. The buffer is returned to the pool on every path.
 *
 * Return: 0 on success. 0 is also returned, without sending anything and
 * without writing @result, when hdev->disabled is set. -ENOMEM if the buffer
 * cannot be allocated, -EIO if the device CPU is marked disabled or the F/W
 * reports an error in the packet, -ETIMEDOUT if polling timed out, or the
 * error returned by the queue submission.
 */
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
				u16 len, u32 timeout, long *result)
{
	struct cpucp_packet *pkt;
	dma_addr_t pkt_dma_addr;
	u32 tmp;
	int rc = 0;

	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
								&pkt_dma_addr);
	if (!pkt) {
		dev_err(hdev->dev,
			"Failed to allocate DMA memory for packet to CPU\n");
		return -ENOMEM;
	}

	memcpy(pkt, msg, len);

	mutex_lock(&hdev->send_cpu_message_lock);

	/* NOTE: rc is still 0 here, so a disabled device reports success */
	if (hdev->disabled)
		goto out;

	if (hdev->device_cpu_disabled) {
		rc = -EIO;
		goto out;
	}

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
		goto out;
	}

	/* Wait for the fence field of the packet to hold the fence value */
	rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
				(tmp == CPUCP_PACKET_FENCE_VAL), 1000,
				timeout, true);

	/* Done regardless of the polling outcome */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
		/* Subsequent calls fail with -EIO, see the check above */
		hdev->device_cpu_disabled = true;
		goto out;
	}

	tmp = le32_to_cpu(pkt->ctl);

	/* The F/W return code is carried in the ctl word of the packet */
	rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
	if (rc) {
		dev_err(hdev->dev, "F/W ERROR %d for CPU packet %u\n",
			rc,
			(tmp & CPUCP_PKT_CTL_OPCODE_MASK)
						>> CPUCP_PKT_CTL_OPCODE_SHIFT);
		rc = -EIO;
	} else if (result) {
		*result = (long) le64_to_cpu(pkt->result);
	}

out:
	mutex_unlock(&hdev->send_cpu_message_lock);

	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);

	return rc;
}
145 
/**
 * hl_fw_unmask_irq() - Ask the device CPU to unmask a single RAZWI IRQ.
 *
 * @hdev: pointer to hl_device structure.
 * @event_type: event number, sent in the value field of the packet.
 *
 * Sends a CPUCP_PACKET_UNMASK_RAZWI_IRQ packet through the ASIC
 * send_cpu_message() callback.
 *
 * Return: 0 on success, otherwise the error returned by the callback.
 */
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt = {};
	long result;
	int rc;

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	/* result is required by the call below but is not used here */
	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %u\n",
			event_type);

	return rc;
}
166 
/**
 * hl_fw_unmask_irq_arr() - Ask the device CPU to unmask an array of IRQs.
 *
 * @hdev: pointer to hl_device structure.
 * @irq_arr: array of IRQ numbers to unmask.
 * @irq_arr_size: size of @irq_arr in bytes.
 *
 * Builds a CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet holding a copy of
 * @irq_arr and sends it through the ASIC send_cpu_message() callback.
 *
 * Return: 0 on success, -EINVAL if the padded packet does not fit in a u16
 * length, -ENOMEM on allocation failure, otherwise the error returned by
 * the callback.
 */
int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
		size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *arr_pkt;
	size_t pkt_bytes;
	long result;
	int rc;

	/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
	pkt_bytes = sizeof(*arr_pkt) + irq_arr_size;
	pkt_bytes = (pkt_bytes + 0x7) & ~0x7;

	/* the packet size is passed on as a u16 by the send path */
	if (pkt_bytes > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	/* zeroed allocation, so the alignment padding is sent as zeros */
	arr_pkt = kzalloc(pkt_bytes, GFP_KERNEL);
	if (!arr_pkt)
		return -ENOMEM;

	arr_pkt->cpucp_pkt.ctl =
		cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	/* length is the element count, not the byte count */
	arr_pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
	memcpy(&arr_pkt->irqs, irq_arr, irq_arr_size);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) arr_pkt,
						pkt_bytes, 0, &result);
	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(arr_pkt);

	return rc;
}
207 
hl_fw_test_cpu_queue(struct hl_device * hdev)208 int hl_fw_test_cpu_queue(struct hl_device *hdev)
209 {
210 	struct cpucp_packet test_pkt = {};
211 	long result;
212 	int rc;
213 
214 	test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
215 					CPUCP_PKT_CTL_OPCODE_SHIFT);
216 	test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
217 
218 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
219 						sizeof(test_pkt), 0, &result);
220 
221 	if (!rc) {
222 		if (result != CPUCP_PACKET_FENCE_VAL)
223 			dev_err(hdev->dev,
224 				"CPU queue test failed (0x%08lX)\n", result);
225 	} else {
226 		dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
227 	}
228 
229 	return rc;
230 }
231 
/**
 * hl_fw_cpu_accessible_dma_pool_alloc() - Allocate from the CPU-accessible
 *                                         DMA pool.
 *
 * @hdev: pointer to hl_device structure.
 * @size: number of bytes to allocate.
 * @dma_handle: receives the DMA address matching the returned pointer.
 *
 * The DMA address is cpu_accessible_dma_address plus the offset of the
 * allocation from cpu_accessible_dma_mem. @dma_handle is written
 * unconditionally, so it is meaningful only when the return value is
 * non-NULL.
 *
 * Return: kernel virtual address of the allocation (the value returned by
 * gen_pool_alloc(), converted to a pointer).
 */
void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
						dma_addr_t *dma_handle)
{
	u64 pool_base = (u64) (uintptr_t) hdev->cpu_accessible_dma_mem;
	u64 vaddr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);

	*dma_handle = hdev->cpu_accessible_dma_address + (vaddr - pool_base);

	return (void *) (uintptr_t) vaddr;
}
244 
/**
 * hl_fw_cpu_accessible_dma_pool_free() - Return memory to the CPU-accessible
 *                                        DMA pool.
 *
 * @hdev: pointer to hl_device structure.
 * @size: size in bytes of the chunk being freed.
 * @vaddr: kernel virtual address of the chunk.
 */
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	u64 chunk_addr = (u64) (uintptr_t) vaddr;

	gen_pool_free(hdev->cpu_accessible_dma_pool, chunk_addr, size);
}
251 
hl_fw_send_heartbeat(struct hl_device * hdev)252 int hl_fw_send_heartbeat(struct hl_device *hdev)
253 {
254 	struct cpucp_packet hb_pkt = {};
255 	long result;
256 	int rc;
257 
258 	hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
259 					CPUCP_PKT_CTL_OPCODE_SHIFT);
260 	hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
261 
262 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
263 						sizeof(hb_pkt), 0, &result);
264 
265 	if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
266 		rc = -EIO;
267 
268 	return rc;
269 }
270 
hl_fw_cpucp_info_get(struct hl_device * hdev)271 int hl_fw_cpucp_info_get(struct hl_device *hdev)
272 {
273 	struct asic_fixed_properties *prop = &hdev->asic_prop;
274 	struct cpucp_packet pkt = {};
275 	void *cpucp_info_cpu_addr;
276 	dma_addr_t cpucp_info_dma_addr;
277 	long result;
278 	int rc;
279 
280 	cpucp_info_cpu_addr =
281 			hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
282 					sizeof(struct cpucp_info),
283 					&cpucp_info_dma_addr);
284 	if (!cpucp_info_cpu_addr) {
285 		dev_err(hdev->dev,
286 			"Failed to allocate DMA memory for CPU-CP info packet\n");
287 		return -ENOMEM;
288 	}
289 
290 	memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
291 
292 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
293 				CPUCP_PKT_CTL_OPCODE_SHIFT);
294 	pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
295 	pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
296 
297 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
298 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
299 	if (rc) {
300 		dev_err(hdev->dev,
301 			"Failed to handle CPU-CP info pkt, error %d\n", rc);
302 		goto out;
303 	}
304 
305 	memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
306 			sizeof(prop->cpucp_info));
307 
308 	rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
309 	if (rc) {
310 		dev_err(hdev->dev,
311 			"Failed to build hwmon channel info, error %d\n", rc);
312 		rc = -EFAULT;
313 		goto out;
314 	}
315 
316 out:
317 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
318 			sizeof(struct cpucp_info), cpucp_info_cpu_addr);
319 
320 	return rc;
321 }
322 
/**
 * hl_fw_get_eeprom_data() - Read EEPROM data through the device CPU.
 *
 * @hdev: pointer to hl_device structure.
 * @data: destination buffer, at least @max_size bytes.
 * @max_size: maximum number of bytes to fetch.
 *
 * Allocates a zeroed bounce buffer of @max_size bytes from the
 * CPU-accessible DMA pool, asks the device CPU to fill it using a
 * CPUCP_PACKET_EEPROM_DATA_GET packet and copies the reported number of
 * bytes, capped at @max_size, into @data.
 *
 * Return: 0 on success, -ENOMEM if the buffer cannot be allocated, otherwise
 * the error returned by the ASIC send_cpu_message() callback.
 */
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
	struct cpucp_packet pkt = {};
	dma_addr_t bounce_dma_addr;
	void *bounce_buf;
	size_t copy_len;
	long result;
	int rc;

	bounce_buf = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
						max_size, &bounce_dma_addr);
	if (!bounce_buf) {
		dev_err(hdev->dev,
			"Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
		return -ENOMEM;
	}

	memset(bounce_buf, 0, max_size);

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(bounce_dma_addr);
	pkt.data_max_size = cpu_to_le32(max_size);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
			HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to handle CPU-CP EEPROM packet, error %d\n",
			rc);
		goto free_buf;
	}

	/* result contains the actual size; never copy more than max_size */
	copy_len = min((size_t)result, max_size);
	memcpy(data, bounce_buf, copy_len);

free_buf:
	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
			bounce_buf);

	return rc;
}
366 
hl_fw_cpucp_pci_counters_get(struct hl_device * hdev,struct hl_info_pci_counters * counters)367 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
368 		struct hl_info_pci_counters *counters)
369 {
370 	struct cpucp_packet pkt = {};
371 	long result;
372 	int rc;
373 
374 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
375 			CPUCP_PKT_CTL_OPCODE_SHIFT);
376 
377 	/* Fetch PCI rx counter */
378 	pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
379 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
380 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
381 	if (rc) {
382 		dev_err(hdev->dev,
383 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
384 		return rc;
385 	}
386 	counters->rx_throughput = result;
387 
388 	memset(&pkt, 0, sizeof(pkt));
389 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
390 			CPUCP_PKT_CTL_OPCODE_SHIFT);
391 
392 	/* Fetch PCI tx counter */
393 	pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
394 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
395 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
396 	if (rc) {
397 		dev_err(hdev->dev,
398 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
399 		return rc;
400 	}
401 	counters->tx_throughput = result;
402 
403 	/* Fetch PCI replay counter */
404 	memset(&pkt, 0, sizeof(pkt));
405 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
406 			CPUCP_PKT_CTL_OPCODE_SHIFT);
407 
408 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
409 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
410 	if (rc) {
411 		dev_err(hdev->dev,
412 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
413 		return rc;
414 	}
415 	counters->replay_cnt = (u32) result;
416 
417 	return rc;
418 }
419 
/**
 * hl_fw_cpucp_total_energy_get() - Read the total energy value from the
 *                                  device CPU.
 *
 * @hdev: pointer to hl_device structure.
 * @total_energy: receives the packet result on success; untouched on failure.
 *
 * Return: 0 on success, otherwise the error returned by the ASIC
 * send_cpu_message() callback.
 */
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
{
	struct cpucp_packet energy_pkt = {};
	long energy;
	int rc;

	energy_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &energy_pkt,
					sizeof(energy_pkt),
					HL_CPUCP_INFO_TIMEOUT_USEC, &energy);
	if (!rc) {
		*total_energy = energy;
		return 0;
	}

	dev_err(hdev->dev,
		"Failed to handle CpuCP total energy pkt, error %d\n",
			rc);

	return rc;
}
442 
/**
 * fw_read_errors() - Read the boot error register and log every set error.
 *
 * @hdev: pointer to hl_device structure.
 * @boot_err0_reg: address of the boot error register to read.
 *
 * Nothing is logged unless the CPU_BOOT_ERR0_ENABLED bit is set in the
 * register. Each set bit is logged separately; skipped-step indications are
 * logged as warnings, everything else as errors.
 */
static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
{
	u32 err_val;

	/* Some of the firmware status codes are deprecated in newer f/w
	 * versions. In those versions, the errors are reported
	 * in different registers. Therefore, we need to check those
	 * registers and print the exact errors. Moreover, there
	 * may be multiple errors, so we need to report on each error
	 * separately. Some of the error codes might indicate a state
	 * that is not an error per-se, but it is an error in production
	 * environment
	 */
	err_val = RREG32(boot_err0_reg);
	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
		return;

	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
		dev_err(hdev->dev,
			"Device boot error - DRAM initialization failed\n");
	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
		dev_err(hdev->dev,
			"Device boot error - Thermal Sensor initialization failed\n");
	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
		dev_warn(hdev->dev,
			"Device boot warning - Skipped DRAM initialization\n");
	/* Logged at warning level, so the text says "warning" as well */
	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
		dev_warn(hdev->dev,
			"Device boot warning - Skipped waiting for BMC\n");
	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
		dev_err(hdev->dev,
			"Device boot error - Serdes data from BMC not available\n");
	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
		dev_err(hdev->dev,
			"Device boot error - NIC F/W initialization failed\n");
}
481 
/**
 * detect_cpu_boot_status() - Log a description of a CPU boot status code.
 *
 * @hdev: pointer to hl_device structure.
 * @status: boot status value to describe.
 *
 * Every status, known or not, is logged as an error; the callers in this
 * file invoke this only after a boot-status poll has failed.
 */
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
	/* Some of the status codes below are deprecated in newer f/w
	 * versions but we keep them here for backward compatibility
	 */
	switch (status) {
	case CPU_BOOT_STATUS_NA:
		dev_err(hdev->dev,
			"Device boot error - BTL did NOT run\n");
		break;
	case CPU_BOOT_STATUS_IN_WFE:
		dev_err(hdev->dev,
			"Device boot error - Stuck inside WFE loop\n");
		break;
	case CPU_BOOT_STATUS_IN_BTL:
		dev_err(hdev->dev,
			"Device boot error - Stuck in BTL\n");
		break;
	case CPU_BOOT_STATUS_IN_PREBOOT:
		dev_err(hdev->dev,
			"Device boot error - Stuck in Preboot\n");
		break;
	case CPU_BOOT_STATUS_IN_SPL:
		dev_err(hdev->dev,
			"Device boot error - Stuck in SPL\n");
		break;
	case CPU_BOOT_STATUS_IN_UBOOT:
		dev_err(hdev->dev,
			"Device boot error - Stuck in u-boot\n");
		break;
	case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
		dev_err(hdev->dev,
			"Device boot error - DRAM initialization failed\n");
		break;
	case CPU_BOOT_STATUS_UBOOT_NOT_READY:
		dev_err(hdev->dev,
			"Device boot error - u-boot stopped by user\n");
		break;
	case CPU_BOOT_STATUS_TS_INIT_FAIL:
		dev_err(hdev->dev,
			"Device boot error - Thermal Sensor initialization failed\n");
		break;
	default:
		/* status is unsigned, print it as such */
		dev_err(hdev->dev,
			"Device boot error - Invalid status code %u\n",
			status);
		break;
	}
}
531 
/**
 * hl_fw_read_preboot_ver() - Wait for the preboot stage and read its version.
 *
 * @hdev: pointer to hl_device structure.
 * @cpu_boot_status_reg: address of the register polled for the boot status.
 * @boot_err0_reg: address of the boot error register, read on failure.
 * @timeout: timeout value handed to hl_poll_timeout().
 *
 * Does nothing when hdev->cpu_enable is not set.
 *
 * Return: 0 on success or when the CPU is not enabled, -EIO if none of the
 * expected boot statuses was observed.
 */
int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
				u32 boot_err0_reg, u32 timeout)
{
	u32 boot_status;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	/* Need to check two possible scenarios:
	 *
	 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
	 * the preboot is waiting for the boot fit
	 *
	 * All other status values - for older firmwares where the uboot was
	 * loaded from the FLASH
	 */
	rc = hl_poll_timeout(
		hdev,
		cpu_boot_status_reg,
		boot_status,
		(boot_status == CPU_BOOT_STATUS_IN_UBOOT) ||
		(boot_status == CPU_BOOT_STATUS_DRAM_RDY) ||
		(boot_status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
		(boot_status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
		(boot_status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
		(boot_status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
		10000,
		timeout);

	if (!rc) {
		hdev->asic_funcs->read_device_fw_version(hdev,
							FW_COMP_PREBOOT);
		return 0;
	}

	/* Report the last status seen and any error bits */
	dev_err(hdev->dev, "Failed to read preboot version\n");
	detect_cpu_boot_status(hdev, boot_status);
	fw_read_errors(hdev, boot_err0_reg);

	return -EIO;
}
573 
/**
 * hl_fw_init_cpu() - Bring up the device CPU and load its firmware.
 *
 * @hdev: pointer to hl_device structure.
 * @cpu_boot_status_reg: address of the register polled for the boot status.
 * @msg_to_cpu_reg: address of the register used to send KMD_MSG_* messages.
 * @cpu_msg_status_reg: address of the register polled for the CPU's ack.
 * @boot_err0_reg: address of the boot error register, read before returning.
 * @skip_bmc: if true, tell the device CPU to skip waiting for the BMC.
 * @cpu_timeout: timeout value used for the boot status polls.
 * @boot_fit_timeout: timeout value used for the boot fit request and ack
 *                    polls.
 *
 * Steps, in order:
 * 1. Wait for a boot fit request. If one arrives, load the boot fit and wait
 *    for the ack; if none arrives, continue.
 * 2. Wait until the boot status reports one of the boot-loader states, and
 *    read the U-Boot version.
 * 3. Unless F/W loading is disabled or the status is already
 *    CPU_BOOT_STATUS_SRAM_AVAIL, load the firmware, optionally request
 *    skipping the BMC, signal that the FIT is ready and wait for
 *    CPU_BOOT_STATUS_SRAM_AVAIL.
 *
 * The boot error register is read and reported on every exit path.
 *
 * Return: 0 on success, -EIO if a boot status poll fails, otherwise the
 * error returned by the load callbacks or by the boot fit ack poll.
 */
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
			u32 boot_err0_reg, bool skip_bmc,
			u32 cpu_timeout, u32 boot_fit_timeout)
{
	u32 status;
	int rc;

	dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
		cpu_timeout / USEC_PER_SEC);

	/* Wait for boot FIT request */
	rc = hl_poll_timeout(
		hdev,
		cpu_boot_status_reg,
		status,
		status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
		10000,
		boot_fit_timeout);

	if (rc) {
		/* Not an error: boot continues without a boot fit */
		dev_dbg(hdev->dev,
			"No boot fit request received, resuming boot\n");
	} else {
		rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
		if (rc)
			goto out;

		/* Clear device CPU message status */
		WREG32(cpu_msg_status_reg, CPU_MSG_CLR);

		/* Signal device CPU that boot loader is ready */
		WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);

		/* Poll for CPU device ack */
		rc = hl_poll_timeout(
			hdev,
			cpu_msg_status_reg,
			status,
			status == CPU_MSG_OK,
			10000,
			boot_fit_timeout);

		if (rc) {
			dev_err(hdev->dev,
				"Timeout waiting for boot fit load ack\n");
			goto out;
		}

		/* Clear message */
		WREG32(msg_to_cpu_reg, KMD_MSG_NA);
	}

	/* Make sure CPU boot-loader is running */
	rc = hl_poll_timeout(
		hdev,
		cpu_boot_status_reg,
		status,
		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
		10000,
		cpu_timeout);

	/* Read U-Boot version now in case we will later fail */
	hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);

	if (rc) {
		detect_cpu_boot_status(hdev, status);
		rc = -EIO;
		goto out;
	}

	if (!hdev->fw_loading) {
		dev_info(hdev->dev, "Skip loading FW\n");
		goto out;
	}

	/* Status already at the final state, nothing left to load */
	if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
		goto out;

	dev_info(hdev->dev,
		"Loading firmware to device, may take some time...\n");

	rc = hdev->asic_funcs->load_firmware_to_device(hdev);
	if (rc)
		goto out;

	if (skip_bmc) {
		WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);

		rc = hl_poll_timeout(
			hdev,
			cpu_boot_status_reg,
			status,
			(status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
			10000,
			cpu_timeout);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to get ACK on skipping BMC, %u\n",
				status);
			WREG32(msg_to_cpu_reg, KMD_MSG_NA);
			rc = -EIO;
			goto out;
		}
	}

	WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);

	rc = hl_poll_timeout(
		hdev,
		cpu_boot_status_reg,
		status,
		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
		10000,
		cpu_timeout);

	/* Clear message */
	WREG32(msg_to_cpu_reg, KMD_MSG_NA);

	if (rc) {
		if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
			dev_err(hdev->dev,
				"Device reports FIT image is corrupted\n");
		else
			dev_err(hdev->dev,
				"Failed to load firmware to device, %u\n",
				status);

		rc = -EIO;
		goto out;
	}

	dev_info(hdev->dev, "Successfully loaded firmware to device\n");

out:
	/* Report boot error bits on success and on failure alike */
	fw_read_errors(hdev, boot_err0_reg);

	return rc;
}
717