1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
99 
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
101 		BIT(GAUDI_ENGINE_ID_MME_0) |\
102 		BIT(GAUDI_ENGINE_ID_MME_2) |\
103 		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104 
105 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
106 
107 #define GAUDI_PLL_MAX 10
108 
109 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
110 
111 #define MONITOR_SOB_STRING_SIZE		256
112 
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114 	GAUDI_QUEUE_ID_DMA_0_0,
115 	GAUDI_QUEUE_ID_DMA_0_1,
116 	GAUDI_QUEUE_ID_DMA_0_2,
117 	GAUDI_QUEUE_ID_DMA_0_3,
118 	GAUDI_QUEUE_ID_DMA_1_0,
119 	GAUDI_QUEUE_ID_DMA_1_1,
120 	GAUDI_QUEUE_ID_DMA_1_2,
121 	GAUDI_QUEUE_ID_DMA_1_3
122 };
123 
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128 		"gaudi cpu eq"
129 };
130 
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141 
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
144 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
145 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
146 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
147 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
148 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
149 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
150 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152 
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
155 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
156 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
157 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
158 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
159 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
160 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
161 	[PACKET_FENCE]		= sizeof(struct packet_fence),
162 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
163 	[PACKET_NOP]		= sizeof(struct packet_nop),
164 	[PACKET_STOP]		= sizeof(struct packet_stop),
165 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
166 	[PACKET_WAIT]		= sizeof(struct packet_wait),
167 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
168 };
169 
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172 	switch (id) {
173 	case PACKET_WREG_32:
174 	case PACKET_WREG_BULK:
175 	case PACKET_MSG_LONG:
176 	case PACKET_MSG_SHORT:
177 	case PACKET_CP_DMA:
178 	case PACKET_REPEAT:
179 	case PACKET_MSG_PROT:
180 	case PACKET_FENCE:
181 	case PACKET_LIN_DMA:
182 	case PACKET_NOP:
183 	case PACKET_STOP:
184 	case PACKET_ARB_POINT:
185 	case PACKET_WAIT:
186 	case PACKET_LOAD_AND_EXE:
187 		return true;
188 	default:
189 		return false;
190 	}
191 }
192 
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195 	"tpc_address_exceed_slm",
196 	"tpc_div_by_0",
197 	"tpc_spu_mac_overflow",
198 	"tpc_spu_addsub_overflow",
199 	"tpc_spu_abs_overflow",
200 	"tpc_spu_fp_dst_nan_inf",
201 	"tpc_spu_fp_dst_denorm",
202 	"tpc_vpu_mac_overflow",
203 	"tpc_vpu_addsub_overflow",
204 	"tpc_vpu_abs_overflow",
205 	"tpc_vpu_fp_dst_nan_inf",
206 	"tpc_vpu_fp_dst_denorm",
207 	"tpc_assertions",
208 	"tpc_illegal_instruction",
209 	"tpc_pc_wrap_around",
210 	"tpc_qm_sw_err",
211 	"tpc_hbw_rresp_err",
212 	"tpc_hbw_bresp_err",
213 	"tpc_lbw_rresp_err",
214 	"tpc_lbw_bresp_err"
215 };
216 
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219 	"PQ AXI HBW error",
220 	"CQ AXI HBW error",
221 	"CP AXI HBW error",
222 	"CP error due to undefined OPCODE",
223 	"CP encountered STOP OPCODE",
224 	"CP AXI LBW error",
225 	"CP WRREG32 or WRBULK returned error",
226 	"N/A",
227 	"FENCE 0 inc over max value and clipped",
228 	"FENCE 1 inc over max value and clipped",
229 	"FENCE 2 inc over max value and clipped",
230 	"FENCE 3 inc over max value and clipped",
231 	"FENCE 0 dec under min value and clipped",
232 	"FENCE 1 dec under min value and clipped",
233 	"FENCE 2 dec under min value and clipped",
234 	"FENCE 3 dec under min value and clipped"
235 };
236 
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239 	"Choice push while full error",
240 	"Choice Q watchdog error",
241 	"MSG AXI LBW returned with error"
242 };
243 
244 enum gaudi_sm_sei_cause {
245 	GAUDI_SM_SEI_SO_OVERFLOW,
246 	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247 	GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249 
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365 
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395 
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409 
410 static s64 gaudi_state_dump_specs_props[] = {
411 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414 	[SP_MON_OBJ_WR_ADDR_LOW] =
415 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416 	[SP_MON_OBJ_WR_ADDR_HIGH] =
417 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438 	[SP_FENCE0_CNT_OFFSET] =
439 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440 	[SP_FENCE0_RDATA_OFFSET] =
441 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443 	[SP_NUM_CORES] = 1,
444 };
445 
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450 	"SYNC_MGR_E_N",
451 	"SYNC_MGR_W_N",
452 	"SYNC_MGR_E_S",
453 	"SYNC_MGR_W_S",
454 	NULL
455 };
456 
457 struct ecc_info_extract_params {
458 	u64 block_address;
459 	u32 num_memories;
460 	bool derr;
461 	bool disable_clock_gating;
462 };
463 
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465 								u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467 					struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469 					u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471 					u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473 				u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479 				u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481 				struct hl_gen_wait_properties *prop);
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486 		return HL_COLLECTIVE_MASTER;
487 
488 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490 		return HL_COLLECTIVE_SLAVE;
491 
492 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494 		return HL_COLLECTIVE_SLAVE;
495 
496 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498 		return HL_COLLECTIVE_SLAVE;
499 
500 	return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502 
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505 	struct asic_fixed_properties *prop = &hdev->asic_prop;
506 
507 	if (hdev->card_type == cpucp_card_type_pmc) {
508 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509 
510 		if (prop->fw_security_enabled)
511 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512 		else
513 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514 	} else {
515 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517 	}
518 }
519 
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522 	struct asic_fixed_properties *prop = &hdev->asic_prop;
523 	u32 num_sync_stream_queues = 0;
524 	int i;
525 
526 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527 	prop->hw_queues_props = kcalloc(prop->max_queues,
528 			sizeof(struct hw_queue_properties),
529 			GFP_KERNEL);
530 
531 	if (!prop->hw_queues_props)
532 		return -ENOMEM;
533 
534 	for (i = 0 ; i < prop->max_queues ; i++) {
535 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537 			prop->hw_queues_props[i].driver_only = 0;
538 			prop->hw_queues_props[i].supports_sync_stream = 1;
539 			prop->hw_queues_props[i].cb_alloc_flags =
540 				CB_ALLOC_KERNEL;
541 			num_sync_stream_queues++;
542 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544 			prop->hw_queues_props[i].driver_only = 1;
545 			prop->hw_queues_props[i].supports_sync_stream = 0;
546 			prop->hw_queues_props[i].cb_alloc_flags =
547 				CB_ALLOC_KERNEL;
548 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550 			prop->hw_queues_props[i].driver_only = 0;
551 			prop->hw_queues_props[i].supports_sync_stream = 0;
552 			prop->hw_queues_props[i].cb_alloc_flags =
553 				CB_ALLOC_USER;
554 
555 		}
556 		prop->hw_queues_props[i].collective_mode =
557 						get_collective_mode(hdev, i);
558 	}
559 
560 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562 	prop->collective_first_sob = 0;
563 	prop->collective_first_mon = 0;
564 
565 	/* 2 SOBs per internal queue stream are reserved for collective */
566 	prop->sync_stream_first_sob =
567 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568 			* QMAN_STREAMS * HL_RSVD_SOBS;
569 
570 	/* 1 monitor per internal queue stream is reserved for collective
571 	 * 2 monitors per external queue stream are reserved for collective
572 	 */
573 	prop->sync_stream_first_mon =
574 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575 			(NUMBER_OF_EXT_HW_QUEUES * 2);
576 
577 	prop->dram_base_address = DRAM_PHYS_BASE;
578 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
579 	prop->dram_end_address = prop->dram_base_address +
580 					prop->dram_size;
581 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582 
583 	prop->sram_base_address = SRAM_BASE_ADDR;
584 	prop->sram_size = SRAM_SIZE;
585 	prop->sram_end_address = prop->sram_base_address +
586 					prop->sram_size;
587 	prop->sram_user_base_address = prop->sram_base_address +
588 					SRAM_USER_BASE_OFFSET;
589 
590 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591 	if (hdev->pldm)
592 		prop->mmu_pgt_size = 0x800000; /* 8MB */
593 	else
594 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595 	prop->mmu_pte_size = HL_PTE_SIZE;
596 	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598 	prop->dram_page_size = PAGE_SIZE_2MB;
599 	prop->dram_supports_virtual_memory = false;
600 
601 	prop->pmmu.hop0_shift = HOP0_SHIFT;
602 	prop->pmmu.hop1_shift = HOP1_SHIFT;
603 	prop->pmmu.hop2_shift = HOP2_SHIFT;
604 	prop->pmmu.hop3_shift = HOP3_SHIFT;
605 	prop->pmmu.hop4_shift = HOP4_SHIFT;
606 	prop->pmmu.hop0_mask = HOP0_MASK;
607 	prop->pmmu.hop1_mask = HOP1_MASK;
608 	prop->pmmu.hop2_mask = HOP2_MASK;
609 	prop->pmmu.hop3_mask = HOP3_MASK;
610 	prop->pmmu.hop4_mask = HOP4_MASK;
611 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
612 	prop->pmmu.end_addr =
613 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614 	prop->pmmu.page_size = PAGE_SIZE_4KB;
615 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616 
617 	/* PMMU and HPMMU are the same except for the page size */
618 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620 
621 	/* shifts and masks are the same in PMMU and DMMU */
622 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
625 	prop->dmmu.page_size = PAGE_SIZE_2MB;
626 
627 	prop->cfg_size = CFG_SIZE;
628 	prop->max_asid = MAX_ASID;
629 	prop->num_of_events = GAUDI_EVENT_SIZE;
630 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631 
632 	set_default_power_values(hdev);
633 
634 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636 
637 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639 
640 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641 					CARD_NAME_MAX_LEN);
642 
643 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644 
645 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646 			prop->sync_stream_first_sob +
647 			(num_sync_stream_queues * HL_RSVD_SOBS);
648 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649 			prop->sync_stream_first_mon +
650 			(num_sync_stream_queues * HL_RSVD_MONS);
651 
652 	prop->first_available_user_msix_interrupt = USHRT_MAX;
653 
654 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
655 		prop->first_available_cq[i] = USHRT_MAX;
656 
657 	prop->fw_cpu_boot_dev_sts0_valid = false;
658 	prop->fw_cpu_boot_dev_sts1_valid = false;
659 	prop->hard_reset_done_by_fw = false;
660 	prop->gic_interrupts_enable = true;
661 
662 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663 
664 	return 0;
665 }
666 
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
670 	bool is_wc[3] = {false, false, true};
671 	int rc;
672 
673 	rc = hl_pci_bars_map(hdev, name, is_wc);
674 	if (rc)
675 		return rc;
676 
677 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
679 
680 	return 0;
681 }
682 
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685 	struct gaudi_device *gaudi = hdev->asic_specific;
686 	struct hl_inbound_pci_region pci_region;
687 	u64 old_addr = addr;
688 	int rc;
689 
690 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691 		return old_addr;
692 
693 	if (hdev->asic_prop.iatu_done_by_fw)
694 		return U64_MAX;
695 
696 	/* Inbound Region 2 - Bar 4 - Point to HBM */
697 	pci_region.mode = PCI_BAR_MATCH_MODE;
698 	pci_region.bar = HBM_BAR_ID;
699 	pci_region.addr = addr;
700 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701 	if (rc)
702 		return U64_MAX;
703 
704 	if (gaudi) {
705 		old_addr = gaudi->hbm_bar_cur_addr;
706 		gaudi->hbm_bar_cur_addr = addr;
707 	}
708 
709 	return old_addr;
710 }
711 
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714 	struct hl_inbound_pci_region inbound_region;
715 	struct hl_outbound_pci_region outbound_region;
716 	int rc;
717 
718 	if (hdev->asic_prop.iatu_done_by_fw)
719 		return 0;
720 
721 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722 	inbound_region.mode = PCI_BAR_MATCH_MODE;
723 	inbound_region.bar = SRAM_BAR_ID;
724 	inbound_region.addr = SRAM_BASE_ADDR;
725 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726 	if (rc)
727 		goto done;
728 
729 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730 	inbound_region.mode = PCI_BAR_MATCH_MODE;
731 	inbound_region.bar = CFG_BAR_ID;
732 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
733 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734 	if (rc)
735 		goto done;
736 
737 	/* Inbound Region 2 - Bar 4 - Point to HBM */
738 	inbound_region.mode = PCI_BAR_MATCH_MODE;
739 	inbound_region.bar = HBM_BAR_ID;
740 	inbound_region.addr = DRAM_PHYS_BASE;
741 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742 	if (rc)
743 		goto done;
744 
745 	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746 
747 	/* Outbound Region 0 - Point to Host */
748 	outbound_region.addr = HOST_PHYS_BASE;
749 	outbound_region.size = HOST_PHYS_SIZE;
750 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751 
752 done:
753 	return rc;
754 }
755 
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758 	return RREG32(mmHW_STATE);
759 }
760 
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763 	struct asic_fixed_properties *prop = &hdev->asic_prop;
764 	struct pci_dev *pdev = hdev->pdev;
765 	u32 fw_boot_status;
766 	int rc;
767 
768 	rc = gaudi_set_fixed_properties(hdev);
769 	if (rc) {
770 		dev_err(hdev->dev, "Failed setting fixed properties\n");
771 		return rc;
772 	}
773 
774 	/* Check BAR sizes */
775 	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776 		dev_err(hdev->dev,
777 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778 			SRAM_BAR_ID,
779 			(unsigned long long) pci_resource_len(pdev,
780 							SRAM_BAR_ID),
781 			SRAM_BAR_SIZE);
782 		rc = -ENODEV;
783 		goto free_queue_props;
784 	}
785 
786 	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787 		dev_err(hdev->dev,
788 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789 			CFG_BAR_ID,
790 			(unsigned long long) pci_resource_len(pdev,
791 								CFG_BAR_ID),
792 			CFG_BAR_SIZE);
793 		rc = -ENODEV;
794 		goto free_queue_props;
795 	}
796 
797 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798 
799 	/* If FW security is enabled at this point it means no access to ELBI */
800 	if (hdev->asic_prop.fw_security_enabled) {
801 		hdev->asic_prop.iatu_done_by_fw = true;
802 
803 		/*
804 		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
805 		 * the decision can only be taken based on PCI ID security.
806 		 */
807 		hdev->asic_prop.gic_interrupts_enable = false;
808 		goto pci_init;
809 	}
810 
811 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812 				&fw_boot_status);
813 	if (rc)
814 		goto free_queue_props;
815 
816 	/* Check whether FW is configuring iATU */
817 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819 		hdev->asic_prop.iatu_done_by_fw = true;
820 
821 pci_init:
822 	rc = hl_pci_init(hdev);
823 	if (rc)
824 		goto free_queue_props;
825 
826 	/* Before continuing in the initialization, we need to read the preboot
827 	 * version to determine whether we run with a security-enabled firmware
828 	 */
829 	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830 					mmCPU_BOOT_DEV_STS0,
831 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832 					mmCPU_BOOT_ERR1,
833 					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834 	if (rc) {
835 		if (hdev->reset_on_preboot_fail)
836 			hdev->asic_funcs->hw_fini(hdev, true, false);
837 		goto pci_fini;
838 	}
839 
840 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841 		dev_info(hdev->dev,
842 			"H/W state is dirty, must reset before initializing\n");
843 		hdev->asic_funcs->hw_fini(hdev, true, false);
844 	}
845 
846 	return 0;
847 
848 pci_fini:
849 	hl_pci_fini(hdev);
850 free_queue_props:
851 	kfree(hdev->asic_prop.hw_queues_props);
852 	return rc;
853 }
854 
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857 	kfree(hdev->asic_prop.hw_queues_props);
858 	hl_pci_fini(hdev);
859 
860 	return 0;
861 }
862 
863 /**
864  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865  *
866  * @hdev: pointer to hl_device structure
867  *
868  */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871 	struct asic_fixed_properties *prop = &hdev->asic_prop;
872 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874 	int rc;
875 
876 	if (hdev->asic_prop.fw_security_enabled) {
877 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878 
879 		if (rc)
880 			return rc;
881 
882 		freq = pll_freq_arr[2];
883 	} else {
884 		/* Backward compatibility */
885 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887 		nr = RREG32(mmPSOC_CPU_PLL_NR);
888 		nf = RREG32(mmPSOC_CPU_PLL_NF);
889 		od = RREG32(mmPSOC_CPU_PLL_OD);
890 
891 		if (div_sel == DIV_SEL_REF_CLK ||
892 				div_sel == DIV_SEL_DIVIDED_REF) {
893 			if (div_sel == DIV_SEL_REF_CLK)
894 				freq = PLL_REF_CLK;
895 			else
896 				freq = PLL_REF_CLK / (div_fctr + 1);
897 		} else if (div_sel == DIV_SEL_PLL_CLK ||
898 			div_sel == DIV_SEL_DIVIDED_PLL) {
899 			pll_clk = PLL_REF_CLK * (nf + 1) /
900 					((nr + 1) * (od + 1));
901 			if (div_sel == DIV_SEL_PLL_CLK)
902 				freq = pll_clk;
903 			else
904 				freq = pll_clk / (div_fctr + 1);
905 		} else {
906 			dev_warn(hdev->dev,
907 				"Received invalid div select value: %d",
908 				div_sel);
909 			freq = 0;
910 		}
911 	}
912 
913 	prop->psoc_timestamp_frequency = freq;
914 	prop->psoc_pci_pll_nr = nr;
915 	prop->psoc_pci_pll_nf = nf;
916 	prop->psoc_pci_pll_od = od;
917 	prop->psoc_pci_pll_div_factor = div_fctr;
918 
919 	return 0;
920 }
921 
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925 	struct asic_fixed_properties *prop = &hdev->asic_prop;
926 	struct packet_lin_dma *init_tpc_mem_pkt;
927 	struct hl_cs_job *job;
928 	struct hl_cb *cb;
929 	u64 dst_addr;
930 	u32 cb_size, ctl;
931 	u8 tpc_id;
932 	int rc;
933 
934 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935 	if (!cb)
936 		return -EFAULT;
937 
938 	init_tpc_mem_pkt = cb->kernel_address;
939 	cb_size = sizeof(*init_tpc_mem_pkt);
940 	memset(init_tpc_mem_pkt, 0, cb_size);
941 
942 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943 
944 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948 
949 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950 
951 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952 	dst_addr = (prop->sram_user_base_address &
953 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956 
957 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958 	if (!job) {
959 		dev_err(hdev->dev, "Failed to allocate a new job\n");
960 		rc = -ENOMEM;
961 		goto release_cb;
962 	}
963 
964 	job->id = 0;
965 	job->user_cb = cb;
966 	atomic_inc(&job->user_cb->cs_cnt);
967 	job->user_cb_size = cb_size;
968 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969 	job->patched_cb = job->user_cb;
970 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971 
972 	hl_debugfs_add_job(hdev, job);
973 
974 	rc = gaudi_send_job_on_qman0(hdev, job);
975 
976 	if (rc)
977 		goto free_job;
978 
979 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981 		if (rc)
982 			break;
983 	}
984 
985 free_job:
986 	hl_userptr_delete_list(hdev, &job->userptr_list);
987 	hl_debugfs_remove_job(hdev, job);
988 	kfree(job);
989 	atomic_dec(&cb->cs_cnt);
990 
991 release_cb:
992 	hl_cb_put(cb);
993 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994 
995 	return rc;
996 }
997 
998 /*
999  * gaudi_init_tpc_mem() - Initialize TPC memories.
1000  * @hdev: Pointer to hl_device structure.
1001  *
1002  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003  *
1004  * Return: 0 for success, negative value for error.
1005  */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008 	const struct firmware *fw;
1009 	size_t fw_size;
1010 	void *cpu_addr;
1011 	dma_addr_t dma_handle;
1012 	int rc, count = 5;
1013 
1014 again:
1015 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016 	if (rc == -EINTR && count-- > 0) {
1017 		msleep(50);
1018 		goto again;
1019 	}
1020 
1021 	if (rc) {
1022 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023 				GAUDI_TPC_FW_FILE);
1024 		goto out;
1025 	}
1026 
1027 	fw_size = fw->size;
1028 	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029 			&dma_handle, GFP_KERNEL | __GFP_ZERO);
1030 	if (!cpu_addr) {
1031 		dev_err(hdev->dev,
1032 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1033 			fw_size);
1034 		rc = -ENOMEM;
1035 		goto out;
1036 	}
1037 
1038 	memcpy(cpu_addr, fw->data, fw_size);
1039 
1040 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041 
1042 	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043 			dma_handle);
1044 
1045 out:
1046 	release_firmware(fw);
1047 	return rc;
1048 }
1049 
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052 	struct gaudi_device *gaudi = hdev->asic_specific;
1053 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054 	struct hl_hw_queue *q;
1055 	u32 i, sob_id, sob_group_id, queue_id;
1056 
1057 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1058 	sob_group_id =
1059 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061 
1062 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1063 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1065 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1066 	}
1067 
1068 	/* Both DMA5 and TPC7 use the same resources since only a single
1069 	 * engine needs to participate in the reduction process
1070 	 */
1071 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072 	q = &hdev->kernel_queues[queue_id];
1073 	q->sync_stream_prop.collective_sob_id =
1074 			sob_id + NIC_NUMBER_OF_ENGINES;
1075 
1076 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077 	q = &hdev->kernel_queues[queue_id];
1078 	q->sync_stream_prop.collective_sob_id =
1079 			sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
1081 
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084 	struct gaudi_hw_sob_group *hw_sob_group =
1085 		container_of(ref, struct gaudi_hw_sob_group, kref);
1086 	struct hl_device *hdev = hw_sob_group->hdev;
1087 	int i;
1088 
1089 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092 
1093 	kref_init(&hw_sob_group->kref);
1094 }
1095 
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098 	struct gaudi_hw_sob_group *hw_sob_group =
1099 		container_of(ref, struct gaudi_hw_sob_group, kref);
1100 	struct hl_device *hdev = hw_sob_group->hdev;
1101 
1102 	dev_crit(hdev->dev,
1103 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1104 		hw_sob_group->base_sob_id);
1105 }
1106 
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109 	struct gaudi_collective_properties *prop;
1110 	int i;
1111 
1112 	prop = &gaudi->collective_props;
1113 
1114 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115 
1116 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120 	/* Set collective engine bit */
1121 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124 
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127 	u32 i, sob_id, reserved_sobs_per_group;
1128 	struct gaudi_collective_properties *prop;
1129 	struct gaudi_device *gaudi;
1130 
1131 	gaudi = hdev->asic_specific;
1132 	prop = &gaudi->collective_props;
1133 	sob_id = hdev->asic_prop.collective_first_sob;
1134 
1135 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136 	reserved_sobs_per_group =
1137 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138 
1139 	/* Init SOB groups */
1140 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141 		prop->hw_sob_group[i].hdev = hdev;
1142 		prop->hw_sob_group[i].base_sob_id = sob_id;
1143 		sob_id += reserved_sobs_per_group;
1144 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145 	}
1146 
1147 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1148 		prop->next_sob_group_val[i] = 1;
1149 		prop->curr_sob_group_idx[i] = 0;
1150 		gaudi_collective_map_sobs(hdev, i);
1151 	}
1152 
1153 	gaudi_collective_mstr_sob_mask_set(gaudi);
1154 
1155 	return 0;
1156 }
1157 
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160 	struct gaudi_device *gaudi = hdev->asic_specific;
1161 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162 
1163 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1164 					gaudi_sob_group_hw_reset);
1165 }
1166 
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171 	struct gaudi_collective_properties *cprop;
1172 	struct hl_gen_wait_properties wait_prop;
1173 	struct hl_sync_stream_properties *prop;
1174 	struct gaudi_device *gaudi;
1175 
1176 	gaudi = hdev->asic_specific;
1177 	cprop = &gaudi->collective_props;
1178 	queue_id = job->hw_queue_id;
1179 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180 
1181 	master_sob_base =
1182 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183 	master_monitor = prop->collective_mstr_mon_id[0];
1184 
1185 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186 
1187 	dev_dbg(hdev->dev,
1188 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189 		master_sob_base, cprop->mstr_sob_mask[0],
1190 		cprop->next_sob_group_val[stream],
1191 		master_monitor, queue_id);
1192 
1193 	wait_prop.data = (void *) job->patched_cb;
1194 	wait_prop.sob_base = master_sob_base;
1195 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197 	wait_prop.mon_id = master_monitor;
1198 	wait_prop.q_idx = queue_id;
1199 	wait_prop.size = cb_size;
1200 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201 
1202 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203 	master_monitor = prop->collective_mstr_mon_id[1];
1204 
1205 	dev_dbg(hdev->dev,
1206 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207 		master_sob_base, cprop->mstr_sob_mask[1],
1208 		cprop->next_sob_group_val[stream],
1209 		master_monitor, queue_id);
1210 
1211 	wait_prop.sob_base = master_sob_base;
1212 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213 	wait_prop.mon_id = master_monitor;
1214 	wait_prop.size = cb_size;
1215 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217 
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221 	struct hl_gen_wait_properties wait_prop;
1222 	struct hl_sync_stream_properties *prop;
1223 	u32 queue_id, cb_size = 0;
1224 
1225 	queue_id = job->hw_queue_id;
1226 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227 
1228 	if (job->cs->encaps_signals) {
1229 	/* use the encaps signal handle stored earlier in the flow
1230 		 * and set the SOB information from the encaps
1231 		 * signals handle
1232 		 */
1233 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234 						cs_cmpl);
1235 
1236 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1237 				job->cs->sequence,
1238 				cs_cmpl->hw_sob->sob_id,
1239 				cs_cmpl->sob_val);
1240 	}
1241 
1242 	/* Add to wait CBs using slave monitor */
1243 	wait_prop.data = (void *) job->user_cb;
1244 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245 	wait_prop.sob_mask = 0x1;
1246 	wait_prop.sob_val = cs_cmpl->sob_val;
1247 	wait_prop.mon_id = prop->collective_slave_mon_id;
1248 	wait_prop.q_idx = queue_id;
1249 	wait_prop.size = cb_size;
1250 
1251 	dev_dbg(hdev->dev,
1252 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254 		prop->collective_slave_mon_id, queue_id);
1255 
1256 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257 
1258 	dev_dbg(hdev->dev,
1259 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260 		prop->collective_sob_id, queue_id);
1261 
1262 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263 			prop->collective_sob_id, cb_size, false);
1264 }
1265 
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268 	struct hl_cs_compl *signal_cs_cmpl =
1269 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270 	struct hl_cs_compl *cs_cmpl =
1271 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1272 	struct gaudi_collective_properties *cprop;
1273 	u32 stream, queue_id, sob_group_offset;
1274 	struct gaudi_device *gaudi;
1275 	struct hl_device *hdev;
1276 	struct hl_cs_job *job;
1277 	struct hl_ctx *ctx;
1278 
1279 	ctx = cs->ctx;
1280 	hdev = ctx->hdev;
1281 	gaudi = hdev->asic_specific;
1282 	cprop = &gaudi->collective_props;
1283 
1284 	/* In encaps signals case the SOB info will be retrieved from
1285 	 * the handle in gaudi_collective_slave_init_job.
1286 	 */
1287 	if (!cs->encaps_signals) {
1288 		/* copy the SOB id and value of the signal CS */
1289 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291 	}
1292 
1293 	/* Check again if the signal cs has already completed.
1294 	 * If so, don't send any wait cs since the hw_sob
1295 	 * could already be in reset. If the signal is not completed,
1296 	 * take a refcount on the hw_sob to prevent resetting the sob
1297 	 * while the wait cs is not yet submitted.
1298 	 * note that this check is protected by two locks,
1299 	 * hw queue lock and completion object lock,
1300 	 * and the same completion object lock also protects
1301 	 * the hw_sob reset handler function.
1302 	 * The hw_queue lock prevents the hw_sob refcount value,
1303 	 * which is changed by the signal/wait flows, from going out of sync.
1304 	 */
1305 	spin_lock(&signal_cs_cmpl->lock);
1306 
1307 	if (completion_done(&cs->signal_fence->completion)) {
1308 		spin_unlock(&signal_cs_cmpl->lock);
1309 		return -EINVAL;
1310 	}
1311 	/* Increment kref since all slave queues are now waiting on it */
1312 	kref_get(&cs_cmpl->hw_sob->kref);
1313 
1314 	spin_unlock(&signal_cs_cmpl->lock);
1315 
1316 	/* Calculate the stream from collective master queue (1st job) */
1317 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318 	stream = job->hw_queue_id % 4;
1319 	sob_group_offset =
1320 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321 
1322 	list_for_each_entry(job, &cs->job_list, cs_node) {
1323 		queue_id = job->hw_queue_id;
1324 
1325 		if (hdev->kernel_queues[queue_id].collective_mode ==
1326 				HL_COLLECTIVE_MASTER)
1327 			gaudi_collective_master_init_job(hdev, job, stream,
1328 						sob_group_offset);
1329 		else
1330 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331 	}
1332 
1333 	cs_cmpl->sob_group = sob_group_offset;
1334 
1335 	/* Handle sob group kref and wraparound */
1336 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337 	cprop->next_sob_group_val[stream]++;
1338 
1339 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340 		/*
1341 		 * Decrement as we reached the max value.
1342 		 * The release function won't be called here as we've
1343 		 * just incremented the refcount.
1344 		 */
1345 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346 				gaudi_sob_group_reset_error);
1347 		cprop->next_sob_group_val[stream] = 1;
1348 		/* only two SOBs are currently in use */
1349 		cprop->curr_sob_group_idx[stream] =
1350 			(cprop->curr_sob_group_idx[stream] + 1) &
1351 							(HL_RSVD_SOBS - 1);
1352 
1353 		gaudi_collective_map_sobs(hdev, stream);
1354 
1355 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356 				cprop->curr_sob_group_idx[stream], stream);
1357 	}
1358 
1359 	mb();
1360 	hl_fence_put(cs->signal_fence);
1361 	cs->signal_fence = NULL;
1362 
1363 	return 0;
1364 }
1365 
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367 		struct hl_ctx *ctx, struct hl_cs *cs,
1368 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369 		u32 encaps_signal_offset)
1370 {
1371 	struct hw_queue_properties *hw_queue_prop;
1372 	struct hl_cs_counters_atomic *cntr;
1373 	struct hl_cs_job *job;
1374 	struct hl_cb *cb;
1375 	u32 cb_size;
1376 	bool patched_cb;
1377 
1378 	cntr = &hdev->aggregated_cs_counters;
1379 
1380 	if (mode == HL_COLLECTIVE_MASTER) {
1381 		/* CB size of collective master queue contains
1382 		 * 4 msg short packets for monitor 1 configuration
1383 		 * 1 fence packet
1384 		 * 4 msg short packets for monitor 2 configuration
1385 		 * 1 fence packet
1386 		 * 2 msg prot packets for completion and MSI-X
1387 		 */
1388 		cb_size = sizeof(struct packet_msg_short) * 8 +
1389 				sizeof(struct packet_fence) * 2 +
1390 				sizeof(struct packet_msg_prot) * 2;
1391 		patched_cb = true;
1392 	} else {
1393 		/* CB size of collective slave queues contains
1394 		 * 4 msg short packets for monitor configuration
1395 		 * 1 fence packet
1396 		 * 1 additional msg short packet for sob signal
1397 		 */
1398 		cb_size = sizeof(struct packet_msg_short) * 5 +
1399 				sizeof(struct packet_fence);
1400 		patched_cb = false;
1401 	}
1402 
1403 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405 	if (!job) {
1406 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1409 		return -ENOMEM;
1410 	}
1411 
1412 	/* Allocate internal mapped CB for non patched CBs */
1413 	cb = hl_cb_kernel_create(hdev, cb_size,
1414 			hdev->mmu_enable && !patched_cb);
1415 	if (!cb) {
1416 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418 		kfree(job);
1419 		return -EFAULT;
1420 	}
1421 
1422 	job->id = 0;
1423 	job->cs = cs;
1424 	job->user_cb = cb;
1425 	atomic_inc(&job->user_cb->cs_cnt);
1426 	job->user_cb_size = cb_size;
1427 	job->hw_queue_id = queue_id;
1428 
1429 	/* since it's guaranteed to have only one chunk in the collective wait
1430 	 * cs, we can use this chunk to set the encapsulated signal offset
1431 	 * in the jobs.
1432 	 */
1433 	if (cs->encaps_signals)
1434 		job->encaps_sig_wait_offset = encaps_signal_offset;
1435 
1436 	/*
1437 	 * No need in parsing, user CB is the patched CB.
1438 	 * We call hl_cb_destroy() for two reasons - we don't need
1439 	 * the CB in the CB idr anymore and to decrement its refcount as
1440 	 * it was incremented inside hl_cb_kernel_create().
1441 	 */
1442 	if (patched_cb)
1443 		job->patched_cb = job->user_cb;
1444 	else
1445 		job->patched_cb = NULL;
1446 
1447 	job->job_cb_size = job->user_cb_size;
1448 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449 
1450 	/* increment refcount as for external queues we get completion */
1451 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452 		cs_get(cs);
1453 
1454 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455 
1456 	list_add_tail(&job->cs_node, &cs->job_list);
1457 
1458 	hl_debugfs_add_job(hdev, job);
1459 
1460 	return 0;
1461 }
1462 
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464 		struct hl_ctx *ctx, struct hl_cs *cs,
1465 		u32 wait_queue_id, u32 collective_engine_id,
1466 		u32 encaps_signal_offset)
1467 {
1468 	struct gaudi_device *gaudi = hdev->asic_specific;
1469 	struct hw_queue_properties *hw_queue_prop;
1470 	u32 queue_id, collective_queue, num_jobs;
1471 	u32 stream, nic_queue, nic_idx = 0;
1472 	bool skip;
1473 	int i, rc = 0;
1474 
1475 	/* Verify wait queue id is configured as master */
1476 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1478 		dev_err(hdev->dev,
1479 			"Queue %d is not configured as collective master\n",
1480 			wait_queue_id);
1481 		return -EINVAL;
1482 	}
1483 
1484 	/* Verify engine id is supported */
1485 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487 		dev_err(hdev->dev,
1488 			"Collective wait does not support engine %u\n",
1489 			collective_engine_id);
1490 		return -EINVAL;
1491 	}
1492 
1493 	stream = wait_queue_id % 4;
1494 
1495 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497 	else
1498 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499 
1500 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502 
1503 	/* The first job goes to the collective master queue; it will wait for
1504 	 * the collective slave queues to finish execution.
1505 	 * The synchronization is done using two monitors:
1506 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1507 	 * reduction engine (DMA5/TPC7).
1508 	 *
1509 	 * The rest of the jobs go to the collective slave queues, which will
1510 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511 	 */
1512 	for (i = 0 ; i < num_jobs ; i++) {
1513 		if (i == 0) {
1514 			queue_id = wait_queue_id;
1515 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516 				HL_COLLECTIVE_MASTER, queue_id,
1517 				wait_queue_id, encaps_signal_offset);
1518 		} else {
1519 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520 				if (gaudi->hw_cap_initialized &
1521 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522 					skip = false;
1523 				else
1524 					skip = true;
1525 
1526 				queue_id = nic_queue;
1527 				nic_queue += 4;
1528 				nic_idx++;
1529 
1530 				if (skip)
1531 					continue;
1532 			} else {
1533 				queue_id = collective_queue;
1534 			}
1535 
1536 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537 				HL_COLLECTIVE_SLAVE, queue_id,
1538 				wait_queue_id, encaps_signal_offset);
1539 		}
1540 
1541 		if (rc)
1542 			return rc;
1543 	}
1544 
1545 	return rc;
1546 }
1547 
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550 	struct gaudi_device *gaudi = hdev->asic_specific;
1551 	int rc;
1552 
1553 	rc = gaudi->cpucp_info_get(hdev);
1554 	if (rc) {
1555 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1556 		return rc;
1557 	}
1558 
1559 	if ((hdev->card_type == cpucp_card_type_pci) &&
1560 			(hdev->nic_ports_mask & 0x3)) {
1561 		dev_info(hdev->dev,
1562 			"PCI card detected, only 8 ports are enabled\n");
1563 		hdev->nic_ports_mask &= ~0x3;
1564 
1565 		/* Stop and disable unused NIC QMANs */
1566 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569 
1570 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573 
1574 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576 
1577 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578 	}
1579 
1580 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581 	if (rc) {
1582 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583 		return rc;
1584 	}
1585 
1586 	/* Scrub both SRAM and DRAM */
1587 	rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588 	if (rc)
1589 		goto disable_pci_access;
1590 
1591 	rc = gaudi_fetch_psoc_frequency(hdev);
1592 	if (rc) {
1593 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594 		goto disable_pci_access;
1595 	}
1596 
1597 	rc = gaudi_mmu_clear_pgt_range(hdev);
1598 	if (rc) {
1599 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600 		goto disable_pci_access;
1601 	}
1602 
1603 	rc = gaudi_init_tpc_mem(hdev);
1604 	if (rc) {
1605 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606 		goto disable_pci_access;
1607 	}
1608 
1609 	rc = gaudi_collective_init(hdev);
1610 	if (rc) {
1611 		dev_err(hdev->dev, "Failed to init collective\n");
1612 		goto disable_pci_access;
1613 	}
1614 
1615 	/* We only support a single ASID for the user, so for the sake of optimization, just
1616 	 * initialize the ASID one time during device initialization with the fixed value of 1
1617 	 */
1618 	gaudi_mmu_prepare(hdev, 1);
1619 
1620 	return 0;
1621 
1622 disable_pci_access:
1623 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624 
1625 	return rc;
1626 }
1627 
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630 	const struct hwmon_channel_info **channel_info_arr;
1631 	int i = 0;
1632 
1633 	if (!hdev->hl_chip_info->info)
1634 		return;
1635 
1636 	channel_info_arr = hdev->hl_chip_info->info;
1637 
1638 	while (channel_info_arr[i]) {
1639 		kfree(channel_info_arr[i]->config);
1640 		kfree(channel_info_arr[i]);
1641 		i++;
1642 	}
1643 
1644 	kfree(channel_info_arr);
1645 
1646 	hdev->hl_chip_info->info = NULL;
1647 }
1648 
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653 	int i, j, rc = 0;
1654 
1655 	/*
1656 	 * The device CPU works with 40-bit addresses, where bit 39 must be set
1657 	 * to '1' when accessing the host.
1658 	 * Bits 49:39 of the full host address are saved for a later
1659 	 * configuration of the HW, which extends the address to 50 bits.
1660 	 * Because a single HW register holds the extension bits, these bits
1661 	 * must be identical across the entire allocated range.
1662 	 */
1663 
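	/* Note on the retry loop below: allocations whose MSBs do not match
	 * are intentionally kept until the common exit label, so that each
	 * retry returns a different address; every attempt other than the one
	 * that succeeded is then freed there.
	 */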
1664 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665 		virt_addr_arr[i] =
1666 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1668 						&dma_addr_arr[i],
1669 						GFP_KERNEL | __GFP_ZERO);
1670 		if (!virt_addr_arr[i]) {
1671 			rc = -ENOMEM;
1672 			goto free_dma_mem_arr;
1673 		}
1674 
1675 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1676 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678 			break;
1679 	}
1680 
1681 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682 		dev_err(hdev->dev,
1683 			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1684 		rc = -EFAULT;
1685 		goto free_dma_mem_arr;
1686 	}
1687 
1688 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690 	hdev->cpu_pci_msb_addr =
1691 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692 
1693 	if (!hdev->asic_prop.fw_security_enabled)
1694 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695 
1696 free_dma_mem_arr:
1697 	for (j = 0 ; j < i ; j++)
1698 		hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1700 						virt_addr_arr[j],
1701 						dma_addr_arr[j]);
1702 
1703 	return rc;
1704 }
1705 
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708 	struct gaudi_device *gaudi = hdev->asic_specific;
1709 	struct gaudi_internal_qman_info *q;
1710 	u32 i;
1711 
1712 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713 		q = &gaudi->internal_qmans[i];
1714 		if (!q->pq_kernel_addr)
1715 			continue;
1716 		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717 							q->pq_kernel_addr,
1718 							q->pq_dma_addr);
1719 	}
1720 }
1721 
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724 	struct gaudi_device *gaudi = hdev->asic_specific;
1725 	struct gaudi_internal_qman_info *q;
1726 	int rc, i;
1727 
1728 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730 			continue;
1731 
1732 		q = &gaudi->internal_qmans[i];
1733 
1734 		switch (i) {
1735 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737 			break;
1738 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740 			break;
1741 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743 			break;
1744 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746 			break;
1747 		default:
1748 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1749 			rc = -EINVAL;
1750 			goto free_internal_qmans_pq_mem;
1751 		}
1752 
1753 		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754 						hdev, q->pq_size,
1755 						&q->pq_dma_addr,
1756 						GFP_KERNEL | __GFP_ZERO);
1757 		if (!q->pq_kernel_addr) {
1758 			rc = -ENOMEM;
1759 			goto free_internal_qmans_pq_mem;
1760 		}
1761 	}
1762 
1763 	return 0;
1764 
1765 free_internal_qmans_pq_mem:
1766 	gaudi_free_internal_qmans_pq_mem(hdev);
1767 	return rc;
1768 }
1769 
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1773 	struct pci_mem_region *region;
1774 
1775 	/* CFG */
1776 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777 	region->region_base = CFG_BASE;
1778 	region->region_size = CFG_SIZE;
1779 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780 	region->bar_size = CFG_BAR_SIZE;
1781 	region->bar_id = CFG_BAR_ID;
1782 	region->used = 1;
1783 
1784 	/* SRAM */
1785 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786 	region->region_base = SRAM_BASE_ADDR;
1787 	region->region_size = SRAM_SIZE;
1788 	region->offset_in_bar = 0;
1789 	region->bar_size = SRAM_BAR_SIZE;
1790 	region->bar_id = SRAM_BAR_ID;
1791 	region->used = 1;
1792 
1793 	/* DRAM */
1794 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795 	region->region_base = DRAM_PHYS_BASE;
1796 	region->region_size = hdev->asic_prop.dram_size;
1797 	region->offset_in_bar = 0;
1798 	region->bar_size = prop->dram_pci_bar_size;
1799 	region->bar_id = HBM_BAR_ID;
1800 	region->used = 1;
1801 
1802 	/* SP SRAM */
1803 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1805 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1806 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807 	region->bar_size = CFG_BAR_SIZE;
1808 	region->bar_id = CFG_BAR_ID;
1809 	region->used = 1;
1810 }
1811 
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814 	struct gaudi_device *gaudi;
1815 	u32 i, event_id = 0;
1816 	int rc;
1817 
1818 	/* Allocate device structure */
1819 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820 	if (!gaudi)
1821 		return -ENOMEM;
1822 
1823 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824 		if (gaudi_irq_map_table[i].valid) {
1825 			if (event_id == GAUDI_EVENT_SIZE) {
1826 				dev_err(hdev->dev,
1827 					"Event array exceeds the limit of %u events\n",
1828 					GAUDI_EVENT_SIZE);
1829 				rc = -EINVAL;
1830 				goto free_gaudi_device;
1831 			}
1832 
1833 			gaudi->events[event_id++] =
1834 					gaudi_irq_map_table[i].fc_id;
1835 		}
1836 	}
1837 
1838 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839 
1840 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841 
1842 	hdev->asic_specific = gaudi;
1843 
1844 	/* Create DMA pool for small allocations */
1845 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847 	if (!hdev->dma_pool) {
1848 		dev_err(hdev->dev, "failed to create DMA pool\n");
1849 		rc = -ENOMEM;
1850 		goto free_gaudi_device;
1851 	}
1852 
1853 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854 	if (rc)
1855 		goto free_dma_pool;
1856 
1857 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858 	if (!hdev->cpu_accessible_dma_pool) {
1859 		dev_err(hdev->dev,
1860 			"Failed to create CPU accessible DMA pool\n");
1861 		rc = -ENOMEM;
1862 		goto free_cpu_dma_mem;
1863 	}
1864 
1865 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1867 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868 	if (rc) {
1869 		dev_err(hdev->dev,
1870 			"Failed to add memory to CPU accessible DMA pool\n");
1871 		rc = -EFAULT;
1872 		goto free_cpu_accessible_dma_pool;
1873 	}
1874 
1875 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876 	if (rc)
1877 		goto free_cpu_accessible_dma_pool;
1878 
1879 	spin_lock_init(&gaudi->hw_queues_lock);
1880 	mutex_init(&gaudi->clk_gate_mutex);
1881 
1882 	hdev->supports_sync_stream = true;
1883 	hdev->supports_coresight = true;
1884 	hdev->supports_staged_submission = true;
1885 	hdev->supports_wait_for_multi_cs = true;
1886 
1887 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1888 	hdev->stream_master_qid_arr =
1889 				hdev->asic_funcs->get_stream_master_qid_arr();
1890 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891 
1892 	return 0;
1893 
1894 free_cpu_accessible_dma_pool:
1895 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897 	if (!hdev->asic_prop.fw_security_enabled)
1898 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899 					hdev->cpu_pci_msb_addr);
1900 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1902 			hdev->cpu_accessible_dma_mem,
1903 			hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905 	dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907 	kfree(gaudi);
1908 	return rc;
1909 }
1910 
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913 	struct gaudi_device *gaudi = hdev->asic_specific;
1914 
1915 	gaudi_free_internal_qmans_pq_mem(hdev);
1916 
1917 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918 
1919 	if (!hdev->asic_prop.fw_security_enabled)
1920 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921 					hdev->cpu_pci_msb_addr);
1922 
1923 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1925 			hdev->cpu_accessible_dma_mem,
1926 			hdev->cpu_accessible_dma_address);
1927 
1928 	dma_pool_destroy(hdev->dma_pool);
1929 
1930 	mutex_destroy(&gaudi->clk_gate_mutex);
1931 
1932 	kfree(gaudi);
1933 
1934 	return 0;
1935 }
1936 
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939 	struct hl_device *hdev = arg;
1940 	int i;
1941 
1942 	if (hdev->disabled)
1943 		return IRQ_HANDLED;
1944 
1945 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947 
1948 	hl_irq_handler_eq(irq, &hdev->event_queue);
1949 
1950 	return IRQ_HANDLED;
1951 }
1952 
1953 /*
1954  * For backward compatibility, new MSI interrupts should be set after the
1955  * existing CPU and NIC interrupts.
1956  */
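/*
 * The resulting vector layout, as implemented below: indices below
 * GAUDI_EVENT_QUEUE_MSI_IDX (the completion queues) and the CPU event queue
 * itself map 1:1 to MSI vectors, while any newer interrupt index is shifted
 * past the CPU EQ and NIC vectors (nr + NIC_NUMBER_OF_ENGINES + 1).
 */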
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958 				bool cpu_eq)
1959 {
1960 	int msi_vec;
1961 
1962 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964 				GAUDI_EVENT_QUEUE_MSI_IDX);
1965 
1966 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1968 
1969 	return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
1971 
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974 	int rc, irq;
1975 
1976 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977 
1978 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1979 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980 			"gaudi single msi", hdev);
1981 	if (rc)
1982 		dev_err(hdev->dev,
1983 			"Failed to request single MSI IRQ\n");
1984 
1985 	return rc;
1986 }
1987 
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1991 	int rc, i, irq_cnt_init, irq;
1992 
1993 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994 		irq = gaudi_pci_irq_vector(hdev, i, false);
1995 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996 				&hdev->completion_queue[i]);
1997 		if (rc) {
1998 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1999 			goto free_irqs;
2000 		}
2001 	}
2002 
2003 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005 				&hdev->event_queue);
2006 	if (rc) {
2007 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2008 		goto free_irqs;
2009 	}
2010 
2011 	return 0;
2012 
2013 free_irqs:
2014 	for (i = 0 ; i < irq_cnt_init ; i++)
2015 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016 				&hdev->completion_queue[i]);
2017 	return rc;
2018 }
2019 
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022 	struct gaudi_device *gaudi = hdev->asic_specific;
2023 	int rc;
2024 
2025 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026 		return 0;
2027 
2028 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029 	if (rc < 0) {
2030 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031 		return rc;
2032 	}
2033 
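	/* Only a single vector is requested above (min == max == 1), so on
	 * success rc is 1 and, unless NUMBER_OF_INTERRUPTS is 1, the driver
	 * takes the single-MSI path below.
	 */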
2034 	if (rc < NUMBER_OF_INTERRUPTS) {
2035 		gaudi->multi_msi_mode = false;
2036 		rc = gaudi_enable_msi_single(hdev);
2037 	} else {
2038 		gaudi->multi_msi_mode = true;
2039 		rc = gaudi_enable_msi_multi(hdev);
2040 	}
2041 
2042 	if (rc)
2043 		goto free_pci_irq_vectors;
2044 
2045 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046 
2047 	return 0;
2048 
2049 free_pci_irq_vectors:
2050 	pci_free_irq_vectors(hdev->pdev);
2051 	return rc;
2052 }
2053 
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056 	struct gaudi_device *gaudi = hdev->asic_specific;
2057 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058 
2059 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060 		return;
2061 
2062 	/* Wait for all pending IRQs to be finished */
2063 	if (gaudi->multi_msi_mode) {
2064 		for (i = 0 ; i < cq_cnt ; i++)
2065 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066 
2067 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2068 						GAUDI_EVENT_QUEUE_MSI_IDX,
2069 						true));
2070 	} else {
2071 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072 	}
2073 }
2074 
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077 	struct gaudi_device *gaudi = hdev->asic_specific;
2078 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079 
2080 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081 		return;
2082 
2083 	gaudi_sync_irqs(hdev);
2084 
2085 	if (gaudi->multi_msi_mode) {
2086 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087 						true);
2088 		free_irq(irq, &hdev->event_queue);
2089 
2090 		for (i = 0 ; i < cq_cnt ; i++) {
2091 			irq = gaudi_pci_irq_vector(hdev, i, false);
2092 			free_irq(irq, &hdev->completion_queue[i]);
2093 		}
2094 	} else {
2095 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096 	}
2097 
2098 	pci_free_irq_vectors(hdev->pdev);
2099 
2100 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102 
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105 	struct gaudi_device *gaudi = hdev->asic_specific;
2106 
2107 	if (hdev->asic_prop.fw_security_enabled)
2108 		return;
2109 
2110 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112 		return;
2113 
2114 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115 		return;
2116 
2117 	if (!hdev->sram_scrambler_enable)
2118 		return;
2119 
2120 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136 
2137 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153 
2154 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170 
2171 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173 
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176 	struct gaudi_device *gaudi = hdev->asic_specific;
2177 
2178 	if (hdev->asic_prop.fw_security_enabled)
2179 		return;
2180 
2181 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183 		return;
2184 
2185 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186 		return;
2187 
2188 	if (!hdev->dram_scrambler_enable)
2189 		return;
2190 
2191 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207 
2208 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 
2225 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241 
2242 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244 
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247 	if (hdev->asic_prop.fw_security_enabled)
2248 		return;
2249 
2250 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252 		return;
2253 
2254 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258 
2259 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263 
2264 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268 
2269 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273 
2274 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278 
2279 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283 
2284 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288 
2289 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293 
2294 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298 
2299 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303 
2304 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308 
2309 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313 
2314 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318 
2319 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323 
2324 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328 
2329 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333 
2334 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338 
2339 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343 
2344 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348 
2349 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353 
2354 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358 
2359 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363 
2364 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368 
2369 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373 
2374 	if (!hdev->dram_scrambler_enable) {
2375 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379 
2380 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384 
2385 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389 
2390 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394 
2395 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399 
2400 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404 
2405 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409 
2410 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414 
2415 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419 
2420 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424 
2425 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429 
2430 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434 
2435 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439 
2440 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444 
2445 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449 
2450 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454 
2455 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459 
2460 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464 
2465 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469 
2470 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474 
2475 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479 
2480 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484 
2485 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489 
2490 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494 	}
2495 
2496 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500 
2501 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505 
2506 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510 
2511 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515 
2516 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520 
2521 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525 
2526 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530 
2531 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535 
2536 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540 
2541 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545 
2546 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550 
2551 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555 
2556 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560 
2561 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565 
2566 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570 
2571 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575 
2576 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580 
2581 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585 
2586 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590 
2591 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595 
2596 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600 
2601 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605 
2606 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610 
2611 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616 
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620 
2621 	if (hdev->asic_prop.fw_security_enabled)
2622 		return;
2623 
2624 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626 		return;
2627 
2628 	hbm0_wr = 0x33333333;
2629 	hbm0_rd = 0x77777777;
2630 	hbm1_wr = 0x55555555;
2631 	hbm1_rd = 0xDDDDDDDD;
2632 
2633 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637 
2638 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642 
2643 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647 
2648 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652 
2653 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665 
2666 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679 
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682 	u32 tpc_offset;
2683 	int tpc_id, i;
2684 
2685 	gaudi_init_e2e(hdev);
2686 	gaudi_init_hbm_cred(hdev);
2687 
2688 	for (tpc_id = 0, tpc_offset = 0;
2689 				tpc_id < TPC_NUMBER_OF_ENGINES;
2690 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691 		/* Mask all arithmetic interrupts from TPC */
2692 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693 		/* Set 16 cache lines */
2694 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695 				ICACHE_FETCH_LINE_NUM, 2);
2696 	}
2697 
2698 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2699 	for (i = 0 ; i < 128 ; i += 8)
2700 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701 
2702 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707 
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709 					int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711 	struct cpu_dyn_regs *dyn_regs =
2712 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715 	u32 q_off, dma_qm_offset;
2716 	u32 dma_qm_err_cfg, irq_handler_offset;
2717 
2718 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719 
2720 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724 	so_base_en_lo = lower_32_bits(CFG_BASE +
2725 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726 	so_base_en_hi = upper_32_bits(CFG_BASE +
2727 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2733 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2735 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736 
2737 	q_off = dma_qm_offset + qman_id * 4;
2738 
2739 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741 
2742 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745 
2746 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748 							QMAN_LDMA_SRC_OFFSET);
2749 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750 							QMAN_LDMA_DST_OFFSET);
2751 
2752 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760 
2761 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762 
2763 	/* This configuration is per QMAN block (not per stream), so do it only once */
2764 	if (qman_id == 0) {
2765 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768 
2769 		/* Configure RAZWI IRQ */
2770 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771 		if (hdev->stop_on_err)
2772 			dma_qm_err_cfg |=
2773 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774 
2775 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776 
2777 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778 			lower_32_bits(CFG_BASE + irq_handler_offset));
2779 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780 			upper_32_bits(CFG_BASE + irq_handler_offset));
2781 
2782 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784 									dma_id);
2785 
2786 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787 				QM_ARB_ERR_MSG_EN_MASK);
2788 
2789 		/* Increase ARB WDT to support streams architecture */
2790 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791 				GAUDI_ARB_WDT_TIMEOUT);
2792 
2793 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794 				QMAN_EXTERNAL_MAKE_TRUSTED);
2795 
2796 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797 	}
2798 }
2799 
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802 	struct cpu_dyn_regs *dyn_regs =
2803 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806 	u32 irq_handler_offset;
2807 
2808 	/* Set to maximum possible according to physical size */
2809 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811 
2812 	/* WA for H/W bug H3-2116 */
2813 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814 
2815 	/* The STOP_ON bit implies that the operation is not completed in case of RAZWI */
2816 	if (hdev->stop_on_err)
2817 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818 
2819 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820 
2821 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824 
2825 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826 		lower_32_bits(CFG_BASE + irq_handler_offset));
2827 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828 		upper_32_bits(CFG_BASE + irq_handler_offset));
2829 
2830 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2833 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834 	/* If the channel is secured, it should be in MMU bypass mode */
2835 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839 
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841 				u32 enable_mask)
2842 {
2843 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844 
2845 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847 
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850 	struct gaudi_device *gaudi = hdev->asic_specific;
2851 	struct hl_hw_queue *q;
2852 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853 
2854 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855 		return;
2856 
2857 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858 		dma_id = gaudi_dma_assignment[i];
2859 		/*
2860 		 * For queues placed after the CPU queue, add 1 to get the
2861 		 * correct queue index. In addition, skip over the CPU EQ and
2862 		 * NIC IRQs in order to get the correct MSI vector.
2863 		 */
2864 		if (dma_id > 1) {
2865 			cpu_skip = 1;
2866 			nic_skip = NIC_NUMBER_OF_ENGINES;
2867 		} else {
2868 			cpu_skip = 0;
2869 			nic_skip = 0;
2870 		}
2871 
2872 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873 			q_idx = 4 * dma_id + j + cpu_skip;
2874 			q = &hdev->kernel_queues[q_idx];
2875 			q->cq_id = cq_id++;
2876 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878 						q->bus_address);
2879 		}
2880 
2881 		gaudi_init_dma_core(hdev, dma_id);
2882 
2883 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884 	}
2885 
2886 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888 
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890 					int qman_id, u64 qman_base_addr)
2891 {
2892 	struct cpu_dyn_regs *dyn_regs =
2893 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896 	u32 dma_qm_err_cfg, irq_handler_offset;
2897 	u32 q_off, dma_qm_offset;
2898 
2899 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900 
2901 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905 	so_base_en_lo = lower_32_bits(CFG_BASE +
2906 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907 	so_base_en_hi = upper_32_bits(CFG_BASE +
2908 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2914 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2916 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917 
2918 	q_off = dma_qm_offset + qman_id * 4;
2919 
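	/* qman_id 0-3 are the four stream queues (upper CPs) and are given a
	 * PQ base address; qman_id 4 is the lower CP, which is initialized
	 * with a zero PQ address (see the lower-CP init call in
	 * gaudi_init_hbm_dma_qmans) and carries the per-QMAN error and
	 * arbitration configuration instead.
	 */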
2920 	if (qman_id < 4) {
2921 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922 					lower_32_bits(qman_base_addr));
2923 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924 					upper_32_bits(qman_base_addr));
2925 
2926 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929 
2930 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931 							QMAN_CPDMA_SIZE_OFFSET);
2932 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933 							QMAN_CPDMA_SRC_OFFSET);
2934 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935 							QMAN_CPDMA_DST_OFFSET);
2936 	} else {
2937 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940 
2941 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942 							QMAN_LDMA_SIZE_OFFSET);
2943 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944 							QMAN_LDMA_SRC_OFFSET);
2945 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946 							QMAN_LDMA_DST_OFFSET);
2947 
2948 		/* Configure RAZWI IRQ */
2949 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950 		if (hdev->stop_on_err)
2951 			dma_qm_err_cfg |=
2952 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953 
2954 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955 
2956 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957 			lower_32_bits(CFG_BASE + irq_handler_offset));
2958 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959 			upper_32_bits(CFG_BASE + irq_handler_offset));
2960 
2961 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963 									dma_id);
2964 
2965 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966 				QM_ARB_ERR_MSG_EN_MASK);
2967 
2968 		/* Increase ARB WDT to support streams architecture */
2969 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970 				GAUDI_ARB_WDT_TIMEOUT);
2971 
2972 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974 				QMAN_INTERNAL_MAKE_TRUSTED);
2975 	}
2976 
2977 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981 
2982 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985 				mtr_base_ws_lo);
2986 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987 				mtr_base_ws_hi);
2988 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989 				so_base_ws_lo);
2990 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991 				so_base_ws_hi);
2992 	}
2993 }
2994 
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997 	struct gaudi_device *gaudi = hdev->asic_specific;
2998 	struct gaudi_internal_qman_info *q;
2999 	u64 qman_base_addr;
3000 	int i, j, dma_id, internal_q_index;
3001 
3002 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003 		return;
3004 
3005 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007 
3008 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009 			/*
3010 			 * Account for the CPU queue in order to get the correct
3011 			 * queue index, as all internal queues are placed after it
3012 			 */
3013 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014 
3015 			q = &gaudi->internal_qmans[internal_q_index];
3016 			qman_base_addr = (u64) q->pq_dma_addr;
3017 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018 						qman_base_addr);
3019 		}
3020 
3021 		/* Initializing lower CP for HBM DMA QMAN */
3022 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023 
3024 		gaudi_init_dma_core(hdev, dma_id);
3025 
3026 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027 	}
3028 
3029 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031 
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033 					int qman_id, u64 qman_base_addr)
3034 {
3035 	struct cpu_dyn_regs *dyn_regs =
3036 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037 	u32 mtr_base_lo, mtr_base_hi;
3038 	u32 so_base_lo, so_base_hi;
3039 	u32 irq_handler_offset;
3040 	u32 q_off, mme_id;
3041 	u32 mme_qm_err_cfg;
3042 
3043 	mtr_base_lo = lower_32_bits(CFG_BASE +
3044 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045 	mtr_base_hi = upper_32_bits(CFG_BASE +
3046 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047 	so_base_lo = lower_32_bits(CFG_BASE +
3048 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049 	so_base_hi = upper_32_bits(CFG_BASE +
3050 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051 
3052 	q_off = mme_offset + qman_id * 4;
3053 
3054 	if (qman_id < 4) {
3055 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056 					lower_32_bits(qman_base_addr));
3057 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058 					upper_32_bits(qman_base_addr));
3059 
3060 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063 
3064 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065 							QMAN_CPDMA_SIZE_OFFSET);
3066 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067 							QMAN_CPDMA_SRC_OFFSET);
3068 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069 							QMAN_CPDMA_DST_OFFSET);
3070 	} else {
3071 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074 
3075 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076 							QMAN_LDMA_SIZE_OFFSET);
3077 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078 							QMAN_LDMA_SRC_OFFSET);
3079 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080 							QMAN_LDMA_DST_OFFSET);
3081 
3082 		/* Configure RAZWI IRQ */
3083 		mme_id = mme_offset /
3084 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085 
3086 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087 		if (hdev->stop_on_err)
3088 			mme_qm_err_cfg |=
3089 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090 
3091 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092 
3093 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094 			lower_32_bits(CFG_BASE + irq_handler_offset));
3095 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096 			upper_32_bits(CFG_BASE + irq_handler_offset));
3097 
3098 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100 									mme_id);
3101 
3102 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103 				QM_ARB_ERR_MSG_EN_MASK);
3104 
3105 		/* Increase ARB WDT to support streams architecture */
3106 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107 				GAUDI_ARB_WDT_TIMEOUT);
3108 
3109 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111 				QMAN_INTERNAL_MAKE_TRUSTED);
3112 	}
3113 
3114 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119 
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122 	struct gaudi_device *gaudi = hdev->asic_specific;
3123 	struct gaudi_internal_qman_info *q;
3124 	u64 qman_base_addr;
3125 	u32 mme_offset;
3126 	int i, internal_q_index;
3127 
3128 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129 		return;
3130 
3131 	/*
3132 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134 	 */
3135 
3136 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137 
3138 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140 		q = &gaudi->internal_qmans[internal_q_index];
3141 		qman_base_addr = (u64) q->pq_dma_addr;
3142 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143 					qman_base_addr);
3144 		if (i == 3)
3145 			mme_offset = 0;
3146 	}
3147 
3148 	/* Initializing lower CP for MME QMANs */
3149 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3152 
3153 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155 
3156 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158 
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160 				int qman_id, u64 qman_base_addr)
3161 {
3162 	struct cpu_dyn_regs *dyn_regs =
3163 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166 	u32 tpc_qm_err_cfg, irq_handler_offset;
3167 	u32 q_off, tpc_id;
3168 
3169 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 	so_base_en_lo = lower_32_bits(CFG_BASE +
3174 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175 	so_base_en_hi = upper_32_bits(CFG_BASE +
3176 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3182 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3184 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185 
3186 	q_off = tpc_offset + qman_id * 4;
3187 
3188 	tpc_id = tpc_offset /
3189 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190 
3191 	if (qman_id < 4) {
3192 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193 					lower_32_bits(qman_base_addr));
3194 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195 					upper_32_bits(qman_base_addr));
3196 
3197 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200 
3201 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202 							QMAN_CPDMA_SIZE_OFFSET);
3203 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204 							QMAN_CPDMA_SRC_OFFSET);
3205 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206 							QMAN_CPDMA_DST_OFFSET);
3207 	} else {
3208 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211 
3212 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213 							QMAN_LDMA_SIZE_OFFSET);
3214 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215 							QMAN_LDMA_SRC_OFFSET);
3216 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217 							QMAN_LDMA_DST_OFFSET);
3218 
3219 		/* Configure RAZWI IRQ */
3220 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221 		if (hdev->stop_on_err)
3222 			tpc_qm_err_cfg |=
3223 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224 
3225 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226 
3227 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228 			lower_32_bits(CFG_BASE + irq_handler_offset));
3229 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230 			upper_32_bits(CFG_BASE + irq_handler_offset));
3231 
3232 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234 									tpc_id);
3235 
3236 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237 				QM_ARB_ERR_MSG_EN_MASK);
3238 
3239 		/* Increase ARB WDT to support streams architecture */
3240 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241 				GAUDI_ARB_WDT_TIMEOUT);
3242 
3243 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245 				QMAN_INTERNAL_MAKE_TRUSTED);
3246 	}
3247 
3248 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252 
3253 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254 	if (tpc_id == 6) {
3255 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256 				mtr_base_ws_lo);
3257 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258 				mtr_base_ws_hi);
3259 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260 				so_base_ws_lo);
3261 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262 				so_base_ws_hi);
3263 	}
3264 }
3265 
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268 	struct gaudi_device *gaudi = hdev->asic_specific;
3269 	struct gaudi_internal_qman_info *q;
3270 	u64 qman_base_addr;
3271 	u32 so_base_hi, tpc_offset = 0;
3272 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274 	int i, tpc_id, internal_q_index;
3275 
3276 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277 		return;
3278 
3279 	so_base_hi = upper_32_bits(CFG_BASE +
3280 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281 
3282 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285 						tpc_id * QMAN_STREAMS + i;
3286 			q = &gaudi->internal_qmans[internal_q_index];
3287 			qman_base_addr = (u64) q->pq_dma_addr;
3288 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289 						qman_base_addr);
3290 
3291 			if (i == 3) {
3292 				/* Initializing lower CP for TPC QMAN */
3293 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294 
3295 				/* Enable the QMAN and TPC channel */
3296 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297 						QMAN_TPC_ENABLE);
3298 			}
3299 		}
3300 
3301 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302 				so_base_hi);
3303 
3304 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305 
3306 		gaudi->hw_cap_initialized |=
3307 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308 	}
3309 }
3310 
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312 				int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314 	struct cpu_dyn_regs *dyn_regs =
3315 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318 	u32 nic_qm_err_cfg, irq_handler_offset;
3319 	u32 q_off;
3320 
3321 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3322 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3326 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327 	so_base_en_hi = upper_32_bits(CFG_BASE +
3328 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3330 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3334 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3336 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337 
3338 	q_off = nic_offset + qman_id * 4;
3339 
3340 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342 
3343 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346 
3347 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348 							QMAN_LDMA_SIZE_OFFSET);
3349 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350 							QMAN_LDMA_SRC_OFFSET);
3351 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352 							QMAN_LDMA_DST_OFFSET);
3353 
3354 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358 
3359 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364 
3365 	if (qman_id == 0) {
3366 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369 
3370 		/* Configure RAZWI IRQ */
3371 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372 		if (hdev->stop_on_err)
3373 			nic_qm_err_cfg |=
3374 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375 
3376 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377 
3378 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379 			lower_32_bits(CFG_BASE + irq_handler_offset));
3380 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381 			upper_32_bits(CFG_BASE + irq_handler_offset));
3382 
3383 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385 									nic_id);
3386 
3387 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388 				QM_ARB_ERR_MSG_EN_MASK);
3389 
3390 		/* Increase ARB WDT to support streams architecture */
3391 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392 				GAUDI_ARB_WDT_TIMEOUT);
3393 
3394 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396 				QMAN_INTERNAL_MAKE_TRUSTED);
3397 	}
3398 }
3399 
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402 	struct gaudi_device *gaudi = hdev->asic_specific;
3403 	struct gaudi_internal_qman_info *q;
3404 	u64 qman_base_addr;
3405 	u32 nic_offset = 0;
3406 	u32 nic_delta_between_qmans =
3407 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408 	u32 nic_delta_between_nics =
3409 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 	int i, nic_id, internal_q_index;
3411 
3412 	if (!hdev->nic_ports_mask)
3413 		return;
3414 
3415 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416 		return;
3417 
3418 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419 
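	/*
	 * nic_offset walks the register space: each NIC macro hosts two QMANs
	 * (QM0/QM1), so after every odd-numbered port the offset is rewound by
	 * two QMAN strides and advanced by one NIC stride to reach the next
	 * NIC block.
	 */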
3420 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422 			nic_offset += nic_delta_between_qmans;
3423 			if (nic_id & 1) {
3424 				nic_offset -= (nic_delta_between_qmans * 2);
3425 				nic_offset += nic_delta_between_nics;
3426 			}
3427 			continue;
3428 		}
3429 
3430 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432 						nic_id * QMAN_STREAMS + i;
3433 			q = &gaudi->internal_qmans[internal_q_index];
3434 			qman_base_addr = (u64) q->pq_dma_addr;
3435 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436 						qman_base_addr, nic_id);
3437 		}
3438 
3439 		/* Enable the QMAN */
3440 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441 
3442 		nic_offset += nic_delta_between_qmans;
3443 		if (nic_id & 1) {
3444 			nic_offset -= (nic_delta_between_qmans * 2);
3445 			nic_offset += nic_delta_between_nics;
3446 		}
3447 
3448 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449 	}
3450 }
3451 
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454 	struct gaudi_device *gaudi = hdev->asic_specific;
3455 
3456 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457 		return;
3458 
3459 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463 
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466 	struct gaudi_device *gaudi = hdev->asic_specific;
3467 
3468 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469 		return;
3470 
3471 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477 
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480 	struct gaudi_device *gaudi = hdev->asic_specific;
3481 
3482 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483 		return;
3484 
3485 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488 
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491 	struct gaudi_device *gaudi = hdev->asic_specific;
3492 	u32 tpc_offset = 0;
3493 	int tpc_id;
3494 
3495 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496 		return;
3497 
3498 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501 	}
3502 }
3503 
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506 	struct gaudi_device *gaudi = hdev->asic_specific;
3507 	u32 nic_mask, nic_offset = 0;
3508 	u32 nic_delta_between_qmans =
3509 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510 	u32 nic_delta_between_nics =
3511 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512 	int nic_id;
3513 
3514 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516 
3517 		if (gaudi->hw_cap_initialized & nic_mask)
3518 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519 
3520 		nic_offset += nic_delta_between_qmans;
3521 		if (nic_id & 1) {
3522 			nic_offset -= (nic_delta_between_qmans * 2);
3523 			nic_offset += nic_delta_between_nics;
3524 		}
3525 	}
3526 }
3527 
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530 	struct gaudi_device *gaudi = hdev->asic_specific;
3531 
3532 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533 		return;
3534 
3535 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540 
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543 	struct gaudi_device *gaudi = hdev->asic_specific;
3544 
3545 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546 		return;
3547 
3548 	/* Stop CPs of HBM DMA QMANs */
3549 
3550 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556 
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559 	struct gaudi_device *gaudi = hdev->asic_specific;
3560 
3561 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562 		return;
3563 
3564 	/* Stop CPs of MME QMANs */
3565 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568 
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571 	struct gaudi_device *gaudi = hdev->asic_specific;
3572 
3573 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574 		return;
3575 
3576 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585 
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588 	struct gaudi_device *gaudi = hdev->asic_specific;
3589 
3590 	/* Stop upper CPs of QMANs */
3591 
3592 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3594 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597 
3598 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3600 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603 
3604 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3606 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609 
3610 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3612 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615 
3616 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3618 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621 
3622 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3624 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627 
3628 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3630 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633 
3634 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3636 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639 
3640 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3642 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645 
3646 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3648 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652 
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655 	struct gaudi_device *gaudi = hdev->asic_specific;
3656 
3657 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658 		return;
3659 
3660 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664 
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667 	struct gaudi_device *gaudi = hdev->asic_specific;
3668 
3669 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670 		return;
3671 
3672 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678 
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681 	struct gaudi_device *gaudi = hdev->asic_specific;
3682 
3683 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684 		return;
3685 
3686 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704 
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707 	struct gaudi_device *gaudi = hdev->asic_specific;
3708 
3709 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710 		return;
3711 
3712 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721 
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724 	struct gaudi_device *gaudi = hdev->asic_specific;
3725 	u32 qman_offset;
3726 	bool enable;
3727 	int i;
3728 
3729 	/* In case a debug session is in progress, don't enable clock gating
3730 	 * as it may interfere with it
3731 	 */
3732 	if (hdev->in_debug)
3733 		return;
3734 
3735 	if (hdev->asic_prop.fw_security_enabled)
3736 		return;
3737 
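	/*
	 * Clock gating is set per engine, according to the corresponding bit
	 * in hdev->clock_gating_mask: first the PCI DMA QMANs, then the HBM
	 * DMA QMANs, the two MME masters (MME0/MME2) and finally the TPCs.
	 */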
3738 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739 		enable = !!(hdev->clock_gating_mask &
3740 				(BIT_ULL(gaudi_dma_assignment[i])));
3741 
3742 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747 	}
3748 
3749 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750 		enable = !!(hdev->clock_gating_mask &
3751 				(BIT_ULL(gaudi_dma_assignment[i])));
3752 
3753 		/* GC sends work to the DMA engine through the upper CP in
3754 		 * DMA5, so clock gating must not be enabled for that DMA
3755 		 */
3756 		if (i == GAUDI_HBM_DMA_4)
3757 			enable = 0;
3758 
3759 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764 	}
3765 
3766 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769 
3770 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773 
3774 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775 		enable = !!(hdev->clock_gating_mask &
3776 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777 
3778 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782 
3783 		qman_offset += TPC_QMAN_OFFSET;
3784 	}
3785 
3786 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788 
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791 	struct gaudi_device *gaudi = hdev->asic_specific;
3792 	u32 qman_offset;
3793 	int i;
3794 
3795 	if (hdev->asic_prop.fw_security_enabled)
3796 		return;
3797 
3798 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801 
3802 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803 	}
3804 
3805 	WREG32(mmMME0_QM_CGM_CFG, 0);
3806 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3807 	WREG32(mmMME2_QM_CGM_CFG, 0);
3808 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3809 
3810 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813 
3814 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815 	}
3816 
3817 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819 
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822 	/* Disable the timestamp counter */
3823 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824 
3825 	/* Zero the lower/upper parts of the 64-bit counter */
3826 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828 
3829 	/* Enable the counter */
3830 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832 
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835 	/* Disable the timestamp counter */
3836 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838 
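/*
 * Engine halt flow: first stop the QMAN CPs so they fetch no new work, then
 * stall the engine cores themselves, and only then disable the QMANs and the
 * timestamp counter, with a wait between the stages. When the firmware
 * performs the reset, all of this is skipped and only the MSI is disabled.
 */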
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841 	u32 wait_timeout_ms;
3842 
3843 	dev_info(hdev->dev,
3844 		"Halting compute engines and disabling interrupts\n");
3845 
3846 	if (hdev->pldm)
3847 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848 	else
3849 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850 
3851 	if (fw_reset)
3852 		goto skip_engines;
3853 
3854 	gaudi_stop_nic_qmans(hdev);
3855 	gaudi_stop_mme_qmans(hdev);
3856 	gaudi_stop_tpc_qmans(hdev);
3857 	gaudi_stop_hbm_dma_qmans(hdev);
3858 	gaudi_stop_pci_dma_qmans(hdev);
3859 
3860 	hdev->asic_funcs->disable_clock_gating(hdev);
3861 
3862 	msleep(wait_timeout_ms);
3863 
3864 	gaudi_pci_dma_stall(hdev);
3865 	gaudi_hbm_dma_stall(hdev);
3866 	gaudi_tpc_stall(hdev);
3867 	gaudi_mme_stall(hdev);
3868 
3869 	msleep(wait_timeout_ms);
3870 
3871 	gaudi_disable_nic_qmans(hdev);
3872 	gaudi_disable_mme_qmans(hdev);
3873 	gaudi_disable_tpc_qmans(hdev);
3874 	gaudi_disable_hbm_dma_qmans(hdev);
3875 	gaudi_disable_pci_dma_qmans(hdev);
3876 
3877 	gaudi_disable_timestamp(hdev);
3878 
3879 skip_engines:
3880 	gaudi_disable_msi(hdev);
3881 }
3882 
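/*
 * MMU bring-up: program a hop-0 page-table address for every ASID (the hop-0
 * tables are laid out consecutively, one mmu_hop_table_size slot per ASID),
 * set the STLB cache-management region, invalidate the MMU cache and enable
 * address translation.
 */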
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3886 	struct gaudi_device *gaudi = hdev->asic_specific;
3887 	u64 hop0_addr;
3888 	int rc, i;
3889 
3890 	if (!hdev->mmu_enable)
3891 		return 0;
3892 
3893 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894 		return 0;
3895 
3896 	for (i = 0 ; i < prop->max_asid ; i++) {
3897 		hop0_addr = prop->mmu_pgt_addr +
3898 				(i * prop->mmu_hop_table_size);
3899 
3900 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901 		if (rc) {
3902 			dev_err(hdev->dev,
3903 				"failed to set hop0 addr for asid %d\n", i);
3904 			goto err;
3905 		}
3906 	}
3907 
3908 	/* init MMU cache manage page */
3909 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911 
3912 	/* mem cache invalidation */
3913 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914 
3915 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916 
3917 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919 
3920 	WREG32(mmSTLB_HOP_CONFIGURATION,
3921 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922 
3923 	/*
3924 	 * The H/W expects the first PI after init to be 1. After wraparound
3925 	 * we'll write 0.
3926 	 */
3927 	gaudi->mmu_cache_inv_pi = 1;
3928 
3929 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930 
3931 	return 0;
3932 
3933 err:
3934 	return rc;
3935 }
3936 
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939 	void __iomem *dst;
3940 
3941 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942 
3943 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945 
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948 	void __iomem *dst;
3949 
3950 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951 
3952 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954 
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957 	struct dynamic_fw_load_mgr *dynamic_loader;
3958 	struct cpu_dyn_regs *dyn_regs;
3959 
3960 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961 
3962 	/*
3963 	 * Here we set initial values for a few specific dynamic registers, as
3964 	 * they have to be hard-coded before the first descriptor is read from
3965 	 * the FW. In later stages of the protocol these values are updated
3966 	 * automatically by reading the FW descriptor, so the data there is
3967 	 * always up-to-date.
3968 	 */
3969 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970 	dyn_regs->kmd_msg_to_cpu =
3971 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972 	dyn_regs->cpu_cmd_status_to_host =
3973 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974 
3975 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977 
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980 	struct static_fw_load_mgr *static_loader;
3981 
3982 	static_loader = &hdev->fw_loader.static_loader;
3983 
3984 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997 			GAUDI_PLDM_RESET_WAIT_MSEC :
3998 			GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000 
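/*
 * The firmware load flow runs in one of two modes: static, where the
 * communication register offsets are hard-coded in the driver, or dynamic,
 * where they are taken from the descriptor published by the F/W over COMMS.
 * The mode is selected by the dynamic_fw_load property.
 */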
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4004 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005 
4006 	/* fill common fields */
4007 	fw_loader->linux_loaded = false;
4008 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012 	fw_loader->skip_bmc = !hdev->bmc_enable;
4013 	fw_loader->sram_bar_id = SRAM_BAR_ID;
4014 	fw_loader->dram_bar_id = HBM_BAR_ID;
4015 
4016 	if (prop->dynamic_fw_load)
4017 		gaudi_init_dynamic_firmware_loader(hdev);
4018 	else
4019 		gaudi_init_static_firmware_loader(hdev);
4020 }
4021 
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024 	struct gaudi_device *gaudi = hdev->asic_specific;
4025 	int rc;
4026 
4027 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028 		return 0;
4029 
4030 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031 		return 0;
4032 
4033 	/*
4034 	 * The device CPU works with 40-bit addresses.
4035 	 * This register sets the extension to 50 bits.
4036 	 */
4037 	if (!hdev->asic_prop.fw_security_enabled)
4038 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039 
4040 	rc = hl_fw_init_cpu(hdev);
4041 
4042 	if (rc)
4043 		return rc;
4044 
4045 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046 
4047 	return 0;
4048 }
4049 
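/*
 * CPU queues handshake: publish the PQ/EQ/CQ base addresses and sizes to the
 * CPU-IF registers, signal readiness through CPU_IF_QUEUE_INIT, kick the
 * PI-update interrupt and then poll the same register until the device CPU
 * reports PQ_INIT_STATUS_READY_FOR_HOST or the timeout expires.
 */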
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052 	struct cpu_dyn_regs *dyn_regs =
4053 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4055 	struct gaudi_device *gaudi = hdev->asic_specific;
4056 	u32 status, irq_handler_offset;
4057 	struct hl_eq *eq;
4058 	struct hl_hw_queue *cpu_pq =
4059 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060 	int err;
4061 
4062 	if (!hdev->cpu_queues_enable)
4063 		return 0;
4064 
4065 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066 		return 0;
4067 
4068 	eq = &hdev->event_queue;
4069 
4070 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072 
4073 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075 
4076 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077 			lower_32_bits(hdev->cpu_accessible_dma_address));
4078 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079 			upper_32_bits(hdev->cpu_accessible_dma_address));
4080 
4081 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084 
4085 	/* Used for EQ CI */
4086 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087 
4088 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089 
4090 	if (gaudi->multi_msi_mode)
4091 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092 	else
4093 		WREG32(mmCPU_IF_QUEUE_INIT,
4094 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095 
4096 	irq_handler_offset = prop->gic_interrupts_enable ?
4097 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099 
4100 	WREG32(irq_handler_offset,
4101 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102 
4103 	err = hl_poll_timeout(
4104 		hdev,
4105 		mmCPU_IF_QUEUE_INIT,
4106 		status,
4107 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4108 		1000,
4109 		cpu_timeout);
4110 
4111 	if (err) {
4112 		dev_err(hdev->dev,
4113 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114 		return -EIO;
4115 	}
4116 
4117 	/* update FW application security bits */
4118 	if (prop->fw_cpu_boot_dev_sts0_valid)
4119 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120 	if (prop->fw_cpu_boot_dev_sts1_valid)
4121 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122 
4123 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124 	return 0;
4125 }
4126 
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129 	/* Perform read from the device to make sure the device is up */
4130 	RREG32(mmHW_STATE);
4131 
4132 	if (!hdev->asic_prop.fw_security_enabled) {
4133 		/* Set the access through PCI bars (Linux driver only) as
4134 		 * secured
4135 		 */
4136 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139 
4140 		/* Perform read to flush the waiting writes to ensure
4141 		 * configuration was set in the device
4142 		 */
4143 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144 	}
4145 
4146 	/*
4147 	 * Let's mark in the H/W that we have reached this point. We check
4148 	 * this value in the reset_before_init function to understand whether
4149 	 * we need to reset the chip before doing H/W init. This register is
4150 	 * cleared by the H/W upon H/W reset
4151 	 */
4152 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154 
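/*
 * Top-level H/W init: map the HBM BAR to DRAM, boot the device CPU, keep
 * clock gating disabled while the engines are configured, initialize the
 * scramblers, golden registers, MMU and security, bring up all QMANs and
 * finally enable the timestamp counter, MSI and the CPU queues.
 */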
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157 	struct gaudi_device *gaudi = hdev->asic_specific;
4158 	int rc;
4159 
4160 	gaudi_pre_hw_init(hdev);
4161 
4162 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4163 	 * So we set it here and if anyone tries to move it later to
4164 	 * a different address, there will be an error
4165 	 */
4166 	if (hdev->asic_prop.iatu_done_by_fw)
4167 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168 
4169 	/*
4170 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
4171 	 * base address of dram
4172 	 */
4173 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174 		dev_err(hdev->dev,
4175 			"failed to map HBM bar to DRAM base address\n");
4176 		return -EIO;
4177 	}
4178 
4179 	rc = gaudi_init_cpu(hdev);
4180 	if (rc) {
4181 		dev_err(hdev->dev, "failed to initialize CPU\n");
4182 		return rc;
4183 	}
4184 
4185 	/* In case clock gating was enabled in preboot, we need to disable it
4186 	 * here before touching the MME/TPC registers.
4187 	 * There is no need to take the clock gating mutex because no other
4188 	 * relevant code can run while this function runs
4189 	 */
4190 	hdev->asic_funcs->disable_clock_gating(hdev);
4191 
4192 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4193 	gaudi_init_scrambler_sram(hdev);
4194 
4195 	/* This is here just in case we are working without CPU */
4196 	gaudi_init_scrambler_hbm(hdev);
4197 
4198 	gaudi_init_golden_registers(hdev);
4199 
4200 	rc = gaudi_mmu_init(hdev);
4201 	if (rc)
4202 		return rc;
4203 
4204 	gaudi_init_security(hdev);
4205 
4206 	gaudi_init_pci_dma_qmans(hdev);
4207 
4208 	gaudi_init_hbm_dma_qmans(hdev);
4209 
4210 	gaudi_init_mme_qmans(hdev);
4211 
4212 	gaudi_init_tpc_qmans(hdev);
4213 
4214 	gaudi_init_nic_qmans(hdev);
4215 
4216 	hdev->asic_funcs->set_clock_gating(hdev);
4217 
4218 	gaudi_enable_timestamp(hdev);
4219 
4220 	/* MSI must be enabled before CPU queues and NIC are initialized */
4221 	rc = gaudi_enable_msi(hdev);
4222 	if (rc)
4223 		goto disable_queues;
4224 
4225 	/* must be called after MSI was enabled */
4226 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227 	if (rc) {
4228 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229 			rc);
4230 		goto disable_msi;
4231 	}
4232 
4233 	/* Perform read from the device to flush all configuration */
4234 	RREG32(mmHW_STATE);
4235 
4236 	return 0;
4237 
4238 disable_msi:
4239 	gaudi_disable_msi(hdev);
4240 disable_queues:
4241 	gaudi_disable_mme_qmans(hdev);
4242 	gaudi_disable_pci_dma_qmans(hdev);
4243 
4244 	return rc;
4245 }
4246 
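/*
 * Hard-reset flow: the driver performs the reset itself only when F/W
 * security is disabled and the F/W does not own the hard reset. Otherwise it
 * notifies the device CPU (via the GIC, or COMMS/MSG_TO_CPU for old F/Ws)
 * and waits for the firmware to perform the reset.
 */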
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249 	struct cpu_dyn_regs *dyn_regs =
4250 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252 	struct gaudi_device *gaudi = hdev->asic_specific;
4253 	bool driver_performs_reset;
4254 
4255 	if (!hard_reset) {
4256 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257 		return;
4258 	}
4259 
4260 	if (hdev->pldm) {
4261 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263 	} else {
4264 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266 	}
4267 
4268 	if (fw_reset) {
4269 		dev_info(hdev->dev,
4270 			"Firmware performs HARD reset, going to wait %dms\n",
4271 			reset_timeout_ms);
4272 
4273 		goto skip_reset;
4274 	}
4275 
4276 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277 					!hdev->asic_prop.hard_reset_done_by_fw);
4278 
4279 	/* Set device to handle FLR by H/W as we will put the device CPU to
4280 	 * halt mode
4281 	 */
4282 	if (driver_performs_reset)
4283 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285 
4286 	/* If Linux is loaded in the device CPU we need to communicate with it
4287 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4288 	 * registers in case of old F/Ws
4289 	 */
4290 	if (hdev->fw_loader.linux_loaded) {
4291 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294 
4295 		WREG32(irq_handler_offset,
4296 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297 	} else {
4298 		if (hdev->asic_prop.hard_reset_done_by_fw)
4299 			hl_fw_ask_hard_reset_without_linux(hdev);
4300 		else
4301 			hl_fw_ask_halt_machine_without_linux(hdev);
4302 	}
4303 
4304 	if (driver_performs_reset) {
4305 
4306 		/* Configure the reset registers. Must be done as early as
4307 		 * possible in case we fail during H/W initialization
4308 		 */
4309 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310 						(CFG_RST_H_DMA_MASK |
4311 						CFG_RST_H_MME_MASK |
4312 						CFG_RST_H_SM_MASK |
4313 						CFG_RST_H_TPC_7_MASK));
4314 
4315 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316 
4317 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318 						(CFG_RST_H_HBM_MASK |
4319 						CFG_RST_H_TPC_7_MASK |
4320 						CFG_RST_H_NIC_MASK |
4321 						CFG_RST_H_SM_MASK |
4322 						CFG_RST_H_DMA_MASK |
4323 						CFG_RST_H_MME_MASK |
4324 						CFG_RST_H_CPU_MASK |
4325 						CFG_RST_H_MMU_MASK));
4326 
4327 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328 						(CFG_RST_L_IF_MASK |
4329 						CFG_RST_L_PSOC_MASK |
4330 						CFG_RST_L_TPC_MASK));
4331 
4332 		msleep(cpu_timeout_ms);
4333 
4334 		/* Tell ASIC not to re-initialize PCIe */
4335 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336 
4337 		/* Restart BTL/BLR upon hard-reset */
4338 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339 
4340 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342 
4343 		dev_info(hdev->dev,
4344 			"Issued HARD reset command, going to wait %dms\n",
4345 			reset_timeout_ms);
4346 	} else {
4347 		dev_info(hdev->dev,
4348 			"Firmware performs HARD reset, going to wait %dms\n",
4349 			reset_timeout_ms);
4350 	}
4351 
4352 skip_reset:
4353 	/*
4354 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355 	 * itself is in reset. Need to wait until the reset is deasserted
4356 	 */
4357 	msleep(reset_timeout_ms);
4358 
4359 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361 		dev_err(hdev->dev,
4362 			"Timeout while waiting for device to reset 0x%x\n",
4363 			status);
4364 
4365 	if (gaudi) {
4366 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367 				HW_CAP_HBM | HW_CAP_PCI_DMA |
4368 				HW_CAP_MME | HW_CAP_TPC_MASK |
4369 				HW_CAP_HBM_DMA | HW_CAP_PLL |
4370 				HW_CAP_NIC_MASK | HW_CAP_MMU |
4371 				HW_CAP_SRAM_SCRAMBLER |
4372 				HW_CAP_HBM_SCRAMBLER |
4373 				HW_CAP_CLK_GATE);
4374 
4375 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376 
4377 		hdev->device_cpu_is_halted = false;
4378 	}
4379 }
4380 
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383 	int rc;
4384 
4385 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386 	if (rc)
4387 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388 
4389 	return rc;
4390 }
4391 
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394 	return gaudi_init_iatu(hdev);
4395 }
4396 
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400 	int rc;
4401 
4402 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403 			VM_DONTCOPY | VM_NORESERVE;
4404 
4405 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406 				(dma_addr - HOST_PHYS_BASE), size);
4407 	if (rc)
4408 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409 
4410 	return rc;
4411 }
4412 
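/*
 * Doorbell path: translate hw_queue_id into the matching QMAN PQ_PI register
 * (or CPU_IF_PF_PQ_PI for the CPU queue), write the new PI and, for the CPU
 * queue only, raise the PI-update interrupt so the device CPU notices the
 * new submission.
 */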
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415 	struct cpu_dyn_regs *dyn_regs =
4416 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418 	struct gaudi_device *gaudi = hdev->asic_specific;
4419 	bool invalid_queue = false;
4420 	int dma_id;
4421 
4422 	switch (hw_queue_id) {
4423 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428 		break;
4429 
4430 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435 		break;
4436 
4437 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4440 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449 		break;
4450 
4451 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456 		break;
4457 
4458 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463 		break;
4464 
4465 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470 		break;
4471 
4472 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_CPU_PQ:
4480 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482 		else
4483 			invalid_queue = true;
4484 		break;
4485 
4486 	case GAUDI_QUEUE_ID_MME_0_0:
4487 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4488 		break;
4489 
4490 	case GAUDI_QUEUE_ID_MME_0_1:
4491 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4492 		break;
4493 
4494 	case GAUDI_QUEUE_ID_MME_0_2:
4495 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4496 		break;
4497 
4498 	case GAUDI_QUEUE_ID_MME_0_3:
4499 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4500 		break;
4501 
4502 	case GAUDI_QUEUE_ID_MME_1_0:
4503 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4504 		break;
4505 
4506 	case GAUDI_QUEUE_ID_MME_1_1:
4507 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4508 		break;
4509 
4510 	case GAUDI_QUEUE_ID_MME_1_2:
4511 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4512 		break;
4513 
4514 	case GAUDI_QUEUE_ID_MME_1_3:
4515 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4516 		break;
4517 
4518 	case GAUDI_QUEUE_ID_TPC_0_0:
4519 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520 		break;
4521 
4522 	case GAUDI_QUEUE_ID_TPC_0_1:
4523 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524 		break;
4525 
4526 	case GAUDI_QUEUE_ID_TPC_0_2:
4527 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528 		break;
4529 
4530 	case GAUDI_QUEUE_ID_TPC_0_3:
4531 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532 		break;
4533 
4534 	case GAUDI_QUEUE_ID_TPC_1_0:
4535 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536 		break;
4537 
4538 	case GAUDI_QUEUE_ID_TPC_1_1:
4539 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540 		break;
4541 
4542 	case GAUDI_QUEUE_ID_TPC_1_2:
4543 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544 		break;
4545 
4546 	case GAUDI_QUEUE_ID_TPC_1_3:
4547 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548 		break;
4549 
4550 	case GAUDI_QUEUE_ID_TPC_2_0:
4551 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552 		break;
4553 
4554 	case GAUDI_QUEUE_ID_TPC_2_1:
4555 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556 		break;
4557 
4558 	case GAUDI_QUEUE_ID_TPC_2_2:
4559 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560 		break;
4561 
4562 	case GAUDI_QUEUE_ID_TPC_2_3:
4563 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564 		break;
4565 
4566 	case GAUDI_QUEUE_ID_TPC_3_0:
4567 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568 		break;
4569 
4570 	case GAUDI_QUEUE_ID_TPC_3_1:
4571 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572 		break;
4573 
4574 	case GAUDI_QUEUE_ID_TPC_3_2:
4575 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576 		break;
4577 
4578 	case GAUDI_QUEUE_ID_TPC_3_3:
4579 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580 		break;
4581 
4582 	case GAUDI_QUEUE_ID_TPC_4_0:
4583 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584 		break;
4585 
4586 	case GAUDI_QUEUE_ID_TPC_4_1:
4587 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588 		break;
4589 
4590 	case GAUDI_QUEUE_ID_TPC_4_2:
4591 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592 		break;
4593 
4594 	case GAUDI_QUEUE_ID_TPC_4_3:
4595 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596 		break;
4597 
4598 	case GAUDI_QUEUE_ID_TPC_5_0:
4599 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600 		break;
4601 
4602 	case GAUDI_QUEUE_ID_TPC_5_1:
4603 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604 		break;
4605 
4606 	case GAUDI_QUEUE_ID_TPC_5_2:
4607 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608 		break;
4609 
4610 	case GAUDI_QUEUE_ID_TPC_5_3:
4611 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612 		break;
4613 
4614 	case GAUDI_QUEUE_ID_TPC_6_0:
4615 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616 		break;
4617 
4618 	case GAUDI_QUEUE_ID_TPC_6_1:
4619 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620 		break;
4621 
4622 	case GAUDI_QUEUE_ID_TPC_6_2:
4623 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624 		break;
4625 
4626 	case GAUDI_QUEUE_ID_TPC_6_3:
4627 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628 		break;
4629 
4630 	case GAUDI_QUEUE_ID_TPC_7_0:
4631 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632 		break;
4633 
4634 	case GAUDI_QUEUE_ID_TPC_7_1:
4635 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636 		break;
4637 
4638 	case GAUDI_QUEUE_ID_TPC_7_2:
4639 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640 		break;
4641 
4642 	case GAUDI_QUEUE_ID_TPC_7_3:
4643 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644 		break;
4645 
4646 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648 			invalid_queue = true;
4649 
4650 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652 		break;
4653 
4654 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656 			invalid_queue = true;
4657 
4658 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660 		break;
4661 
4662 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664 			invalid_queue = true;
4665 
4666 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668 		break;
4669 
4670 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672 			invalid_queue = true;
4673 
4674 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676 		break;
4677 
4678 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680 			invalid_queue = true;
4681 
4682 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684 		break;
4685 
4686 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688 			invalid_queue = true;
4689 
4690 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692 		break;
4693 
4694 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696 			invalid_queue = true;
4697 
4698 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700 		break;
4701 
4702 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704 			invalid_queue = true;
4705 
4706 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708 		break;
4709 
4710 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712 			invalid_queue = true;
4713 
4714 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716 		break;
4717 
4718 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720 			invalid_queue = true;
4721 
4722 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724 		break;
4725 
4726 	default:
4727 		invalid_queue = true;
4728 	}
4729 
4730 	if (invalid_queue) {
4731 		/* Should never get here */
4732 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733 			hw_queue_id);
4734 		return;
4735 	}
4736 
4737 	db_value = pi;
4738 
4739 	/* ring the doorbell */
4740 	WREG32(db_reg_offset, db_value);
4741 
4742 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743 		/* make sure device CPU will read latest data from host */
4744 		mb();
4745 
4746 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749 
4750 		WREG32(irq_handler_offset,
4751 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752 	}
4753 }
4754 
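/*
 * Write a queue entry: copy the 16-byte buffer descriptor (BD) into the
 * external queue's PQ slot. The PQ resides in host memory, so a plain
 * copy of the two 64-bit words is sufficient; no register access or
 * endianness conversion is needed here.
 */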
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756 				struct hl_bd *bd)
4757 {
4758 	__le64 *pbd = (__le64 *) bd;
4759 
4760 	/* The PQs of these QMANs reside in host memory, so a simple copy suffices */
4761 	pqe[0] = pbd[0];
4762 	pqe[1] = pbd[1];
4763 }
4764 
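/*
 * Coherent DMA allocation helpers: the bus address returned by the DMA API
 * is shifted by HOST_PHYS_BASE so that it matches the device's view of host
 * memory, and the free path below cancels that shift before handing the
 * address back to the DMA API.
 */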
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766 					dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769 						dma_handle, flags);
4770 
4771 	/* Shift to the device's base physical address of host memory */
4772 	if (kernel_addr)
4773 		*dma_handle += HOST_PHYS_BASE;
4774 
4775 	return kernel_addr;
4776 }
4777 
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779 		void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781 	/* Cancel the device's base physical address of host memory */
4782 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783 
4784 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786 
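/*
 * Scrub the user HBM range: split it into chunks of up to 2GB and program
 * all DMA core engines in parallel, each committed with the LIN and MEM_SET
 * bits set. After a full round of engines has been kicked off, poll every
 * engine's STS0 register until its BUSY bit clears; a poll timeout is
 * reported as -EIO.
 */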
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4790 	u64  cur_addr = DRAM_BASE_ADDR_USER;
4791 	u32 val;
4792 	u32 chunk_size;
4793 	int rc, dma_id;
4794 
4795 	while (cur_addr < prop->dram_end_address) {
4796 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798 
4799 			chunk_size =
4800 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801 
4802 			dev_dbg(hdev->dev,
4803 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804 				cur_addr, cur_addr + chunk_size);
4805 
4806 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809 						lower_32_bits(cur_addr));
4810 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811 						upper_32_bits(cur_addr));
4812 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813 					chunk_size);
4814 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817 
4818 			cur_addr += chunk_size;
4819 
4820 			if (cur_addr == prop->dram_end_address)
4821 				break;
4822 		}
4823 
4824 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826 
4827 			rc = hl_poll_timeout(
4828 				hdev,
4829 				mmDMA0_CORE_STS0 + dma_offset,
4830 				val,
4831 				((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832 				1000,
4833 				HBM_SCRUBBING_TIMEOUT_US);
4834 
4835 			if (rc) {
4836 				dev_err(hdev->dev,
4837 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4838 					dma_id);
4839 				return -EIO;
4840 			}
4841 		}
4842 	}
4843 
4844 	return 0;
4845 }
4846 
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4850 	struct gaudi_device *gaudi = hdev->asic_specific;
4851 	int rc = 0;
4852 	u64 val = 0;
4853 
4854 	if (!hdev->memory_scrub)
4855 		return 0;
4856 
4857 	if (!addr && !size) {
4858 		/* Wait till device is idle */
4859 		rc = hl_poll_timeout(
4860 				hdev,
4861 				mmDMA0_CORE_STS0/* dummy */,
4862 				val/* dummy */,
4863 				(hdev->asic_funcs->is_device_idle(hdev, NULL,
4864 						0, NULL)),
4865 						1000,
4866 						HBM_SCRUBBING_TIMEOUT_US);
4867 		if (rc) {
4868 			dev_err(hdev->dev, "waiting for idle timeout\n");
4869 			return -EIO;
4870 		}
4871 
4872 		/* Scrub SRAM */
4873 		addr = prop->sram_user_base_address;
4874 		size = hdev->pldm ? 0x10000 :
4875 				(prop->sram_size - SRAM_USER_BASE_OFFSET);
4876 		val = 0x7777777777777777ull;
4877 
4878 		rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879 		if (rc) {
4880 			dev_err(hdev->dev,
4881 				"Failed to clear SRAM in mem scrub all\n");
4882 			return rc;
4883 		}
4884 
4885 		mutex_lock(&gaudi->clk_gate_mutex);
4886 		hdev->asic_funcs->disable_clock_gating(hdev);
4887 
4888 		/* Scrub HBM using all DMA channels in parallel */
4889 		rc = gaudi_hbm_scrubbing(hdev);
4890 		if (rc)
4891 			dev_err(hdev->dev,
4892 				"Failed to clear HBM in mem scrub all\n");
4893 
4894 		hdev->asic_funcs->set_clock_gating(hdev);
4895 		mutex_unlock(&gaudi->clk_gate_mutex);
4896 	}
4897 
4898 	return rc;
4899 }
4900 
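/*
 * Return the kernel address, DMA handle and length (in PQ entries) of an
 * internal (on-device) queue's PQ, which was allocated during init and is
 * kept in gaudi->internal_qmans[]. Only queue IDs of type QUEUE_TYPE_INT
 * are accepted.
 */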
4901 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4902 				u32 queue_id, dma_addr_t *dma_handle,
4903 				u16 *queue_len)
4904 {
4905 	struct gaudi_device *gaudi = hdev->asic_specific;
4906 	struct gaudi_internal_qman_info *q;
4907 
4908 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911 		return NULL;
4912 	}
4913 
4914 	q = &gaudi->internal_qmans[queue_id];
4915 	*dma_handle = q->pq_dma_addr;
4916 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917 
4918 	return q->pq_kernel_addr;
4919 }
4920 
4921 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4922 				u16 len, u32 timeout, u64 *result)
4923 {
4924 	struct gaudi_device *gaudi = hdev->asic_specific;
4925 
4926 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927 		if (result)
4928 			*result = 0;
4929 		return 0;
4930 	}
4931 
4932 	if (!timeout)
4933 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934 
4935 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936 						timeout, result);
4937 }
4938 
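/*
 * Sanity-test an external H/W queue: allocate a scratch DWORD and a
 * MSG_PROT packet from the DMA pool, send the packet through the queue
 * without a completion, and poll the scratch location until the fence
 * value shows up. A poll timeout is converted to -EIO.
 */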
4939 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941 	struct packet_msg_prot *fence_pkt;
4942 	dma_addr_t pkt_dma_addr;
4943 	u32 fence_val, tmp, timeout_usec;
4944 	dma_addr_t fence_dma_addr;
4945 	u32 *fence_ptr;
4946 	int rc;
4947 
4948 	if (hdev->pldm)
4949 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950 	else
4951 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952 
4953 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4954 
4955 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956 							&fence_dma_addr);
4957 	if (!fence_ptr) {
4958 		dev_err(hdev->dev,
4959 			"Failed to allocate memory for H/W queue %d testing\n",
4960 			hw_queue_id);
4961 		return -ENOMEM;
4962 	}
4963 
4964 	*fence_ptr = 0;
4965 
4966 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967 					sizeof(struct packet_msg_prot),
4968 					GFP_KERNEL, &pkt_dma_addr);
4969 	if (!fence_pkt) {
4970 		dev_err(hdev->dev,
4971 			"Failed to allocate packet for H/W queue %d testing\n",
4972 			hw_queue_id);
4973 		rc = -ENOMEM;
4974 		goto free_fence_ptr;
4975 	}
4976 
4977 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980 
4981 	fence_pkt->ctl = cpu_to_le32(tmp);
4982 	fence_pkt->value = cpu_to_le32(fence_val);
4983 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984 
4985 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986 					sizeof(struct packet_msg_prot),
4987 					pkt_dma_addr);
4988 	if (rc) {
4989 		dev_err(hdev->dev,
4990 			"Failed to send fence packet to H/W queue %d\n",
4991 			hw_queue_id);
4992 		goto free_pkt;
4993 	}
4994 
4995 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996 					1000, timeout_usec, true);
4997 
4998 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999 
5000 	if (rc == -ETIMEDOUT) {
5001 		dev_err(hdev->dev,
5002 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004 		rc = -EIO;
5005 	}
5006 
5007 free_pkt:
5008 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009 					pkt_dma_addr);
5010 free_fence_ptr:
5011 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012 					fence_dma_addr);
5013 	return rc;
5014 }
5015 
5016 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018 	struct gaudi_device *gaudi = hdev->asic_specific;
5019 
5020 	/*
5021 	 * Check the capability here, as send_cpu_message() won't update the
5022 	 * result value if the capability is missing
5023 	 */
5024 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025 		return 0;
5026 
5027 	return hl_fw_test_cpu_queue(hdev);
5028 }
5029 
5030 static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032 	int i, rc, ret_val = 0;
5033 
5034 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036 			rc = gaudi_test_queue(hdev, i);
5037 			if (rc)
5038 				ret_val = -EINVAL;
5039 		}
5040 	}
5041 
5042 	rc = gaudi_test_cpu_queue(hdev);
5043 	if (rc)
5044 		ret_val = -EINVAL;
5045 
5046 	return ret_val;
5047 }
5048 
5049 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5050 		gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052 	void *kernel_addr;
5053 
5054 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055 		return NULL;
5056 
5057 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058 
5059 	/* Shift to the device's base physical address of host memory */
5060 	if (kernel_addr)
5061 		*dma_handle += HOST_PHYS_BASE;
5062 
5063 	return kernel_addr;
5064 }
5065 
5066 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5067 			dma_addr_t dma_addr)
5068 {
5069 	/* Cancel the device's base physical address of host memory */
5070 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071 
5072 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074 
5075 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5076 					size_t size, dma_addr_t *dma_handle)
5077 {
5078 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080 
5081 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5082 						size_t size, void *vaddr)
5083 {
5084 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086 
5087 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5088 			int nents, enum dma_data_direction dir)
5089 {
5090 	struct scatterlist *sg;
5091 	int i;
5092 
5093 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094 		return -ENOMEM;
5095 
5096 	/* Shift to the device's base physical address of host memory */
5097 	for_each_sg(sgl, sg, nents, i)
5098 		sg->dma_address += HOST_PHYS_BASE;
5099 
5100 	return 0;
5101 }
5102 
5103 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5104 			int nents, enum dma_data_direction dir)
5105 {
5106 	struct scatterlist *sg;
5107 	int i;
5108 
5109 	/* Cancel the device's base physical address of host memory */
5110 	for_each_sg(sgl, sg, nents, i)
5111 		sg->dma_address -= HOST_PHYS_BASE;
5112 
5113 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115 
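/*
 * Compute how much space the patched CB will need for a pinned user buffer:
 * walk the DMA-mapped SG table, merge physically contiguous entries as long
 * as the combined length fits in DMA_MAX_TRANSFER_SIZE, and charge one
 * LIN_DMA packet per resulting descriptor.
 */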
5116 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5117 					struct sg_table *sgt)
5118 {
5119 	struct scatterlist *sg, *sg_next_iter;
5120 	u32 count, dma_desc_cnt;
5121 	u64 len, len_next;
5122 	dma_addr_t addr, addr_next;
5123 
5124 	dma_desc_cnt = 0;
5125 
5126 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127 
5128 		len = sg_dma_len(sg);
5129 		addr = sg_dma_address(sg);
5130 
5131 		if (len == 0)
5132 			break;
5133 
5134 		while ((count + 1) < sgt->nents) {
5135 			sg_next_iter = sg_next(sg);
5136 			len_next = sg_dma_len(sg_next_iter);
5137 			addr_next = sg_dma_address(sg_next_iter);
5138 
5139 			if (len_next == 0)
5140 				break;
5141 
5142 			if ((addr + len == addr_next) &&
5143 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144 				len += len_next;
5145 				count++;
5146 				sg = sg_next_iter;
5147 			} else {
5148 				break;
5149 			}
5150 		}
5151 
5152 		dma_desc_cnt++;
5153 	}
5154 
5155 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
5157 
5158 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5159 				struct hl_cs_parser *parser,
5160 				struct packet_lin_dma *user_dma_pkt,
5161 				u64 addr, enum dma_data_direction dir)
5162 {
5163 	struct hl_userptr *userptr;
5164 	int rc;
5165 
5166 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167 			parser->job_userptr_list, &userptr))
5168 		goto already_pinned;
5169 
5170 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171 	if (!userptr)
5172 		return -ENOMEM;
5173 
5174 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175 				userptr);
5176 	if (rc)
5177 		goto free_userptr;
5178 
5179 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180 
5181 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182 					userptr->sgt->nents, dir);
5183 	if (rc) {
5184 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185 		goto unpin_memory;
5186 	}
5187 
5188 	userptr->dma_mapped = true;
5189 	userptr->dir = dir;
5190 
5191 already_pinned:
5192 	parser->patched_cb_size +=
5193 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194 
5195 	return 0;
5196 
5197 unpin_memory:
5198 	list_del(&userptr->job_node);
5199 	hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201 	kfree(userptr);
5202 	return rc;
5203 }
5204 
5205 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5206 				struct hl_cs_parser *parser,
5207 				struct packet_lin_dma *user_dma_pkt,
5208 				bool src_in_host)
5209 {
5210 	enum dma_data_direction dir;
5211 	bool skip_host_mem_pin = false, user_memset;
5212 	u64 addr;
5213 	int rc = 0;
5214 
5215 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218 
5219 	if (src_in_host) {
5220 		if (user_memset)
5221 			skip_host_mem_pin = true;
5222 
5223 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224 		dir = DMA_TO_DEVICE;
5225 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5226 	} else {
5227 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228 		dir = DMA_FROM_DEVICE;
5229 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232 	}
5233 
5234 	if (skip_host_mem_pin)
5235 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5236 	else
5237 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238 						addr, dir);
5239 
5240 	return rc;
5241 }
5242 
5243 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5244 				struct hl_cs_parser *parser,
5245 				struct packet_lin_dma *user_dma_pkt)
5246 {
5247 	bool src_in_host = false;
5248 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251 
5252 	dev_dbg(hdev->dev, "DMA packet details:\n");
5253 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5254 				le64_to_cpu(user_dma_pkt->src_addr));
5255 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257 
5258 	/*
5259 	 * Special handling for DMA with size 0. Bypass all validations
5260 	 * because no transactions will be done except for WR_COMP, which
5261 	 * is not a security issue
5262 	 */
5263 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5265 		return 0;
5266 	}
5267 
5268 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269 		src_in_host = true;
5270 
5271 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272 						src_in_host);
5273 }
5274 
5275 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5276 					struct hl_cs_parser *parser,
5277 					struct packet_load_and_exe *user_pkt)
5278 {
5279 	u32 cfg;
5280 
5281 	cfg = le32_to_cpu(user_pkt->cfg);
5282 
5283 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284 		dev_err(hdev->dev,
5285 			"User not allowed to use Load and Execute\n");
5286 		return -EPERM;
5287 	}
5288 
5289 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290 
5291 	return 0;
5292 }
5293 
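/*
 * First pass over a user CB: walk it packet by packet, reject privileged
 * packets (MSG_PROT, CP_DMA, STOP, WREG_BULK), validate LOAD_AND_EXE and
 * LIN_DMA packets, and accumulate the size the patched CB will need. When
 * the job was submitted with a completion, room for the two trailing
 * MSG_PROT packets is added as well.
 */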
5294 static int gaudi_validate_cb(struct hl_device *hdev,
5295 			struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297 	u32 cb_parsed_length = 0;
5298 	int rc = 0;
5299 
5300 	parser->patched_cb_size = 0;
5301 
5302 	/* user_cb_size is more than 0, so the loop will always be executed */
5303 	while (cb_parsed_length < parser->user_cb_size) {
5304 		enum packet_id pkt_id;
5305 		u16 pkt_size;
5306 		struct gaudi_packet *user_pkt;
5307 
5308 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309 
5310 		pkt_id = (enum packet_id) (
5311 				(le64_to_cpu(user_pkt->header) &
5312 				PACKET_HEADER_PACKET_ID_MASK) >>
5313 					PACKET_HEADER_PACKET_ID_SHIFT);
5314 
5315 		if (!validate_packet_id(pkt_id)) {
5316 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317 			rc = -EINVAL;
5318 			break;
5319 		}
5320 
5321 		pkt_size = gaudi_packet_sizes[pkt_id];
5322 		cb_parsed_length += pkt_size;
5323 		if (cb_parsed_length > parser->user_cb_size) {
5324 			dev_err(hdev->dev,
5325 				"packet 0x%x is out of CB boundary\n", pkt_id);
5326 			rc = -EINVAL;
5327 			break;
5328 		}
5329 
5330 		switch (pkt_id) {
5331 		case PACKET_MSG_PROT:
5332 			dev_err(hdev->dev,
5333 				"User not allowed to use MSG_PROT\n");
5334 			rc = -EPERM;
5335 			break;
5336 
5337 		case PACKET_CP_DMA:
5338 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339 			rc = -EPERM;
5340 			break;
5341 
5342 		case PACKET_STOP:
5343 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5344 			rc = -EPERM;
5345 			break;
5346 
5347 		case PACKET_WREG_BULK:
5348 			dev_err(hdev->dev,
5349 				"User not allowed to use WREG_BULK\n");
5350 			rc = -EPERM;
5351 			break;
5352 
5353 		case PACKET_LOAD_AND_EXE:
5354 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355 				(struct packet_load_and_exe *) user_pkt);
5356 			break;
5357 
5358 		case PACKET_LIN_DMA:
5359 			parser->contains_dma_pkt = true;
5360 			if (is_mmu)
5361 				parser->patched_cb_size += pkt_size;
5362 			else
5363 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364 					(struct packet_lin_dma *) user_pkt);
5365 			break;
5366 
5367 		case PACKET_WREG_32:
5368 		case PACKET_MSG_LONG:
5369 		case PACKET_MSG_SHORT:
5370 		case PACKET_REPEAT:
5371 		case PACKET_FENCE:
5372 		case PACKET_NOP:
5373 		case PACKET_ARB_POINT:
5374 			parser->patched_cb_size += pkt_size;
5375 			break;
5376 
5377 		default:
5378 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379 				pkt_id);
5380 			rc = -EINVAL;
5381 			break;
5382 		}
5383 
5384 		if (rc)
5385 			break;
5386 	}
5387 
5388 	/*
5389 	 * The new CB should have space at the end for two MSG_PROT packets:
5390 	 * 1. A packet that will act as a completion packet
5391 	 * 2. A packet that will generate an MSI-X interrupt
5392 	 */
5393 	if (parser->completion)
5394 		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395 
5396 	return rc;
5397 }
5398 
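/*
 * Expand a single user LIN_DMA packet that targets host memory into one
 * LIN_DMA packet per merged SG entry of the pinned user buffer. The
 * engine-barrier bit is kept only on the first generated packet and the
 * user's WR_COMP enable bit is restored only on the last one, so the
 * completion behaviour the user asked for is preserved.
 */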
5399 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5400 				struct hl_cs_parser *parser,
5401 				struct packet_lin_dma *user_dma_pkt,
5402 				struct packet_lin_dma *new_dma_pkt,
5403 				u32 *new_dma_pkt_size)
5404 {
5405 	struct hl_userptr *userptr;
5406 	struct scatterlist *sg, *sg_next_iter;
5407 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408 	u64 len, len_next;
5409 	dma_addr_t dma_addr, dma_addr_next;
5410 	u64 device_memory_addr, addr;
5411 	enum dma_data_direction dir;
5412 	struct sg_table *sgt;
5413 	bool src_in_host = false;
5414 	bool skip_host_mem_pin = false;
5415 	bool user_memset;
5416 
5417 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5418 
5419 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420 		src_in_host = true;
5421 
5422 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424 
5425 	if (src_in_host) {
5426 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5427 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428 		dir = DMA_TO_DEVICE;
5429 		if (user_memset)
5430 			skip_host_mem_pin = true;
5431 	} else {
5432 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434 		dir = DMA_FROM_DEVICE;
5435 	}
5436 
5437 	if ((!skip_host_mem_pin) &&
5438 		(!hl_userptr_is_pinned(hdev, addr,
5439 					le32_to_cpu(user_dma_pkt->tsize),
5440 					parser->job_userptr_list, &userptr))) {
5441 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5442 				addr, user_dma_pkt->tsize);
5443 		return -EFAULT;
5444 	}
5445 
5446 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5449 		return 0;
5450 	}
5451 
5452 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453 
5454 	sgt = userptr->sgt;
5455 	dma_desc_cnt = 0;
5456 
5457 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458 		len = sg_dma_len(sg);
5459 		dma_addr = sg_dma_address(sg);
5460 
5461 		if (len == 0)
5462 			break;
5463 
5464 		while ((count + 1) < sgt->nents) {
5465 			sg_next_iter = sg_next(sg);
5466 			len_next = sg_dma_len(sg_next_iter);
5467 			dma_addr_next = sg_dma_address(sg_next_iter);
5468 
5469 			if (len_next == 0)
5470 				break;
5471 
5472 			if ((dma_addr + len == dma_addr_next) &&
5473 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474 				len += len_next;
5475 				count++;
5476 				sg = sg_next_iter;
5477 			} else {
5478 				break;
5479 			}
5480 		}
5481 
5482 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5483 		if (likely(dma_desc_cnt))
5484 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5487 		new_dma_pkt->tsize = cpu_to_le32(len);
5488 
5489 		if (dir == DMA_TO_DEVICE) {
5490 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492 		} else {
5493 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495 		}
5496 
5497 		if (!user_memset)
5498 			device_memory_addr += len;
5499 		dma_desc_cnt++;
5500 		new_dma_pkt++;
5501 	}
5502 
5503 	if (!dma_desc_cnt) {
5504 		dev_err(hdev->dev,
5505 			"Error of 0 SG entries when patching DMA packet\n");
5506 		return -EFAULT;
5507 	}
5508 
5509 	/* Fix the last dma packet - wrcomp must be as user set it */
5510 	new_dma_pkt--;
5511 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512 
5513 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514 
5515 	return 0;
5516 }
5517 
5518 static int gaudi_patch_cb(struct hl_device *hdev,
5519 				struct hl_cs_parser *parser)
5520 {
5521 	u32 cb_parsed_length = 0;
5522 	u32 cb_patched_cur_length = 0;
5523 	int rc = 0;
5524 
5525 	/* user_cb_size is more than 0, so the loop will always be executed */
5526 	while (cb_parsed_length < parser->user_cb_size) {
5527 		enum packet_id pkt_id;
5528 		u16 pkt_size;
5529 		u32 new_pkt_size = 0;
5530 		struct gaudi_packet *user_pkt, *kernel_pkt;
5531 
5532 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533 		kernel_pkt = parser->patched_cb->kernel_address +
5534 					cb_patched_cur_length;
5535 
5536 		pkt_id = (enum packet_id) (
5537 				(le64_to_cpu(user_pkt->header) &
5538 				PACKET_HEADER_PACKET_ID_MASK) >>
5539 					PACKET_HEADER_PACKET_ID_SHIFT);
5540 
5541 		if (!validate_packet_id(pkt_id)) {
5542 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543 			rc = -EINVAL;
5544 			break;
5545 		}
5546 
5547 		pkt_size = gaudi_packet_sizes[pkt_id];
5548 		cb_parsed_length += pkt_size;
5549 		if (cb_parsed_length > parser->user_cb_size) {
5550 			dev_err(hdev->dev,
5551 				"packet 0x%x is out of CB boundary\n", pkt_id);
5552 			rc = -EINVAL;
5553 			break;
5554 		}
5555 
5556 		switch (pkt_id) {
5557 		case PACKET_LIN_DMA:
5558 			rc = gaudi_patch_dma_packet(hdev, parser,
5559 					(struct packet_lin_dma *) user_pkt,
5560 					(struct packet_lin_dma *) kernel_pkt,
5561 					&new_pkt_size);
5562 			cb_patched_cur_length += new_pkt_size;
5563 			break;
5564 
5565 		case PACKET_MSG_PROT:
5566 			dev_err(hdev->dev,
5567 				"User not allowed to use MSG_PROT\n");
5568 			rc = -EPERM;
5569 			break;
5570 
5571 		case PACKET_CP_DMA:
5572 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573 			rc = -EPERM;
5574 			break;
5575 
5576 		case PACKET_STOP:
5577 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5578 			rc = -EPERM;
5579 			break;
5580 
5581 		case PACKET_WREG_32:
5582 		case PACKET_WREG_BULK:
5583 		case PACKET_MSG_LONG:
5584 		case PACKET_MSG_SHORT:
5585 		case PACKET_REPEAT:
5586 		case PACKET_FENCE:
5587 		case PACKET_NOP:
5588 		case PACKET_ARB_POINT:
5589 		case PACKET_LOAD_AND_EXE:
5590 			memcpy(kernel_pkt, user_pkt, pkt_size);
5591 			cb_patched_cur_length += pkt_size;
5592 			break;
5593 
5594 		default:
5595 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596 				pkt_id);
5597 			rc = -EINVAL;
5598 			break;
5599 		}
5600 
5601 		if (rc)
5602 			break;
5603 	}
5604 
5605 	return rc;
5606 }
5607 
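/*
 * CS parsing when the MMU is enabled: copy the user CB verbatim into a
 * kernel-owned "patched" CB (leaving room for the two trailing MSG_PROT
 * packets when a completion is required) and validate the copy with is_mmu
 * set, so LIN_DMA packets are accepted without host-memory pinning or
 * address patching.
 */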
5608 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5609 		struct hl_cs_parser *parser)
5610 {
5611 	u64 patched_cb_handle;
5612 	u32 patched_cb_size;
5613 	struct hl_cb *user_cb;
5614 	int rc;
5615 
5616 	/*
5617 	 * The new CB should have space at the end for two MSG_PROT packets:
5618 	 * 1. A packet that will act as a completion packet
5619 	 * 2. A packet that will generate an MSI interrupt
5620 	 */
5621 	if (parser->completion)
5622 		parser->patched_cb_size = parser->user_cb_size +
5623 				sizeof(struct packet_msg_prot) * 2;
5624 	else
5625 		parser->patched_cb_size = parser->user_cb_size;
5626 
5627 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628 				parser->patched_cb_size, false, false,
5629 				&patched_cb_handle);
5630 
5631 	if (rc) {
5632 		dev_err(hdev->dev,
5633 			"Failed to allocate patched CB for DMA CS %d\n",
5634 			rc);
5635 		return rc;
5636 	}
5637 
5638 	patched_cb_handle >>= PAGE_SHIFT;
5639 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640 				(u32) patched_cb_handle);
5641 	/* hl_cb_get should never fail */
5642 	if (!parser->patched_cb) {
5643 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644 			(u32) patched_cb_handle);
5645 		rc = -EFAULT;
5646 		goto out;
5647 	}
5648 
5649 	/*
5650 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
5651 	 * in validate_queue_index().
5652 	 */
5653 	memcpy(parser->patched_cb->kernel_address,
5654 		parser->user_cb->kernel_address,
5655 		parser->user_cb_size);
5656 
5657 	patched_cb_size = parser->patched_cb_size;
5658 
5659 	/* Validate patched CB instead of user CB */
5660 	user_cb = parser->user_cb;
5661 	parser->user_cb = parser->patched_cb;
5662 	rc = gaudi_validate_cb(hdev, parser, true);
5663 	parser->user_cb = user_cb;
5664 
5665 	if (rc) {
5666 		hl_cb_put(parser->patched_cb);
5667 		goto out;
5668 	}
5669 
5670 	if (patched_cb_size != parser->patched_cb_size) {
5671 		dev_err(hdev->dev, "user CB size mismatch\n");
5672 		hl_cb_put(parser->patched_cb);
5673 		rc = -EINVAL;
5674 		goto out;
5675 	}
5676 
5677 out:
5678 	/*
5679 	 * Always call cb destroy here because we still hold one reference
5680 	 * from the earlier cb_get. After the job is completed, cb_put will
5681 	 * release it, but here we want to remove the CB from the
5682 	 * idr
5683 	 */
5684 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685 					patched_cb_handle << PAGE_SHIFT);
5686 
5687 	return rc;
5688 }
5689 
5690 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5691 		struct hl_cs_parser *parser)
5692 {
5693 	u64 patched_cb_handle;
5694 	int rc;
5695 
5696 	rc = gaudi_validate_cb(hdev, parser, false);
5697 
5698 	if (rc)
5699 		goto free_userptr;
5700 
5701 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702 				parser->patched_cb_size, false, false,
5703 				&patched_cb_handle);
5704 	if (rc) {
5705 		dev_err(hdev->dev,
5706 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5707 		goto free_userptr;
5708 	}
5709 
5710 	patched_cb_handle >>= PAGE_SHIFT;
5711 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712 				(u32) patched_cb_handle);
5713 	/* hl_cb_get should never fail here */
5714 	if (!parser->patched_cb) {
5715 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716 				(u32) patched_cb_handle);
5717 		rc = -EFAULT;
5718 		goto out;
5719 	}
5720 
5721 	rc = gaudi_patch_cb(hdev, parser);
5722 
5723 	if (rc)
5724 		hl_cb_put(parser->patched_cb);
5725 
5726 out:
5727 	/*
5728 	 * Always call cb destroy here because we still hold one reference
5729 	 * from the earlier cb_get. After the job is completed, cb_put will
5730 	 * release it, but here we want to remove the CB from the
5731 	 * idr
5732 	 */
5733 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734 				patched_cb_handle << PAGE_SHIFT);
5735 
5736 free_userptr:
5737 	if (rc)
5738 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739 	return rc;
5740 }
5741 
5742 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5743 					struct hl_cs_parser *parser)
5744 {
5745 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746 	struct gaudi_device *gaudi = hdev->asic_specific;
5747 	u32 nic_queue_offset, nic_mask_q_id;
5748 
5749 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5750 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5751 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5752 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5753 
5754 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5755 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5756 			return -EINVAL;
5757 		}
5758 	}
5759 
5760 	/* For internal queue jobs just check if CB address is valid */
5761 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5762 					parser->user_cb_size,
5763 					asic_prop->sram_user_base_address,
5764 					asic_prop->sram_end_address))
5765 		return 0;
5766 
5767 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5768 					parser->user_cb_size,
5769 					asic_prop->dram_user_base_address,
5770 					asic_prop->dram_end_address))
5771 		return 0;
5772 
5773 	/* PMMU and HPMMU addresses are equal, check only one of them */
5774 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5775 					parser->user_cb_size,
5776 					asic_prop->pmmu.start_addr,
5777 					asic_prop->pmmu.end_addr))
5778 		return 0;
5779 
5780 	dev_err(hdev->dev,
5781 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5782 		parser->user_cb, parser->user_cb_size);
5783 
5784 	return -EFAULT;
5785 }
5786 
5787 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5788 {
5789 	struct gaudi_device *gaudi = hdev->asic_specific;
5790 
5791 	if (parser->queue_type == QUEUE_TYPE_INT)
5792 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5793 
5794 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5795 		return gaudi_parse_cb_mmu(hdev, parser);
5796 	else
5797 		return gaudi_parse_cb_no_mmu(hdev, parser);
5798 }
5799 
5800 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5801 					void *kernel_address, u32 len,
5802 					u64 cq_addr, u32 cq_val, u32 msi_vec,
5803 					bool eb)
5804 {
5805 	struct gaudi_device *gaudi = hdev->asic_specific;
5806 	struct packet_msg_prot *cq_pkt;
5807 	u64 msi_addr;
5808 	u32 tmp;
5809 
5810 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5811 
5812 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5813 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5814 
5815 	if (eb)
5816 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5817 
5818 	cq_pkt->ctl = cpu_to_le32(tmp);
5819 	cq_pkt->value = cpu_to_le32(cq_val);
5820 	cq_pkt->addr = cpu_to_le64(cq_addr);
5821 
5822 	cq_pkt++;
5823 
5824 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5825 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5826 	cq_pkt->ctl = cpu_to_le32(tmp);
5827 	cq_pkt->value = cpu_to_le32(1);
5828 
5829 	if (gaudi->multi_msi_mode)
5830 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5831 	else
5832 		msi_addr = mmPCIE_CORE_MSI_REQ;
5833 
5834 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5835 }
5836 
5837 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5838 {
5839 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5840 }
5841 
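/*
 * Fill a device memory range with a 64-bit value: build a kernel CB holding
 * a single memset-mode LIN_DMA packet (the value is carried in src_addr)
 * and submit it as a job on DMA queue 0 through QMAN0. DMA0's error cause
 * register is checked before and after the transfer.
 */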
5842 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5843 					u32 size, u64 val)
5844 {
5845 	struct packet_lin_dma *lin_dma_pkt;
5846 	struct hl_cs_job *job;
5847 	u32 cb_size, ctl, err_cause;
5848 	struct hl_cb *cb;
5849 	u64 id;
5850 	int rc;
5851 
5852 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5853 	if (!cb)
5854 		return -EFAULT;
5855 
5856 	lin_dma_pkt = cb->kernel_address;
5857 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5858 	cb_size = sizeof(*lin_dma_pkt);
5859 
5860 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5861 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5862 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5863 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5864 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5865 
5866 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5867 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5868 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5869 	lin_dma_pkt->tsize = cpu_to_le32(size);
5870 
5871 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5872 	if (!job) {
5873 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5874 		rc = -ENOMEM;
5875 		goto release_cb;
5876 	}
5877 
5878 	/* Verify DMA is OK */
5879 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5880 	if (err_cause && !hdev->init_done) {
5881 		dev_dbg(hdev->dev,
5882 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5883 			err_cause);
5884 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5885 	}
5886 
5887 	job->id = 0;
5888 	job->user_cb = cb;
5889 	atomic_inc(&job->user_cb->cs_cnt);
5890 	job->user_cb_size = cb_size;
5891 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5892 	job->patched_cb = job->user_cb;
5893 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5894 
5895 	hl_debugfs_add_job(hdev, job);
5896 
5897 	rc = gaudi_send_job_on_qman0(hdev, job);
5898 	hl_debugfs_remove_job(hdev, job);
5899 	kfree(job);
5900 	atomic_dec(&cb->cs_cnt);
5901 
5902 	/* Verify DMA is OK */
5903 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5904 	if (err_cause) {
5905 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906 		rc = -EIO;
5907 		if (!hdev->init_done) {
5908 			dev_dbg(hdev->dev,
5909 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5910 				err_cause);
5911 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5912 		}
5913 	}
5914 
5915 release_cb:
5916 	id = cb->id;
5917 	hl_cb_put(cb);
5918 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5919 
5920 	return rc;
5921 }
5922 
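/*
 * Write the same value to a contiguous block of registers: build a kernel
 * CB with one MSG_LONG packet per register (plus room for a trailing
 * MSG_PROT packet) and submit it as a job through QMAN0.
 */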
5923 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5924 					u32 num_regs, u32 val)
5925 {
5926 	struct packet_msg_long *pkt;
5927 	struct hl_cs_job *job;
5928 	u32 cb_size, ctl;
5929 	struct hl_cb *cb;
5930 	int i, rc;
5931 
5932 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5933 
5934 	if (cb_size > SZ_2M) {
5935 		dev_err(hdev->dev, "CB size must be smaller than %u bytes\n", SZ_2M);
5936 		return -ENOMEM;
5937 	}
5938 
5939 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5940 	if (!cb)
5941 		return -EFAULT;
5942 
5943 	pkt = cb->kernel_address;
5944 
5945 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5946 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5947 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5948 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5949 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5950 
5951 	for (i = 0; i < num_regs ; i++, pkt++) {
5952 		pkt->ctl = cpu_to_le32(ctl);
5953 		pkt->value = cpu_to_le32(val);
5954 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5955 	}
5956 
5957 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5958 	if (!job) {
5959 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5960 		rc = -ENOMEM;
5961 		goto release_cb;
5962 	}
5963 
5964 	job->id = 0;
5965 	job->user_cb = cb;
5966 	atomic_inc(&job->user_cb->cs_cnt);
5967 	job->user_cb_size = cb_size;
5968 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5969 	job->patched_cb = job->user_cb;
5970 	job->job_cb_size = cb_size;
5971 
5972 	hl_debugfs_add_job(hdev, job);
5973 
5974 	rc = gaudi_send_job_on_qman0(hdev, job);
5975 	hl_debugfs_remove_job(hdev, job);
5976 	kfree(job);
5977 	atomic_dec(&cb->cs_cnt);
5978 
5979 release_cb:
5980 	hl_cb_put(cb);
5981 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5982 
5983 	return rc;
5984 }
5985 
5986 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5987 {
5988 	u64 base_addr;
5989 	u32 num_regs;
5990 	int rc;
5991 
5992 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5993 	num_regs = NUM_OF_SOB_IN_BLOCK;
5994 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5995 	if (rc) {
5996 		dev_err(hdev->dev, "failed resetting SM registers");
5997 		return -ENOMEM;
5998 	}
5999 
6000 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
6001 	num_regs = NUM_OF_SOB_IN_BLOCK;
6002 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6003 	if (rc) {
6004 		dev_err(hdev->dev, "failed resetting SM registers");
6005 		return -ENOMEM;
6006 	}
6007 
6008 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6009 	num_regs = NUM_OF_SOB_IN_BLOCK;
6010 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6011 	if (rc) {
6012 		dev_err(hdev->dev, "failed resetting SM registers");
6013 		return -ENOMEM;
6014 	}
6015 
6016 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6017 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6018 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6019 	if (rc) {
6020 		dev_err(hdev->dev, "failed resetting SM registers");
6021 		return -ENOMEM;
6022 	}
6023 
6024 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6025 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6026 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6027 	if (rc) {
6028 		dev_err(hdev->dev, "failed resetting SM registers");
6029 		return -ENOMEM;
6030 	}
6031 
6032 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6033 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
6034 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6035 	if (rc) {
6036 		dev_err(hdev->dev, "failed resetting SM registers");
6037 		return -ENOMEM;
6038 	}
6039 
6040 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6041 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6042 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6043 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6044 	if (rc) {
6045 		dev_err(hdev->dev, "failed resetting SM registers");
6046 		return -ENOMEM;
6047 	}
6048 
6049 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6050 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6051 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6052 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6053 	if (rc) {
6054 		dev_err(hdev->dev, "failed resetting SM registers");
6055 		return -ENOMEM;
6056 	}
6057 
6058 	return 0;
6059 }
6060 
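/*
 * Re-initialize the DMA core completion settings: point every channel's
 * WR_COMP address at its matching sync-manager SOB and restore the WR_COMP
 * data word. See the inline note below regarding WR_AWUSER_31_11 on the
 * compute DMA channels.
 */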
6061 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6062 {
6063 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6064 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6065 	int i;
6066 
6067 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6068 		u64 sob_addr = CFG_BASE +
6069 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6070 				(i * sob_delta);
6071 		u32 dma_offset = i * DMA_CORE_OFFSET;
6072 
6073 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6074 				lower_32_bits(sob_addr));
6075 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6076 				upper_32_bits(sob_addr));
6077 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6078 
6079 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6080 		 * modified by the user for SRAM reduction
6081 		 */
6082 		if (i > 1)
6083 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6084 								0x00000001);
6085 	}
6086 }
6087 
6088 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6089 {
6090 	u32 qman_offset;
6091 	int i;
6092 
6093 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6094 		qman_offset = i * DMA_QMAN_OFFSET;
6095 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6096 	}
6097 
6098 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6099 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6100 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6101 	}
6102 
6103 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6104 		qman_offset = i * TPC_QMAN_OFFSET;
6105 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6106 	}
6107 
6108 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6109 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6110 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6111 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6112 	}
6113 }
6114 
6115 static int gaudi_restore_user_registers(struct hl_device *hdev)
6116 {
6117 	int rc;
6118 
6119 	rc = gaudi_restore_sm_registers(hdev);
6120 	if (rc)
6121 		return rc;
6122 
6123 	gaudi_restore_dma_registers(hdev);
6124 	gaudi_restore_qm_registers(hdev);
6125 
6126 	return 0;
6127 }
6128 
6129 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6130 {
6131 	return 0;
6132 }
6133 
6134 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6135 {
6136 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6137 	struct gaudi_device *gaudi = hdev->asic_specific;
6138 	u64 addr = prop->mmu_pgt_addr;
6139 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6140 
6141 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6142 		return 0;
6143 
6144 	return gaudi_memset_device_memory(hdev, addr, size, 0);
6145 }
6146 
6147 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6148 {
6149 
6150 }
6151 
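/*
 * Debugfs access helpers (32/64-bit read and write): the address is routed
 * by range - CFG space through the register accessors (unless blocked by
 * clock gating), SRAM through its PCI BAR, HBM by temporarily moving the
 * HBM BAR, and host physical addresses through phys_to_virt() when the
 * address came from the user and no IOMMU is present.
 */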
6152 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6153 			bool user_address, u32 *val)
6154 {
6155 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6156 	struct gaudi_device *gaudi = hdev->asic_specific;
6157 	u64 hbm_bar_addr, host_phys_end;
6158 	int rc = 0;
6159 
6160 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6161 
6162 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6163 
6164 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6165 				(hdev->clock_gating_mask &
6166 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6167 
6168 			dev_err_ratelimited(hdev->dev,
6169 				"Can't read register - clock gating is enabled!\n");
6170 			rc = -EFAULT;
6171 		} else {
6172 			*val = RREG32(addr - CFG_BASE);
6173 		}
6174 
6175 	} else if ((addr >= SRAM_BASE_ADDR) &&
6176 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6177 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6178 				(addr - SRAM_BASE_ADDR));
6179 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6180 		u64 bar_base_addr = DRAM_PHYS_BASE +
6181 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6182 
6183 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6184 		if (hbm_bar_addr != U64_MAX) {
6185 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6186 						(addr - bar_base_addr));
6187 
6188 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6189 						hbm_bar_addr);
6190 		}
6191 		if (hbm_bar_addr == U64_MAX)
6192 			rc = -EIO;
6193 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6194 			user_address && !iommu_present(&pci_bus_type)) {
6195 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6196 	} else {
6197 		rc = -EFAULT;
6198 	}
6199 
6200 	return rc;
6201 }
6202 
6203 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6204 			bool user_address, u32 val)
6205 {
6206 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6207 	struct gaudi_device *gaudi = hdev->asic_specific;
6208 	u64 hbm_bar_addr, host_phys_end;
6209 	int rc = 0;
6210 
6211 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6212 
6213 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6214 
6215 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6216 				(hdev->clock_gating_mask &
6217 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6218 
6219 			dev_err_ratelimited(hdev->dev,
6220 				"Can't write register - clock gating is enabled!\n");
6221 			rc = -EFAULT;
6222 		} else {
6223 			WREG32(addr - CFG_BASE, val);
6224 		}
6225 
6226 	} else if ((addr >= SRAM_BASE_ADDR) &&
6227 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6228 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6229 					(addr - SRAM_BASE_ADDR));
6230 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6231 		u64 bar_base_addr = DRAM_PHYS_BASE +
6232 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6233 
6234 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6235 		if (hbm_bar_addr != U64_MAX) {
6236 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6237 						(addr - bar_base_addr));
6238 
6239 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6240 						hbm_bar_addr);
6241 		}
6242 		if (hbm_bar_addr == U64_MAX)
6243 			rc = -EIO;
6244 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6245 			user_address && !iommu_present(&pci_bus_type)) {
6246 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6247 	} else {
6248 		rc = -EFAULT;
6249 	}
6250 
6251 	return rc;
6252 }
6253 
6254 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6255 				bool user_address, u64 *val)
6256 {
6257 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6258 	struct gaudi_device *gaudi = hdev->asic_specific;
6259 	u64 hbm_bar_addr, host_phys_end;
6260 	int rc = 0;
6261 
6262 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6263 
6264 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6265 
6266 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6267 				(hdev->clock_gating_mask &
6268 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6269 
6270 			dev_err_ratelimited(hdev->dev,
6271 				"Can't read register - clock gating is enabled!\n");
6272 			rc = -EFAULT;
6273 		} else {
6274 			u32 val_l = RREG32(addr - CFG_BASE);
6275 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6276 
6277 			*val = (((u64) val_h) << 32) | val_l;
6278 		}
6279 
6280 	} else if ((addr >= SRAM_BASE_ADDR) &&
6281 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6282 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6283 				(addr - SRAM_BASE_ADDR));
6284 	} else if (addr <=
6285 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6286 		u64 bar_base_addr = DRAM_PHYS_BASE +
6287 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6288 
6289 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6290 		if (hbm_bar_addr != U64_MAX) {
6291 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6292 						(addr - bar_base_addr));
6293 
6294 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6295 						hbm_bar_addr);
6296 		}
6297 		if (hbm_bar_addr == U64_MAX)
6298 			rc = -EIO;
6299 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6300 			user_address && !iommu_present(&pci_bus_type)) {
6301 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6302 	} else {
6303 		rc = -EFAULT;
6304 	}
6305 
6306 	return rc;
6307 }
6308 
6309 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6310 				bool user_address, u64 val)
6311 {
6312 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6313 	struct gaudi_device *gaudi = hdev->asic_specific;
6314 	u64 hbm_bar_addr, host_phys_end;
6315 	int rc = 0;
6316 
6317 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6318 
6319 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6320 
6321 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6322 				(hdev->clock_gating_mask &
6323 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6324 
6325 			dev_err_ratelimited(hdev->dev,
6326 				"Can't write register - clock gating is enabled!\n");
6327 			rc = -EFAULT;
6328 		} else {
6329 			WREG32(addr - CFG_BASE, lower_32_bits(val));
6330 			WREG32(addr + sizeof(u32) - CFG_BASE,
6331 				upper_32_bits(val));
6332 		}
6333 
6334 	} else if ((addr >= SRAM_BASE_ADDR) &&
6335 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6336 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6337 					(addr - SRAM_BASE_ADDR));
6338 	} else if (addr <=
6339 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6340 		u64 bar_base_addr = DRAM_PHYS_BASE +
6341 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6342 
6343 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6344 		if (hbm_bar_addr != U64_MAX) {
6345 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6346 						(addr - bar_base_addr));
6347 
6348 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6349 						hbm_bar_addr);
6350 		}
6351 		if (hbm_bar_addr == U64_MAX)
6352 			rc = -EIO;
6353 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6354 			user_address && !iommu_present(&pci_bus_type)) {
6355 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6356 	} else {
6357 		rc = -EFAULT;
6358 	}
6359 
6360 	return rc;
6361 }
6362 
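/*
 * Perform a single transfer on a DMA core engine: program source,
 * destination and size, commit with the LIN bit only, poll (for up to 1s)
 * until the engine's BUSY bit clears, and then check and clear ERR_CAUSE.
 * Used by the debugfs DMA read path that follows.
 */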
6363 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6364 					u32 size_to_dma, dma_addr_t dma_addr)
6365 {
6366 	u32 err_cause, val;
6367 	u64 dma_offset;
6368 	int rc;
6369 
6370 	dma_offset = dma_id * DMA_CORE_OFFSET;
6371 
6372 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6373 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6374 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6375 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6376 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6377 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6378 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6379 
6380 	rc = hl_poll_timeout(
6381 		hdev,
6382 		mmDMA0_CORE_STS0 + dma_offset,
6383 		val,
6384 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6385 		0,
6386 		1000000);
6387 
6388 	if (rc) {
6389 		dev_err(hdev->dev,
6390 			"DMA %d timed out during reading of 0x%llx\n",
6391 			dma_id, addr);
6392 		return -EIO;
6393 	}
6394 
6395 	/* Verify DMA is OK */
6396 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6397 	if (err_cause) {
6398 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6399 		dev_dbg(hdev->dev,
6400 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6401 			err_cause);
6402 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6403 
6404 		return -EIO;
6405 	}
6406 
6407 	return 0;
6408 }
6409 
6410 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6411 				void *blob_addr)
6412 {
6413 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6414 	struct gaudi_device *gaudi = hdev->asic_specific;
6415 	u64 dma_offset, qm_offset;
6416 	dma_addr_t dma_addr;
6417 	void *kernel_addr;
6418 	bool is_eng_idle;
6419 	int rc = 0, dma_id;
6420 
6421 	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6422 						hdev, SZ_2M,
6423 						&dma_addr,
6424 						GFP_KERNEL | __GFP_ZERO);
6425 
6426 	if (!kernel_addr)
6427 		return -ENOMEM;
6428 
6429 	mutex_lock(&gaudi->clk_gate_mutex);
6430 
6431 	hdev->asic_funcs->disable_clock_gating(hdev);
6432 
6433 	hdev->asic_funcs->hw_queues_lock(hdev);
6434 
6435 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6436 	dma_offset = dma_id * DMA_CORE_OFFSET;
6437 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6438 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6439 	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6440 
6441 	if (!is_eng_idle) {
6442 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6443 		dma_offset = dma_id * DMA_CORE_OFFSET;
6444 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6445 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6446 		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6447 
6448 		if (!is_eng_idle) {
6449 			dev_err_ratelimited(hdev->dev,
6450 				"Can't read via DMA because it is BUSY\n");
6451 			rc = -EAGAIN;
6452 			goto out;
6453 		}
6454 	}
6455 
6456 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6457 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6458 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6459 
6460 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6461 	 * using the compute ctx ASID, if one exists. Otherwise, use the
6462 	 * kernel ctx ASID
6463 	 */
6464 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465 
6466 	/* Verify DMA is OK */
6467 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6468 	if (err_cause) {
6469 		dev_dbg(hdev->dev,
6470 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6471 			err_cause);
6472 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6473 	}
6474 
6475 	pos = 0;
6476 	size_left = size;
6477 	size_to_dma = SZ_2M;
6478 
6479 	while (size_left > 0) {
6480 
6481 		if (size_left < SZ_2M)
6482 			size_to_dma = size_left;
6483 
6484 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6485 						dma_addr);
6486 		if (rc)
6487 			break;
6488 
6489 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6490 
6491 		if (size_left <= SZ_2M)
6492 			break;
6493 
6494 		pos += SZ_2M;
6495 		addr += SZ_2M;
6496 		size_left -= SZ_2M;
6497 	}
6498 
6499 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6500 	 * using the compute ctx ASID, if one exists. Otherwise, use the
6501 	 * kernel ctx ASID
6502 	 */
6503 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6504 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6505 
6506 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6507 
6508 out:
6509 	hdev->asic_funcs->hw_queues_unlock(hdev);
6510 
6511 	hdev->asic_funcs->set_clock_gating(hdev);
6512 
6513 	mutex_unlock(&gaudi->clk_gate_mutex);
6514 
6515 	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6516 						dma_addr);
6517 
6518 	return rc;
6519 }
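
/*
 * Worked example (editor's note, not from the original source): a 5MB
 * debugfs DMA read using the loop above is split into three transfers of
 * 2MB, 2MB and 1MB; after each full 2MB chunk both 'pos' and 'addr'
 * advance by SZ_2M, and the final partial chunk is copied just before the
 * loop breaks.
 */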
6520 
6521 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6522 {
6523 	struct gaudi_device *gaudi = hdev->asic_specific;
6524 
6525 	if (hdev->hard_reset_pending)
6526 		return U64_MAX;
6527 
6528 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6529 			(addr - gaudi->hbm_bar_cur_addr));
6530 }
6531 
6532 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6533 {
6534 	struct gaudi_device *gaudi = hdev->asic_specific;
6535 
6536 	if (hdev->hard_reset_pending)
6537 		return;
6538 
6539 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6540 			(addr - gaudi->hbm_bar_cur_addr));
6541 }
6542 
6543 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6544 {
6545 	/* mask to zero the MMBP and ASID bits */
6546 	WREG32_AND(reg, ~0x7FF);
6547 	WREG32_OR(reg, asid);
6548 }
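
/*
 * Illustrative sketch (editor's addition, not used by the driver): the
 * WREG32_AND()/WREG32_OR() pair above amounts to a single read-modify-write
 * that clears the low 11 bits (the MMBP and ASID fields) and installs the
 * new ASID. Assuming the usual RREG32()/WREG32() accessors, an equivalent
 * hypothetical helper would look like this:
 */
static void __maybe_unused gaudi_mmu_prepare_reg_rmw_example(
				struct hl_device *hdev, u64 reg, u32 asid)
{
	u32 tmp = RREG32(reg);

	tmp &= ~0x7FF;	/* clear the MMBP and ASID bits */
	tmp |= asid;	/* program the new ASID */
	WREG32(reg, tmp);
}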
6549 
6550 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6551 {
6552 	struct gaudi_device *gaudi = hdev->asic_specific;
6553 
6554 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6555 		return;
6556 
6557 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6558 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6559 		return;
6560 	}
6561 
6562 	mutex_lock(&gaudi->clk_gate_mutex);
6563 
6564 	hdev->asic_funcs->disable_clock_gating(hdev);
6565 
6566 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6567 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6568 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6569 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6570 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6571 
6572 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577 
6578 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6579 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6580 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6581 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6582 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6583 
6584 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6585 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6586 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6587 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6588 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6589 
6590 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6591 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6592 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6593 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6594 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6595 
6596 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601 
6602 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6603 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6604 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6605 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6606 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607 
6608 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6609 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6610 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6611 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6612 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6613 
6614 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6615 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6616 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6617 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6618 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6619 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6620 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6621 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6622 
6623 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6624 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6625 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6626 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6627 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6628 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6629 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6630 
6631 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6632 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6633 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6634 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6635 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6636 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6637 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6638 
6639 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6640 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6641 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6642 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6643 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6644 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6645 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6646 
6647 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6648 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6649 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6650 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6651 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6652 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6653 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6654 
6655 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6656 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6657 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6658 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6659 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6660 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6661 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6662 
6663 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6664 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6665 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6666 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6667 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6668 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6669 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6670 
6671 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6672 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6673 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6674 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6675 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6676 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6677 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6678 
6679 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6680 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6681 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6682 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6683 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6684 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6685 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6686 
6687 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6688 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6689 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6690 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6691 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6692 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6693 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6694 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6695 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6696 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6697 
6698 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6699 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6700 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6701 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6702 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6703 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6704 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6705 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6706 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6707 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6708 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6709 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6710 
6711 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6712 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6713 				asid);
6714 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6715 				asid);
6716 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6717 				asid);
6718 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6719 				asid);
6720 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6721 				asid);
6722 	}
6723 
6724 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6725 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6726 				asid);
6727 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6728 				asid);
6729 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6730 				asid);
6731 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6732 				asid);
6733 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6734 				asid);
6735 	}
6736 
6737 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6738 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6739 				asid);
6740 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6741 				asid);
6742 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6743 				asid);
6744 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6745 				asid);
6746 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6747 				asid);
6748 	}
6749 
6750 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6751 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6752 				asid);
6753 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6754 				asid);
6755 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6756 				asid);
6757 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6758 				asid);
6759 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6760 				asid);
6761 	}
6762 
6763 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6764 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6765 				asid);
6766 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6767 				asid);
6768 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6769 				asid);
6770 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6771 				asid);
6772 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6773 				asid);
6774 	}
6775 
6776 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6777 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6778 				asid);
6779 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6780 				asid);
6781 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6782 				asid);
6783 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6784 				asid);
6785 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6786 				asid);
6787 	}
6788 
6789 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6790 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6791 				asid);
6792 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6793 				asid);
6794 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6795 				asid);
6796 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6797 				asid);
6798 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6799 				asid);
6800 	}
6801 
6802 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6803 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6804 				asid);
6805 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6806 				asid);
6807 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6808 				asid);
6809 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6810 				asid);
6811 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6812 				asid);
6813 	}
6814 
6815 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6816 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6817 				asid);
6818 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6819 				asid);
6820 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6821 				asid);
6822 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6823 				asid);
6824 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6825 				asid);
6826 	}
6827 
6828 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6829 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6830 				asid);
6831 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6832 				asid);
6833 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6834 				asid);
6835 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6836 				asid);
6837 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6838 				asid);
6839 	}
6840 
6841 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6842 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6843 
6844 	hdev->asic_funcs->set_clock_gating(hdev);
6845 
6846 	mutex_unlock(&gaudi->clk_gate_mutex);
6847 }
6848 
6849 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6850 		struct hl_cs_job *job)
6851 {
6852 	struct packet_msg_prot *fence_pkt;
6853 	u32 *fence_ptr;
6854 	dma_addr_t fence_dma_addr;
6855 	struct hl_cb *cb;
6856 	u32 tmp, timeout, dma_offset;
6857 	int rc;
6858 
6859 	if (hdev->pldm)
6860 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6861 	else
6862 		timeout = HL_DEVICE_TIMEOUT_USEC;
6863 
6864 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6865 		dev_err_ratelimited(hdev->dev,
6866 			"Can't send driver job on QMAN0 because the device is not idle\n");
6867 		return -EBUSY;
6868 	}
6869 
6870 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6871 							&fence_dma_addr);
6872 	if (!fence_ptr) {
6873 		dev_err(hdev->dev,
6874 			"Failed to allocate fence memory for QMAN0\n");
6875 		return -ENOMEM;
6876 	}
6877 
6878 	cb = job->patched_cb;
6879 
6880 	fence_pkt = cb->kernel_address +
6881 			job->job_cb_size - sizeof(struct packet_msg_prot);
6882 
6883 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6884 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6885 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6886 
6887 	fence_pkt->ctl = cpu_to_le32(tmp);
6888 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6889 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
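	/*
	 * When QMAN0 processes this MSG_PROT packet it writes
	 * GAUDI_QMAN0_FENCE_VAL to fence_dma_addr; the memory poll below
	 * waits for that value as the job-completion signal.
	 */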
6890 
6891 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6892 
6893 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6894 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6895 
6896 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6897 					job->job_cb_size, cb->bus_address);
6898 	if (rc) {
6899 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6900 		goto free_fence_ptr;
6901 	}
6902 
6903 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6904 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6905 				timeout, true);
6906 
6907 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6908 
6909 	if (rc == -ETIMEDOUT) {
6910 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6911 		goto free_fence_ptr;
6912 	}
6913 
6914 free_fence_ptr:
6915 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6916 
6917 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6918 					fence_dma_addr);
6919 	return rc;
6920 }
6921 
6922 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6923 {
6924 	if (event_type >= GAUDI_EVENT_SIZE)
6925 		goto event_not_supported;
6926 
6927 	if (!gaudi_irq_map_table[event_type].valid)
6928 		goto event_not_supported;
6929 
6930 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6931 
6932 	return;
6933 
6934 event_not_supported:
6935 	snprintf(desc, size, "N/A");
6936 }
6937 
6938 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6939 							u32 x_y, bool is_write)
6940 {
6941 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6942 
6943 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6944 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6945 
6946 	switch (x_y) {
6947 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6948 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6949 		dma_id[0] = 0;
6950 		dma_id[1] = 2;
6951 		break;
6952 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6953 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6954 		dma_id[0] = 1;
6955 		dma_id[1] = 3;
6956 		break;
6957 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6958 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6959 		dma_id[0] = 4;
6960 		dma_id[1] = 6;
6961 		break;
6962 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6963 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6964 		dma_id[0] = 5;
6965 		dma_id[1] = 7;
6966 		break;
6967 	default:
6968 		goto unknown_initiator;
6969 	}
6970 
6971 	for (i = 0 ; i < 2 ; i++) {
6972 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6973 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6974 	}
6975 
6976 	switch (x_y) {
6977 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6978 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6979 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6980 			return "DMA0";
6981 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6982 			return "DMA2";
6983 		else
6984 			return "DMA0 or DMA2";
6985 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6986 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6987 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6988 			return "DMA1";
6989 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6990 			return "DMA3";
6991 		else
6992 			return "DMA1 or DMA3";
6993 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6994 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6995 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6996 			return "DMA4";
6997 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6998 			return "DMA6";
6999 		else
7000 			return "DMA4 or DMA6";
7001 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7002 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7003 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7004 			return "DMA5";
7005 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7006 			return "DMA7";
7007 		else
7008 			return "DMA5 or DMA7";
7009 	}
7010 
7011 unknown_initiator:
7012 	return "unknown initiator";
7013 }
7014 
7015 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7016 							bool is_write)
7017 {
7018 	u32 val, x_y, axi_id;
7019 
7020 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7021 				RREG32(mmMMU_UP_RAZWI_READ_ID);
7022 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7023 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7024 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7025 			RAZWI_INITIATOR_AXI_ID_SHIFT);
7026 
7027 	switch (x_y) {
7028 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7029 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7030 			return "TPC0";
7031 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7032 			return "NIC0";
7033 		break;
7034 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
7035 		return "TPC1";
7036 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7037 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7038 		return "MME0";
7039 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7040 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7041 		return "MME1";
7042 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
7043 		return "TPC2";
7044 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7045 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7046 			return "TPC3";
7047 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7048 			return "PCI";
7049 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7050 			return "CPU";
7051 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7052 			return "PSOC";
7053 		break;
7054 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7055 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7056 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7057 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7058 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7059 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7060 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7061 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7062 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7063 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7064 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7065 			return "TPC4";
7066 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7067 			return "NIC1";
7068 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7069 			return "NIC2";
7070 		break;
7071 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
7072 		return "TPC5";
7073 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7074 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7075 		return "MME2";
7076 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7077 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7078 		return "MME3";
7079 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
7080 		return "TPC6";
7081 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7082 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7083 			return "TPC7";
7084 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7085 			return "NIC4";
7086 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7087 			return "NIC5";
7088 		break;
7089 	default:
7090 		break;
7091 	}
7092 
7093 	dev_err(hdev->dev,
7094 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7095 		val,
7096 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7097 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7098 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7099 			RAZWI_INITIATOR_AXI_ID_MASK);
7100 
7101 	return "unknown initiator";
7102 }
7103 
7104 static void gaudi_print_razwi_info(struct hl_device *hdev)
7105 {
7106 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7107 		dev_err_ratelimited(hdev->dev,
7108 			"RAZWI event caused by illegal write of %s\n",
7109 			gaudi_get_razwi_initiator_name(hdev, true));
7110 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7111 	}
7112 
7113 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7114 		dev_err_ratelimited(hdev->dev,
7115 			"RAZWI event caused by illegal read of %s\n",
7116 			gaudi_get_razwi_initiator_name(hdev, false));
7117 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7118 	}
7119 }
7120 
7121 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7122 {
7123 	struct gaudi_device *gaudi = hdev->asic_specific;
7124 	u64 addr;
7125 	u32 val;
7126 
7127 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7128 		return;
7129 
7130 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7131 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
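		/*
		 * The capture register holds VA bits 49:32; the low 32 bits
		 * are read from the companion _VA register below.
		 */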
7132 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7133 		addr <<= 32;
7134 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7135 
7136 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7137 					addr);
7138 
7139 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7140 	}
7141 
7142 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7143 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7144 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7145 		addr <<= 32;
7146 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7147 
7148 		dev_err_ratelimited(hdev->dev,
7149 				"MMU access error on va 0x%llx\n", addr);
7150 
7151 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7152 	}
7153 }
7154 
7155 /*
7156  *  +-------------------+------------------------------------------------------+
7157  *  | Configuration Reg |                     Description                      |
7158  *  |      Address      |                                                      |
7159  *  +-------------------+------------------------------------------------------+
7160  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7161  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7162  *  |                   |0xF34 memory wrappers 63:32                           |
7163  *  |                   |0xF38 memory wrappers 95:64                           |
7164  *  |                   |0xF3C memory wrappers 127:96                          |
7165  *  +-------------------+------------------------------------------------------+
7166  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7167  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7168  *  |                   |0xF44 memory wrappers 63:32                           |
7169  *  |                   |0xF48 memory wrappers 95:64                           |
7170  *  |                   |0xF4C memory wrappers 127:96                          |
7171  *  +-------------------+------------------------------------------------------+
7172  */
7173 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7174 		struct ecc_info_extract_params *params, u64 *ecc_address,
7175 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7176 {
7177 	struct gaudi_device *gaudi = hdev->asic_specific;
7178 	u32 i, num_mem_regs, reg, err_bit;
7179 	u64 err_addr, err_word = 0;
7180 	int rc = 0;
7181 
7182 	num_mem_regs = params->num_memories / 32 +
7183 			((params->num_memories % 32) ? 1 : 0);
7184 
7185 	if (params->block_address >= CFG_BASE)
7186 		params->block_address -= CFG_BASE;
7187 
7188 	if (params->derr)
7189 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7190 	else
7191 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7192 
7193 	if (params->disable_clock_gating) {
7194 		mutex_lock(&gaudi->clk_gate_mutex);
7195 		hdev->asic_funcs->disable_clock_gating(hdev);
7196 	}
7197 
7198 	/* Set invalid wrapper index */
7199 	*memory_wrapper_idx = 0xFF;
7200 
7201 	/* Iterate through memory wrappers, a single bit must be set */
7202 	for (i = 0 ; i < num_mem_regs ; i++) {
7203 		/* the indication registers are consecutive, 4 bytes apart */
7204 		err_word = RREG32(err_addr + i * 4);
7205 		if (err_word) {
7206 			err_bit = __ffs(err_word);
7207 			*memory_wrapper_idx = err_bit + (32 * i);
7208 			break;
7209 		}
7210 	}
7211 
7212 	if (*memory_wrapper_idx == 0xFF) {
7213 		dev_err(hdev->dev, "ECC error information cannot be found\n");
7214 		rc = -EINVAL;
7215 		goto enable_clk_gate;
7216 	}
7217 
7218 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7219 			*memory_wrapper_idx);
7220 
7221 	*ecc_address =
7222 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7223 	*ecc_syndrom =
7224 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7225 
7226 	/* Clear error indication */
7227 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7228 	if (params->derr)
7229 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7230 	else
7231 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7232 
7233 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7234 
7235 enable_clk_gate:
7236 	if (params->disable_clock_gating) {
7237 		hdev->asic_funcs->set_clock_gating(hdev);
7238 
7239 		mutex_unlock(&gaudi->clk_gate_mutex);
7240 	}
7241 
7242 	return rc;
7243 }
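
/*
 * Worked example (editor's note, based on the register table above and
 * assuming GAUDI_ECC_SERR0_OFFSET corresponds to the 0xF30 register): a
 * single-bit error in memory wrapper 70 sets bit 6 of the indication
 * register covering wrappers 95:64, and the extraction loop above recovers
 * the index as 6 + 32 * 2 = 70.
 */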
7244 
7245 /*
7246  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7247  *
7248  * @idx: the current pi/ci value
7249  * @q_len: the queue length (power of 2)
7250  *
7251  * @return the cyclically decremented index
7252  */
7253 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7254 {
7255 	u32 mask = q_len - 1;
7256 
7257 	/*
7258 	 * modular decrement is equivalent to adding (queue_len - 1);
7259 	 * we then take the LSBs to make sure the value is in the
7260 	 * range [0, queue_len - 1]
7261 	 */
7262 	return (idx + q_len - 1) & mask;
7263 }
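
/*
 * Worked example (editor's note): with q_len = 8 the mask is 0x7, so
 * gaudi_queue_idx_dec(5, 8) = (5 + 7) & 0x7 = 4, while
 * gaudi_queue_idx_dec(0, 8) = (0 + 7) & 0x7 = 7, i.e. index 0 wraps back
 * to the last entry.
 */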
7264 
7265 /**
7266  * gaudi_print_sw_config_stream_data - print SW config stream data
7267  *
7268  * @hdev: pointer to the habanalabs device structure
7269  * @stream: the QMAN's stream
7270  * @qman_base: base address of QMAN registers block
7271  */
7272 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7273 						u64 qman_base)
7274 {
7275 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7276 	u32 cq_ptr_lo_off, size;
7277 
7278 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
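	/*
	 * All QMAN blocks share the TPC0_QM register layout, so the
	 * mmTPC0_QM_* defines are used as a template: the register's offset
	 * within the block is added to this QMAN's base address, plus a
	 * per-stream stride.
	 */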
7279 
7280 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7281 						stream * cq_ptr_lo_off;
7282 	cq_ptr_hi = cq_ptr_lo +
7283 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284 	cq_tsize = cq_ptr_lo +
7285 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7286 
7287 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7288 	size = RREG32(cq_tsize);
7289 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7290 							stream, cq_ptr, size);
7291 }
7292 
7293 /**
7294  * gaudi_print_last_pqes_on_err - print last PQEs on error
7295  *
7296  * @hdev: pointer to the habanalabs device structure
7297  * @qid_base: first QID of the QMAN (out of 4 streams)
7298  * @stream: the QMAN's stream
7299  * @qman_base: base address of QMAN registers block
7300  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7301  */
7302 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7303 						u32 stream, u64 qman_base,
7304 						bool pr_sw_conf)
7305 {
7306 	u32 ci, qm_ci_stream_off, queue_len;
7307 	struct hl_hw_queue *q;
7308 	u64 pq_ci;
7309 	int i;
7310 
7311 	q = &hdev->kernel_queues[qid_base + stream];
7312 
7313 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7314 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7315 						stream * qm_ci_stream_off;
7316 
7317 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7318 					q->int_queue_len : HL_QUEUE_LENGTH;
7319 
7320 	hdev->asic_funcs->hw_queues_lock(hdev);
7321 
7322 	if (pr_sw_conf)
7323 		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7324 
7325 	ci = RREG32(pq_ci);
7326 
7327 	/* we should start printing from ci - 1 */
7328 	ci = gaudi_queue_idx_dec(ci, queue_len);
7329 
7330 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7331 		struct hl_bd *bd;
7332 		u64 addr;
7333 		u32 len;
7334 
7335 		bd = q->kernel_address;
7336 		bd += ci;
7337 
7338 		len = le32_to_cpu(bd->len);
7339 		/* len 0 means an uninitialized entry - break */
7340 		if (!len)
7341 			break;
7342 
7343 		addr = le64_to_cpu(bd->ptr);
7344 
7345 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7346 							stream, ci, addr, len);
7347 
7348 		/* get previous ci, wrap if needed */
7349 		ci = gaudi_queue_idx_dec(ci, queue_len);
7350 	}
7351 
7352 	hdev->asic_funcs->hw_queues_unlock(hdev);
7353 }
7354 
7355 /**
7356  * print_qman_data_on_err - extract QMAN data on error
7357  *
7358  * @hdev: pointer to the habanalabs device structure
7359  * @qid_base: first QID of the QMAN (out of 4 streams)
7360  * @stream: the QMAN's stream
7361  * @qman_base: base address of QMAN registers block
7362  *
7363  * This function attempts to extract as much data as possible on a QMAN error.
7364  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7365  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7366  */
7367 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7368 						u32 stream, u64 qman_base)
7369 {
7370 	u32 i;
7371 
7372 	if (stream != QMAN_STREAMS) {
7373 		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7374 									true);
7375 		return;
7376 	}
7377 
7378 	gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7379 
7380 	for (i = 0; i < QMAN_STREAMS; i++)
7381 		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7382 									false);
7383 }
7384 
7385 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7386 					  const char *qm_name,
7387 					  u64 qman_base,
7388 					  u32 qid_base)
7389 {
7390 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7391 	u64 glbl_sts_addr, arb_err_addr;
7392 	char reg_desc[32];
7393 
7394 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7395 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7396 
7397 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7398 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7399 		glbl_sts_clr_val = 0;
7400 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7401 
7402 		if (!glbl_sts_val)
7403 			continue;
7404 
7405 		if (i == QMAN_STREAMS)
7406 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7407 		else
7408 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7409 
7410 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7411 			if (glbl_sts_val & BIT(j)) {
7412 				dev_err_ratelimited(hdev->dev,
7413 						"%s %s. err cause: %s\n",
7414 						qm_name, reg_desc,
7415 						gaudi_qman_error_cause[j]);
7416 				glbl_sts_clr_val |= BIT(j);
7417 			}
7418 		}
7419 
7420 		/* Write 1 to clear the errors */
7421 		if (!hdev->stop_on_err)
7422 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7423 		else
7424 			print_qman_data_on_err(hdev, qid_base, i, qman_base);
7425 	}
7426 
7427 	arb_err_val = RREG32(arb_err_addr);
7428 
7429 	if (!arb_err_val)
7430 		return;
7431 
7432 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7433 		if (arb_err_val & BIT(j)) {
7434 			dev_err_ratelimited(hdev->dev,
7435 					"%s ARB_ERR. err cause: %s\n",
7436 					qm_name,
7437 					gaudi_qman_arb_error_cause[j]);
7438 		}
7439 	}
7440 }
7441 
7442 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7443 		struct hl_eq_sm_sei_data *sei_data)
7444 {
7445 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7446 
7447 	/* Flip the bits as the enum is ordered in the opposite way (0 <-> 3, 1 <-> 2) */
7448 	index = (index ^ 0x3) & 0x3;
7449 
7450 	switch (sei_data->sei_cause) {
7451 	case SM_SEI_SO_OVERFLOW:
7452 		dev_err_ratelimited(hdev->dev,
7453 			"%s SEI Error: SOB Group %u overflow/underflow",
7454 			gaudi_sync_manager_names[index],
7455 			le32_to_cpu(sei_data->sei_log));
7456 		break;
7457 	case SM_SEI_LBW_4B_UNALIGNED:
7458 		dev_err_ratelimited(hdev->dev,
7459 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7460 			gaudi_sync_manager_names[index],
7461 			le32_to_cpu(sei_data->sei_log));
7462 		break;
7463 	case SM_SEI_AXI_RESPONSE_ERR:
7464 		dev_err_ratelimited(hdev->dev,
7465 			"%s SEI Error: AXI ID %u response error",
7466 			gaudi_sync_manager_names[index],
7467 			le32_to_cpu(sei_data->sei_log));
7468 		break;
7469 	default:
7470 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7471 				le32_to_cpu(sei_data->sei_log));
7472 		break;
7473 	}
7474 }
7475 
7476 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7477 		struct hl_eq_ecc_data *ecc_data)
7478 {
7479 	struct ecc_info_extract_params params;
7480 	u64 ecc_address = 0, ecc_syndrom = 0;
7481 	u8 index, memory_wrapper_idx = 0;
7482 	bool extract_info_from_fw;
7483 	int rc;
7484 
7485 	if (hdev->asic_prop.fw_security_enabled) {
7486 		extract_info_from_fw = true;
7487 		goto extract_ecc_info;
7488 	}
7489 
7490 	switch (event_type) {
7491 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7492 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7493 		extract_info_from_fw = true;
7494 		break;
7495 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7496 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7497 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7498 		params.num_memories = 90;
7499 		params.derr = false;
7500 		params.disable_clock_gating = true;
7501 		extract_info_from_fw = false;
7502 		break;
7503 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7504 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7505 		params.block_address =
7506 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7507 		params.num_memories = 90;
7508 		params.derr = true;
7509 		params.disable_clock_gating = true;
7510 		extract_info_from_fw = false;
7511 		break;
7512 	case GAUDI_EVENT_MME0_ACC_SERR:
7513 	case GAUDI_EVENT_MME1_ACC_SERR:
7514 	case GAUDI_EVENT_MME2_ACC_SERR:
7515 	case GAUDI_EVENT_MME3_ACC_SERR:
7516 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7517 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7518 		params.num_memories = 128;
7519 		params.derr = false;
7520 		params.disable_clock_gating = true;
7521 		extract_info_from_fw = false;
7522 		break;
7523 	case GAUDI_EVENT_MME0_ACC_DERR:
7524 	case GAUDI_EVENT_MME1_ACC_DERR:
7525 	case GAUDI_EVENT_MME2_ACC_DERR:
7526 	case GAUDI_EVENT_MME3_ACC_DERR:
7527 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7528 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7529 		params.num_memories = 128;
7530 		params.derr = true;
7531 		params.disable_clock_gating = true;
7532 		extract_info_from_fw = false;
7533 		break;
7534 	case GAUDI_EVENT_MME0_SBAB_SERR:
7535 	case GAUDI_EVENT_MME1_SBAB_SERR:
7536 	case GAUDI_EVENT_MME2_SBAB_SERR:
7537 	case GAUDI_EVENT_MME3_SBAB_SERR:
7538 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7539 		params.block_address =
7540 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7541 		params.num_memories = 33;
7542 		params.derr = false;
7543 		params.disable_clock_gating = true;
7544 		extract_info_from_fw = false;
7545 		break;
7546 	case GAUDI_EVENT_MME0_SBAB_DERR:
7547 	case GAUDI_EVENT_MME1_SBAB_DERR:
7548 	case GAUDI_EVENT_MME2_SBAB_DERR:
7549 	case GAUDI_EVENT_MME3_SBAB_DERR:
7550 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7551 		params.block_address =
7552 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7553 		params.num_memories = 33;
7554 		params.derr = true;
7555 		params.disable_clock_gating = true;
7556 		extract_info_from_fw = false;
7557 		break;
7558 	default:
7559 		return;
7560 	}
7561 
7562 extract_ecc_info:
7563 	if (extract_info_from_fw) {
7564 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7565 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7566 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7567 	} else {
7568 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7569 				&ecc_syndrom, &memory_wrapper_idx);
7570 		if (rc)
7571 			return;
7572 	}
7573 
7574 	dev_err(hdev->dev,
7575 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7576 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7577 }
7578 
7579 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7580 {
7581 	u64 qman_base;
7582 	char desc[32];
7583 	u32 qid_base;
7584 	u8 index;
7585 
7586 	switch (event_type) {
7587 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7588 		index = event_type - GAUDI_EVENT_TPC0_QM;
7589 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7590 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7591 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7592 		break;
7593 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7594 		index = event_type - GAUDI_EVENT_MME0_QM;
7595 		qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7596 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7597 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7598 		break;
7599 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7600 		index = event_type - GAUDI_EVENT_DMA0_QM;
7601 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7602 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7603 		if (index > 1)
7604 			qid_base++;
7605 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7606 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7607 		break;
7608 	case GAUDI_EVENT_NIC0_QM0:
7609 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7610 		qman_base = mmNIC0_QM0_BASE;
7611 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7612 		break;
7613 	case GAUDI_EVENT_NIC0_QM1:
7614 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7615 		qman_base = mmNIC0_QM1_BASE;
7616 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7617 		break;
7618 	case GAUDI_EVENT_NIC1_QM0:
7619 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7620 		qman_base = mmNIC1_QM0_BASE;
7621 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7622 		break;
7623 	case GAUDI_EVENT_NIC1_QM1:
7624 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7625 		qman_base = mmNIC1_QM1_BASE;
7626 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7627 		break;
7628 	case GAUDI_EVENT_NIC2_QM0:
7629 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7630 		qman_base = mmNIC2_QM0_BASE;
7631 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7632 		break;
7633 	case GAUDI_EVENT_NIC2_QM1:
7634 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7635 		qman_base = mmNIC2_QM1_BASE;
7636 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7637 		break;
7638 	case GAUDI_EVENT_NIC3_QM0:
7639 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7640 		qman_base = mmNIC3_QM0_BASE;
7641 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7642 		break;
7643 	case GAUDI_EVENT_NIC3_QM1:
7644 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7645 		qman_base = mmNIC3_QM1_BASE;
7646 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7647 		break;
7648 	case GAUDI_EVENT_NIC4_QM0:
7649 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7650 		qman_base = mmNIC4_QM0_BASE;
7651 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7652 		break;
7653 	case GAUDI_EVENT_NIC4_QM1:
7654 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7655 		qman_base = mmNIC4_QM1_BASE;
7656 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7657 		break;
7658 	default:
7659 		return;
7660 	}
7661 
7662 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7663 }
7664 
7665 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7666 					bool razwi)
7667 {
7668 	char desc[64] = "";
7669 
7670 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7671 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7672 		event_type, desc);
7673 
7674 	if (razwi) {
7675 		gaudi_print_razwi_info(hdev);
7676 		gaudi_print_mmu_error_info(hdev);
7677 	}
7678 }
7679 
7680 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7681 					struct cpucp_pkt_sync_err *sync_err)
7682 {
7683 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7684 
7685 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7686 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7687 }
7688 
7689 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7690 					struct hl_eq_fw_alive *fw_alive)
7691 {
7692 	dev_err(hdev->dev,
7693 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7694 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7695 		"Minor" : "Critical", fw_alive->process_id,
7696 		fw_alive->thread_id, fw_alive->uptime_seconds);
7697 }
7698 
7699 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7700 {
7701 	struct gaudi_device *gaudi = hdev->asic_specific;
7702 
7703 	/* Unmask all IRQs since some could have been received
7704 	 * during the soft reset
7705 	 */
7706 	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7707 }
7708 
7709 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7710 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7711 {
7712 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7713 	int rc = 0;
7714 
7715 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7716 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7717 		if (!hbm_ecc_data) {
7718 			dev_err(hdev->dev, "No FW ECC data");
7719 			return 0;
7720 		}
7721 
7722 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7723 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7725 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7727 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7729 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7731 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7733 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7735 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7736 
7737 		dev_err(hdev->dev,
7738 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7740 		dev_err(hdev->dev,
7741 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7742 			device, ch, hbm_ecc_data->first_addr, type,
7743 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7744 			hbm_ecc_data->dec_cnt);
7745 		return 0;
7746 	}
7747 
7748 	if (hdev->asic_prop.fw_security_enabled) {
7749 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7750 		return 0;
7751 	}
7752 
7753 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7754 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7755 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7756 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7757 		if (val) {
7758 			rc = -EIO;
7759 			dev_err(hdev->dev,
7760 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7761 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7762 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7763 				(val >> 4) & 0x1);
7764 
7765 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7766 			dev_err(hdev->dev,
7767 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7768 				device, ch * 2,
7769 				RREG32(base + ch * 0x1000 + 0x064),
7770 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7771 				(val2 & 0xFF0000) >> 16,
7772 				(val2 & 0xFF000000) >> 24);
7773 		}
7774 
7775 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7776 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7777 		if (val) {
7778 			rc = -EIO;
7779 			dev_err(hdev->dev,
7780 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7781 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7782 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7783 				(val >> 4) & 0x1);
7784 
7785 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7786 			dev_err(hdev->dev,
7787 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7788 				device, ch * 2 + 1,
7789 				RREG32(base + ch * 0x1000 + 0x074),
7790 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7791 				(val2 & 0xFF0000) >> 16,
7792 				(val2 & 0xFF000000) >> 24);
7793 		}
7794 
7795 		/* Clear interrupts */
7796 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7797 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7798 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7799 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7800 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7801 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7802 	}
7803 
7804 	val  = RREG32(base + 0x8F30);
7805 	val2 = RREG32(base + 0x8F34);
7806 	if (val | val2) {
7807 		rc = -EIO;
7808 		dev_err(hdev->dev,
7809 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7810 			device, val, val2);
7811 	}
7812 	val  = RREG32(base + 0x8F40);
7813 	val2 = RREG32(base + 0x8F44);
7814 	if (val | val2) {
7815 		rc = -EIO;
7816 		dev_err(hdev->dev,
7817 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7818 			device, val, val2);
7819 	}
7820 
7821 	return rc;
7822 }
7823 
7824 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7825 {
7826 	switch (hbm_event_type) {
7827 	case GAUDI_EVENT_HBM0_SPI_0:
7828 	case GAUDI_EVENT_HBM0_SPI_1:
7829 		return 0;
7830 	case GAUDI_EVENT_HBM1_SPI_0:
7831 	case GAUDI_EVENT_HBM1_SPI_1:
7832 		return 1;
7833 	case GAUDI_EVENT_HBM2_SPI_0:
7834 	case GAUDI_EVENT_HBM2_SPI_1:
7835 		return 2;
7836 	case GAUDI_EVENT_HBM3_SPI_0:
7837 	case GAUDI_EVENT_HBM3_SPI_1:
7838 		return 3;
7839 	default:
7840 		break;
7841 	}
7842 
7843 	/* Should never happen */
7844 	return 0;
7845 }
7846 
7847 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7848 					char *interrupt_name)
7849 {
7850 	struct gaudi_device *gaudi = hdev->asic_specific;
7851 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7852 	bool soft_reset_required = false;
7853 
7854 	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7855 	 * gating, and thus cannot be done in CPU-CP and should be done instead
7856 	 * by the driver.
7857 	 */
7858 
7859 	mutex_lock(&gaudi->clk_gate_mutex);
7860 
7861 	hdev->asic_funcs->disable_clock_gating(hdev);
7862 
7863 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7864 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7865 
7866 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7867 		if (tpc_interrupts_cause & BIT(i)) {
7868 			dev_err_ratelimited(hdev->dev,
7869 					"TPC%d_%s interrupt cause: %s\n",
7870 					tpc_id, interrupt_name,
7871 					gaudi_tpc_interrupts_cause[i]);
7872 			/* If this is a QM error, we need to soft-reset */
7873 			if (i == 15)
7874 				soft_reset_required = true;
7875 		}
7876 
7877 	/* Clear interrupts */
7878 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7879 
7880 	hdev->asic_funcs->set_clock_gating(hdev);
7881 
7882 	mutex_unlock(&gaudi->clk_gate_mutex);
7883 
7884 	return soft_reset_required;
7885 }
7886 
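/*
 * Map a TPC event ID back to the TPC engine index. DEC events come in pairs
 * per TPC (hence the divide by 2), while kernel-error events are spaced six
 * event IDs apart per TPC (hence the divide by 6); the spacing is implied by
 * the GAUDI_EVENT_TPCx_* enumeration.
 */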
7887 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7888 {
7889 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7890 }
7891 
7892 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7893 {
7894 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7895 }
7896 
7897 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7898 					u16 event_type)
7899 {
7900 	switch (event_type) {
7901 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7902 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7903 		dev_info_ratelimited(hdev->dev,
7904 			"Clock throttling due to power consumption\n");
7905 		break;
7906 
7907 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7908 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7909 		dev_info_ratelimited(hdev->dev,
7910 			"Power envelope is safe, back to optimal clock\n");
7911 		break;
7912 
7913 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7914 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7915 		dev_info_ratelimited(hdev->dev,
7916 			"Clock throttling due to overheating\n");
7917 		break;
7918 
7919 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7920 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7921 		dev_info_ratelimited(hdev->dev,
7922 			"Thermal envelope is safe, back to optimal clock\n");
7923 		break;
7924 
7925 	default:
7926 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7927 			event_type);
7928 		break;
7929 	}
7930 }
7931 
7932 static void gaudi_handle_eqe(struct hl_device *hdev,
7933 				struct hl_eq_entry *eq_entry)
7934 {
7935 	struct gaudi_device *gaudi = hdev->asic_specific;
7936 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7937 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7938 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7939 	bool reset_required;
7940 	u8 cause;
7941 	int rc;
7942 
7943 	if (event_type >= GAUDI_EVENT_SIZE) {
7944 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7945 				event_type, GAUDI_EVENT_SIZE - 1);
7946 		return;
7947 	}
7948 
7949 	gaudi->events_stat[event_type]++;
7950 	gaudi->events_stat_aggregate[event_type]++;
7951 
7952 	switch (event_type) {
7953 	case GAUDI_EVENT_PCIE_CORE_DERR:
7954 	case GAUDI_EVENT_PCIE_IF_DERR:
7955 	case GAUDI_EVENT_PCIE_PHY_DERR:
7956 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7957 	case GAUDI_EVENT_MME0_ACC_DERR:
7958 	case GAUDI_EVENT_MME0_SBAB_DERR:
7959 	case GAUDI_EVENT_MME1_ACC_DERR:
7960 	case GAUDI_EVENT_MME1_SBAB_DERR:
7961 	case GAUDI_EVENT_MME2_ACC_DERR:
7962 	case GAUDI_EVENT_MME2_SBAB_DERR:
7963 	case GAUDI_EVENT_MME3_ACC_DERR:
7964 	case GAUDI_EVENT_MME3_SBAB_DERR:
7965 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7966 		fallthrough;
7967 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7968 	case GAUDI_EVENT_PSOC_MEM_DERR:
7969 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7970 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7971 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7972 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7973 	case GAUDI_EVENT_MMU_DERR:
7974 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7975 		gaudi_print_irq_info(hdev, event_type, true);
7976 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7977 		goto reset_device;
7978 
7979 	case GAUDI_EVENT_GIC500:
7980 	case GAUDI_EVENT_AXI_ECC:
7981 	case GAUDI_EVENT_L2_RAM_ECC:
7982 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7983 		gaudi_print_irq_info(hdev, event_type, false);
7984 		goto reset_device;
7985 
7986 	case GAUDI_EVENT_HBM0_SPI_0:
7987 	case GAUDI_EVENT_HBM1_SPI_0:
7988 	case GAUDI_EVENT_HBM2_SPI_0:
7989 	case GAUDI_EVENT_HBM3_SPI_0:
7990 		gaudi_print_irq_info(hdev, event_type, false);
7991 		gaudi_hbm_read_interrupts(hdev,
7992 				gaudi_hbm_event_to_dev(event_type),
7993 				&eq_entry->hbm_ecc_data);
7994 		goto reset_device;
7995 
7996 	case GAUDI_EVENT_HBM0_SPI_1:
7997 	case GAUDI_EVENT_HBM1_SPI_1:
7998 	case GAUDI_EVENT_HBM2_SPI_1:
7999 	case GAUDI_EVENT_HBM3_SPI_1:
8000 		gaudi_print_irq_info(hdev, event_type, false);
8001 		gaudi_hbm_read_interrupts(hdev,
8002 				gaudi_hbm_event_to_dev(event_type),
8003 				&eq_entry->hbm_ecc_data);
8004 		hl_fw_unmask_irq(hdev, event_type);
8005 		break;
8006 
8007 	case GAUDI_EVENT_TPC0_DEC:
8008 	case GAUDI_EVENT_TPC1_DEC:
8009 	case GAUDI_EVENT_TPC2_DEC:
8010 	case GAUDI_EVENT_TPC3_DEC:
8011 	case GAUDI_EVENT_TPC4_DEC:
8012 	case GAUDI_EVENT_TPC5_DEC:
8013 	case GAUDI_EVENT_TPC6_DEC:
8014 	case GAUDI_EVENT_TPC7_DEC:
8015 		gaudi_print_irq_info(hdev, event_type, true);
8016 		reset_required = gaudi_tpc_read_interrupts(hdev,
8017 					tpc_dec_event_to_tpc_id(event_type),
8018 					"AXI_SLV_DEC_Error");
8019 		if (reset_required) {
8020 			dev_err(hdev->dev, "reset required due to %s\n",
8021 				gaudi_irq_map_table[event_type].name);
8022 
8023 			hl_device_reset(hdev, 0);
8024 		} else {
8025 			hl_fw_unmask_irq(hdev, event_type);
8026 		}
8027 		break;
8028 
8029 	case GAUDI_EVENT_TPC0_KRN_ERR:
8030 	case GAUDI_EVENT_TPC1_KRN_ERR:
8031 	case GAUDI_EVENT_TPC2_KRN_ERR:
8032 	case GAUDI_EVENT_TPC3_KRN_ERR:
8033 	case GAUDI_EVENT_TPC4_KRN_ERR:
8034 	case GAUDI_EVENT_TPC5_KRN_ERR:
8035 	case GAUDI_EVENT_TPC6_KRN_ERR:
8036 	case GAUDI_EVENT_TPC7_KRN_ERR:
8037 		gaudi_print_irq_info(hdev, event_type, true);
8038 		reset_required = gaudi_tpc_read_interrupts(hdev,
8039 					tpc_krn_event_to_tpc_id(event_type),
8040 					"KRN_ERR");
8041 		if (reset_required) {
8042 			dev_err(hdev->dev, "reset required due to %s\n",
8043 				gaudi_irq_map_table[event_type].name);
8044 
8045 			hl_device_reset(hdev, 0);
8046 		} else {
8047 			hl_fw_unmask_irq(hdev, event_type);
8048 		}
8049 		break;
8050 
8051 	case GAUDI_EVENT_PCIE_CORE_SERR:
8052 	case GAUDI_EVENT_PCIE_IF_SERR:
8053 	case GAUDI_EVENT_PCIE_PHY_SERR:
8054 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8055 	case GAUDI_EVENT_MME0_ACC_SERR:
8056 	case GAUDI_EVENT_MME0_SBAB_SERR:
8057 	case GAUDI_EVENT_MME1_ACC_SERR:
8058 	case GAUDI_EVENT_MME1_SBAB_SERR:
8059 	case GAUDI_EVENT_MME2_ACC_SERR:
8060 	case GAUDI_EVENT_MME2_SBAB_SERR:
8061 	case GAUDI_EVENT_MME3_ACC_SERR:
8062 	case GAUDI_EVENT_MME3_SBAB_SERR:
8063 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8064 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
8065 	case GAUDI_EVENT_PSOC_MEM_SERR:
8066 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8067 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8068 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8069 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8070 		fallthrough;
8071 	case GAUDI_EVENT_MMU_SERR:
8072 		gaudi_print_irq_info(hdev, event_type, true);
8073 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8074 		hl_fw_unmask_irq(hdev, event_type);
8075 		break;
8076 
8077 	case GAUDI_EVENT_PCIE_DEC:
8078 	case GAUDI_EVENT_MME0_WBC_RSP:
8079 	case GAUDI_EVENT_MME0_SBAB0_RSP:
8080 	case GAUDI_EVENT_MME1_WBC_RSP:
8081 	case GAUDI_EVENT_MME1_SBAB0_RSP:
8082 	case GAUDI_EVENT_MME2_WBC_RSP:
8083 	case GAUDI_EVENT_MME2_SBAB0_RSP:
8084 	case GAUDI_EVENT_MME3_WBC_RSP:
8085 	case GAUDI_EVENT_MME3_SBAB0_RSP:
8086 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
8087 	case GAUDI_EVENT_PSOC_AXI_DEC:
8088 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
8089 	case GAUDI_EVENT_MMU_PAGE_FAULT:
8090 	case GAUDI_EVENT_MMU_WR_PERM:
8091 	case GAUDI_EVENT_RAZWI_OR_ADC:
8092 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8093 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8094 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8095 		fallthrough;
8096 	case GAUDI_EVENT_NIC0_QM0:
8097 	case GAUDI_EVENT_NIC0_QM1:
8098 	case GAUDI_EVENT_NIC1_QM0:
8099 	case GAUDI_EVENT_NIC1_QM1:
8100 	case GAUDI_EVENT_NIC2_QM0:
8101 	case GAUDI_EVENT_NIC2_QM1:
8102 	case GAUDI_EVENT_NIC3_QM0:
8103 	case GAUDI_EVENT_NIC3_QM1:
8104 	case GAUDI_EVENT_NIC4_QM0:
8105 	case GAUDI_EVENT_NIC4_QM1:
8106 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8107 		gaudi_print_irq_info(hdev, event_type, true);
8108 		gaudi_handle_qman_err(hdev, event_type);
8109 		hl_fw_unmask_irq(hdev, event_type);
8110 		break;
8111 
8112 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8113 		gaudi_print_irq_info(hdev, event_type, true);
8114 		goto reset_device;
8115 
8116 	case GAUDI_EVENT_TPC0_BMON_SPMU:
8117 	case GAUDI_EVENT_TPC1_BMON_SPMU:
8118 	case GAUDI_EVENT_TPC2_BMON_SPMU:
8119 	case GAUDI_EVENT_TPC3_BMON_SPMU:
8120 	case GAUDI_EVENT_TPC4_BMON_SPMU:
8121 	case GAUDI_EVENT_TPC5_BMON_SPMU:
8122 	case GAUDI_EVENT_TPC6_BMON_SPMU:
8123 	case GAUDI_EVENT_TPC7_BMON_SPMU:
8124 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8125 		gaudi_print_irq_info(hdev, event_type, false);
8126 		hl_fw_unmask_irq(hdev, event_type);
8127 		break;
8128 
8129 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8130 		gaudi_print_irq_info(hdev, event_type, false);
8131 		gaudi_print_sm_sei_info(hdev, event_type,
8132 					&eq_entry->sm_sei_data);
8133 		rc = hl_state_dump(hdev);
8134 		if (rc)
8135 			dev_err(hdev->dev,
8136 				"Error during system state dump %d\n", rc);
8137 		hl_fw_unmask_irq(hdev, event_type);
8138 		break;
8139 
8140 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8141 		gaudi_print_clk_change_info(hdev, event_type);
8142 		hl_fw_unmask_irq(hdev, event_type);
8143 		break;
8144 
8145 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
8146 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8147 		dev_err(hdev->dev,
8148 			"Received high temp H/W interrupt %d (cause %d)\n",
8149 			event_type, cause);
8150 		break;
8151 
8152 	case GAUDI_EVENT_DEV_RESET_REQ:
8153 		gaudi_print_irq_info(hdev, event_type, false);
8154 		goto reset_device;
8155 
8156 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8157 		gaudi_print_irq_info(hdev, event_type, false);
8158 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8159 		goto reset_device;
8160 
8161 	case GAUDI_EVENT_FW_ALIVE_S:
8162 		gaudi_print_irq_info(hdev, event_type, false);
8163 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8164 		goto reset_device;
8165 
8166 	default:
8167 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8168 				event_type);
8169 		break;
8170 	}
8171 
8172 	return;
8173 
8174 reset_device:
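	/*
	 * Reset policy for fatal events: when FW security is enabled the hard
	 * reset must be requested through the firmware, otherwise the driver
	 * performs it directly (if hard reset on FW events is enabled); if
	 * neither applies, only unmask the interrupt so further events can
	 * still be reported.
	 */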
8175 	if (hdev->asic_prop.fw_security_enabled)
8176 		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8177 	else if (hdev->hard_reset_on_fw_events)
8178 		hl_device_reset(hdev, HL_RESET_HARD);
8179 	else
8180 		hl_fw_unmask_irq(hdev, event_type);
8181 }
8182 
8183 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8184 					u32 *size)
8185 {
8186 	struct gaudi_device *gaudi = hdev->asic_specific;
8187 
8188 	if (aggregate) {
8189 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
8190 		return gaudi->events_stat_aggregate;
8191 	}
8192 
8193 	*size = (u32) sizeof(gaudi->events_stat);
8194 	return gaudi->events_stat;
8195 }
8196 
8197 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8198 					u32 flags)
8199 {
8200 	struct gaudi_device *gaudi = hdev->asic_specific;
8201 	u32 status, timeout_usec;
8202 	int rc;
8203 
8204 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8205 		hdev->hard_reset_pending)
8206 		return 0;
8207 
8208 	if (hdev->pldm)
8209 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8210 	else
8211 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8212 
8213 	/* L0 & L1 invalidation */
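	/*
	 * The STLB sequence below follows the HW flow: select the invalidation
	 * scope, advance the cache-invalidation producer index, then trigger;
	 * completion is signalled when STLB_INV_PS reads back as zero. The
	 * exact meaning of the PS values (3, then 2) is taken from the HW
	 * spec and not re-derived here.
	 */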
8214 	WREG32(mmSTLB_INV_PS, 3);
8215 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8216 	WREG32(mmSTLB_INV_PS, 2);
8217 
8218 	rc = hl_poll_timeout(
8219 		hdev,
8220 		mmSTLB_INV_PS,
8221 		status,
8222 		!status,
8223 		1000,
8224 		timeout_usec);
8225 
8226 	WREG32(mmSTLB_INV_SET, 0);
8227 
8228 	if (rc) {
8229 		dev_err_ratelimited(hdev->dev,
8230 					"MMU cache invalidation timeout\n");
8231 		hl_device_reset(hdev, HL_RESET_HARD);
8232 	}
8233 
8234 	return rc;
8235 }
8236 
8237 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8238 						bool is_hard, u32 flags,
8239 						u32 asid, u64 va, u64 size)
8240 {
8241 	/* Treat as invalidate all because there is no range invalidation
8242 	 * in Gaudi
8243 	 */
8244 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8245 }
8246 
8247 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8248 					u32 asid, u64 phys_addr)
8249 {
8250 	u32 status, timeout_usec;
8251 	int rc;
8252 
8253 	if (hdev->pldm)
8254 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8255 	else
8256 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8257 
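	/*
	 * Program the hop0 (page-table root) physical address for this ASID.
	 * The address is split across two registers (bits 43:12 and 49:44),
	 * then bit 31 of MMU_BUSY kicks the update and is polled until the
	 * HW clears it.
	 */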
8258 	WREG32(MMU_ASID, asid);
8259 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8260 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8261 	WREG32(MMU_BUSY, 0x80000000);
8262 
8263 	rc = hl_poll_timeout(
8264 		hdev,
8265 		MMU_BUSY,
8266 		status,
8267 		!(status & 0x80000000),
8268 		1000,
8269 		timeout_usec);
8270 
8271 	if (rc) {
8272 		dev_err(hdev->dev,
8273 			"Timeout during MMU hop0 config of asid %d\n", asid);
8274 		return rc;
8275 	}
8276 
8277 	return 0;
8278 }
8279 
8280 static int gaudi_send_heartbeat(struct hl_device *hdev)
8281 {
8282 	struct gaudi_device *gaudi = hdev->asic_specific;
8283 
8284 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8285 		return 0;
8286 
8287 	return hl_fw_send_heartbeat(hdev);
8288 }
8289 
8290 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8291 {
8292 	struct gaudi_device *gaudi = hdev->asic_specific;
8293 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8294 	int rc;
8295 
8296 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8297 		return 0;
8298 
8299 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8300 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8301 					mmCPU_BOOT_ERR1);
8302 	if (rc)
8303 		return rc;
8304 
8305 	if (!strlen(prop->cpucp_info.card_name))
8306 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8307 				CARD_NAME_MAX_LEN);
8308 
8309 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8310 
8311 	set_default_power_values(hdev);
8312 
8313 	hdev->max_power = prop->max_power_default;
8314 
8315 	return 0;
8316 }
8317 
8318 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8319 					u8 mask_len, struct seq_file *s)
8320 {
8321 	struct gaudi_device *gaudi = hdev->asic_specific;
8322 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8323 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8324 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8325 	unsigned long *mask = (unsigned long *)mask_arr;
8326 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8327 	bool is_idle = true, is_eng_idle, is_slave;
8328 	u64 offset;
8329 	int i, dma_id, port;
8330 
8331 	mutex_lock(&gaudi->clk_gate_mutex);
8332 
8333 	hdev->asic_funcs->disable_clock_gating(hdev);
8334 
8335 	if (s)
8336 		seq_puts(s,
8337 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8338 			"---  -------  ------------  ----------  -------------\n");
8339 
8340 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8341 		dma_id = gaudi_dma_assignment[i];
8342 		offset = dma_id * DMA_QMAN_OFFSET;
8343 
8344 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8345 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8346 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8347 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8348 				IS_DMA_IDLE(dma_core_sts0);
8349 		is_idle &= is_eng_idle;
8350 
8351 		if (mask && !is_eng_idle)
8352 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8353 		if (s)
8354 			seq_printf(s, fmt, dma_id,
8355 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8356 				qm_cgm_sts, dma_core_sts0);
8357 	}
8358 
8359 	if (s)
8360 		seq_puts(s,
8361 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8362 			"---  -------  ------------  ----------  ----------\n");
8363 
8364 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8365 		offset = i * TPC_QMAN_OFFSET;
8366 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8367 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8368 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8369 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8370 				IS_TPC_IDLE(tpc_cfg_sts);
8371 		is_idle &= is_eng_idle;
8372 
8373 		if (mask && !is_eng_idle)
8374 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8375 		if (s)
8376 			seq_printf(s, fmt, i,
8377 				is_eng_idle ? "Y" : "N",
8378 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8379 	}
8380 
8381 	if (s)
8382 		seq_puts(s,
8383 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8384 			"---  -------  ------------  ----------  -----------\n");
8385 
8386 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8387 		offset = i * MME_QMAN_OFFSET;
8388 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8389 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8390 
8391 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8392 		is_slave = i % 2;
8393 		if (!is_slave) {
8394 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8395 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8396 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8397 		}
8398 
8399 		is_idle &= is_eng_idle;
8400 
8401 		if (mask && !is_eng_idle)
8402 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8403 		if (s) {
8404 			if (!is_slave)
8405 				seq_printf(s, fmt, i,
8406 					is_eng_idle ? "Y" : "N",
8407 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8408 			else
8409 				seq_printf(s, mme_slave_fmt, i,
8410 					is_eng_idle ? "Y" : "N", "-",
8411 					"-", mme_arch_sts);
8412 		}
8413 	}
8414 
8415 	if (s)
8416 		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8417 				"---  -------  ------------  ----------\n");
8418 
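	/*
	 * Each NIC macro hosts two engines (ports 2*i and 2*i+1), each with
	 * its own QMAN; only ports whose HW capability bit is set are probed.
	 */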
8419 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8420 		offset = i * NIC_MACRO_QMAN_OFFSET;
8421 		port = 2 * i;
8422 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8423 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8424 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8425 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8426 			is_idle &= is_eng_idle;
8427 
8428 			if (mask && !is_eng_idle)
8429 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8430 			if (s)
8431 				seq_printf(s, nic_fmt, port,
8432 						is_eng_idle ? "Y" : "N",
8433 						qm_glbl_sts0, qm_cgm_sts);
8434 		}
8435 
8436 		port = 2 * i + 1;
8437 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8438 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8439 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8440 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8441 			is_idle &= is_eng_idle;
8442 
8443 			if (mask && !is_eng_idle)
8444 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8445 			if (s)
8446 				seq_printf(s, nic_fmt, port,
8447 						is_eng_idle ? "Y" : "N",
8448 						qm_glbl_sts0, qm_cgm_sts);
8449 		}
8450 	}
8451 
8452 	if (s)
8453 		seq_puts(s, "\n");
8454 
8455 	hdev->asic_funcs->set_clock_gating(hdev);
8456 
8457 	mutex_unlock(&gaudi->clk_gate_mutex);
8458 
8459 	return is_idle;
8460 }
8461 
8462 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8463 	__acquires(&gaudi->hw_queues_lock)
8464 {
8465 	struct gaudi_device *gaudi = hdev->asic_specific;
8466 
8467 	spin_lock(&gaudi->hw_queues_lock);
8468 }
8469 
8470 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8471 	__releases(&gaudi->hw_queues_lock)
8472 {
8473 	struct gaudi_device *gaudi = hdev->asic_specific;
8474 
8475 	spin_unlock(&gaudi->hw_queues_lock);
8476 }
8477 
8478 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8479 {
8480 	return hdev->pdev->device;
8481 }
8482 
8483 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8484 				size_t max_size)
8485 {
8486 	struct gaudi_device *gaudi = hdev->asic_specific;
8487 
8488 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8489 		return 0;
8490 
8491 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8492 }
8493 
8494 /*
8495  * this function should be used only during initialization and/or after reset,
8496  * when there are no active users.
8497  */
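/*
 * A rough outline of the flow below: point the TPC kernel base and icache
 * base at the kernel address, issue an icache invalidate + 64KB prefetch,
 * wait for the vector pipe to drain, then trigger execution and wait for
 * both the vector pipe and the work-queue in-flight counter to go idle.
 */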
8498 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8499 				u32 tpc_id)
8500 {
8501 	struct gaudi_device *gaudi = hdev->asic_specific;
8502 	u64 kernel_timeout;
8503 	u32 status, offset;
8504 	int rc;
8505 
8506 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8507 
8508 	if (hdev->pldm)
8509 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8510 	else
8511 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8512 
8513 	mutex_lock(&gaudi->clk_gate_mutex);
8514 
8515 	hdev->asic_funcs->disable_clock_gating(hdev);
8516 
8517 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8518 			lower_32_bits(tpc_kernel));
8519 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8520 			upper_32_bits(tpc_kernel));
8521 
8522 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8523 			lower_32_bits(tpc_kernel));
8524 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8525 			upper_32_bits(tpc_kernel));
8526 	/* set a valid LUT pointer, content is of no significance */
8527 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8528 			lower_32_bits(tpc_kernel));
8529 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8530 			upper_32_bits(tpc_kernel));
8531 
8532 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8533 			lower_32_bits(CFG_BASE +
8534 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8535 
8536 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8537 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8538 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8539 	/* wait a bit for the engine to start executing */
8540 	usleep_range(1000, 1500);
8541 
8542 	/* wait until engine has finished executing */
8543 	rc = hl_poll_timeout(
8544 		hdev,
8545 		mmTPC0_CFG_STATUS + offset,
8546 		status,
8547 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8548 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8549 		1000,
8550 		kernel_timeout);
8551 
8552 	if (rc) {
8553 		dev_err(hdev->dev,
8554 			"Timeout while waiting for TPC%d icache prefetch\n",
8555 			tpc_id);
8556 		hdev->asic_funcs->set_clock_gating(hdev);
8557 		mutex_unlock(&gaudi->clk_gate_mutex);
8558 		return -EIO;
8559 	}
8560 
8561 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8562 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8563 
8564 	/* wait a bit for the engine to start executing */
8565 	usleep_range(1000, 1500);
8566 
8567 	/* wait until engine has finished executing */
8568 	rc = hl_poll_timeout(
8569 		hdev,
8570 		mmTPC0_CFG_STATUS + offset,
8571 		status,
8572 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8573 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8574 		1000,
8575 		kernel_timeout);
8576 
8577 	if (rc) {
8578 		dev_err(hdev->dev,
8579 			"Timeout while waiting for TPC%d vector pipe\n",
8580 			tpc_id);
8581 		hdev->asic_funcs->set_clock_gating(hdev);
8582 		mutex_unlock(&gaudi->clk_gate_mutex);
8583 		return -EIO;
8584 	}
8585 
8586 	rc = hl_poll_timeout(
8587 		hdev,
8588 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8589 		status,
8590 		(status == 0),
8591 		1000,
8592 		kernel_timeout);
8593 
8594 	hdev->asic_funcs->set_clock_gating(hdev);
8595 	mutex_unlock(&gaudi->clk_gate_mutex);
8596 
8597 	if (rc) {
8598 		dev_err(hdev->dev,
8599 			"Timeout while waiting for TPC%d kernel to execute\n",
8600 			tpc_id);
8601 		return -EIO;
8602 	}
8603 
8604 	return 0;
8605 }
8606 
8607 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8608 		struct hl_ctx *ctx)
8609 {
8610 	struct gaudi_device *gaudi = hdev->asic_specific;
8611 	int min_alloc_order, rc, collective_cb_size;
8612 
8613 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8614 		return 0;
8615 
8616 	hdev->internal_cb_pool_virt_addr =
8617 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8618 					HOST_SPACE_INTERNAL_CB_SZ,
8619 					&hdev->internal_cb_pool_dma_addr,
8620 					GFP_KERNEL | __GFP_ZERO);
8621 
8622 	if (!hdev->internal_cb_pool_virt_addr)
8623 		return -ENOMEM;
8624 
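	/*
	 * Base the pool's minimum allocation granularity on the size of one
	 * collective CB (five msg_short packets plus a fence packet, per the
	 * calculation below).
	 */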
8625 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8626 			sizeof(struct packet_fence);
8627 	min_alloc_order = ilog2(collective_cb_size);
8628 
8629 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8630 	if (!hdev->internal_cb_pool) {
8631 		dev_err(hdev->dev,
8632 			"Failed to create internal CB pool\n");
8633 		rc = -ENOMEM;
8634 		goto free_internal_cb_pool;
8635 	}
8636 
8637 	rc = gen_pool_add(hdev->internal_cb_pool,
8638 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8639 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8640 	if (rc) {
8641 		dev_err(hdev->dev,
8642 			"Failed to add memory to internal CB pool\n");
8643 		rc = -EFAULT;
8644 		goto destroy_internal_cb_pool;
8645 	}
8646 
8647 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8648 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8649 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8650 
8651 	if (!hdev->internal_cb_va_base) {
8652 		rc = -ENOMEM;
8653 		goto destroy_internal_cb_pool;
8654 	}
8655 
8656 	mutex_lock(&ctx->mmu_lock);
8657 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8658 			hdev->internal_cb_pool_dma_addr,
8659 			HOST_SPACE_INTERNAL_CB_SZ);
8660 
8661 	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8662 	mutex_unlock(&ctx->mmu_lock);
8663 
8664 	if (rc)
8665 		goto unreserve_internal_cb_pool;
8666 
8667 	return 0;
8668 
8669 unreserve_internal_cb_pool:
8670 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8671 			HOST_SPACE_INTERNAL_CB_SZ);
8672 destroy_internal_cb_pool:
8673 	gen_pool_destroy(hdev->internal_cb_pool);
8674 free_internal_cb_pool:
8675 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8676 			HOST_SPACE_INTERNAL_CB_SZ,
8677 			hdev->internal_cb_pool_virt_addr,
8678 			hdev->internal_cb_pool_dma_addr);
8679 
8680 	return rc;
8681 }
8682 
8683 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8684 		struct hl_ctx *ctx)
8685 {
8686 	struct gaudi_device *gaudi = hdev->asic_specific;
8687 
8688 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8689 		return;
8690 
8691 	mutex_lock(&ctx->mmu_lock);
8692 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8693 			HOST_SPACE_INTERNAL_CB_SZ);
8694 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8695 			HOST_SPACE_INTERNAL_CB_SZ);
8696 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8697 	mutex_unlock(&ctx->mmu_lock);
8698 
8699 	gen_pool_destroy(hdev->internal_cb_pool);
8700 
8701 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8702 			HOST_SPACE_INTERNAL_CB_SZ,
8703 			hdev->internal_cb_pool_virt_addr,
8704 			hdev->internal_cb_pool_dma_addr);
8705 }
8706 
8707 static int gaudi_ctx_init(struct hl_ctx *ctx)
8708 {
8709 	int rc;
8710 
8711 	if (ctx->asid == HL_KERNEL_ASID_ID)
8712 		return 0;
8713 
8714 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8715 	if (rc)
8716 		return rc;
8717 
8718 	rc = gaudi_restore_user_registers(ctx->hdev);
8719 	if (rc)
8720 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8721 
8722 	return rc;
8723 }
8724 
8725 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8726 {
8727 	if (ctx->asid == HL_KERNEL_ASID_ID)
8728 		return;
8729 
8730 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8731 }
8732 
8733 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8734 {
8735 	return gaudi_cq_assignment[cq_idx];
8736 }
8737 
8738 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8739 {
8740 	return sizeof(struct packet_msg_short) +
8741 			sizeof(struct packet_msg_prot) * 2;
8742 }
8743 
8744 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8745 {
8746 	return sizeof(struct packet_msg_short) * 4 +
8747 			sizeof(struct packet_fence) +
8748 			sizeof(struct packet_msg_prot) * 2;
8749 }
8750 
8751 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8752 {
8753 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8754 }
8755 
8756 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8757 				u32 size, bool eb)
8758 {
8759 	struct hl_cb *cb = (struct hl_cb *) data;
8760 	struct packet_msg_short *pkt;
8761 	u32 value, ctl, pkt_size = sizeof(*pkt);
8762 
8763 	pkt = cb->kernel_address + size;
8764 	memset(pkt, 0, pkt_size);
8765 
8766 	/* Inc by 1, Mode ADD */
8767 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8768 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8769 
8770 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8771 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8772 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8773 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8774 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8775 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8776 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8777 
8778 	pkt->value = cpu_to_le32(value);
8779 	pkt->ctl = cpu_to_le32(ctl);
8780 
8781 	return size + pkt_size;
8782 }
8783 
8784 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8785 					u16 addr)
8786 {
8787 	u32 ctl, pkt_size = sizeof(*pkt);
8788 
8789 	memset(pkt, 0, pkt_size);
8790 
8791 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8792 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8793 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8794 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8795 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8796 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* only the last packet needs MB */
8797 
8798 	pkt->value = cpu_to_le32(value);
8799 	pkt->ctl = cpu_to_le32(ctl);
8800 
8801 	return pkt_size;
8802 }
8803 
8804 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8805 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8806 		u16 sob_val, u16 mon_id)
8807 {
8808 	u64 monitor_base;
8809 	u32 ctl, value, pkt_size = sizeof(*pkt);
8810 	u16 msg_addr_offset;
8811 	u8 mask;
8812 
8813 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8814 		dev_err(hdev->dev,
8815 			"sob_base %u (mask %#x) is not valid\n",
8816 			sob_base, sob_mask);
8817 		return 0;
8818 	}
8819 
8820 	/*
8821 	 * monitor_base should be the content of the base0 address registers,
8822 	 * so it will be added to the msg short offsets
8823 	 */
8824 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8825 
8826 	msg_addr_offset =
8827 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8828 				monitor_base;
8829 
8830 	memset(pkt, 0, pkt_size);
8831 
8832 	/* Monitor config packet: bind the monitor to a sync object */
8833 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8834 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8835 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8836 			0); /* GREATER OR EQUAL */
8837 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8838 
8839 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8840 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8841 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8842 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8843 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8844 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8845 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8846 
8847 	pkt->value = cpu_to_le32(value);
8848 	pkt->ctl = cpu_to_le32(ctl);
8849 
8850 	return pkt_size;
8851 }
8852 
8853 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8854 {
8855 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8856 
8857 	memset(pkt, 0, pkt_size);
8858 
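	/*
	 * Fence config: decrement by 1, wait for a target value of 1, and use
	 * fence ID 2, which matches the CP_FENCE2_RDATA registers that
	 * gaudi_get_fence_addr() resolves for the monitor payload.
	 */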
8859 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8860 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8861 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8862 
8863 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8864 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8865 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8866 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8867 
8868 	pkt->cfg = cpu_to_le32(cfg);
8869 	pkt->ctl = cpu_to_le32(ctl);
8870 
8871 	return pkt_size;
8872 }
8873 
8874 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8875 {
8876 	u32 offset, nic_index;
8877 
8878 	switch (queue_id) {
8879 	case GAUDI_QUEUE_ID_DMA_0_0:
8880 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8881 		break;
8882 	case GAUDI_QUEUE_ID_DMA_0_1:
8883 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8884 		break;
8885 	case GAUDI_QUEUE_ID_DMA_0_2:
8886 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8887 		break;
8888 	case GAUDI_QUEUE_ID_DMA_0_3:
8889 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8890 		break;
8891 	case GAUDI_QUEUE_ID_DMA_1_0:
8892 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8893 		break;
8894 	case GAUDI_QUEUE_ID_DMA_1_1:
8895 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8896 		break;
8897 	case GAUDI_QUEUE_ID_DMA_1_2:
8898 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8899 		break;
8900 	case GAUDI_QUEUE_ID_DMA_1_3:
8901 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8902 		break;
8903 	case GAUDI_QUEUE_ID_DMA_5_0:
8904 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8905 		break;
8906 	case GAUDI_QUEUE_ID_DMA_5_1:
8907 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8908 		break;
8909 	case GAUDI_QUEUE_ID_DMA_5_2:
8910 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8911 		break;
8912 	case GAUDI_QUEUE_ID_DMA_5_3:
8913 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8914 		break;
8915 	case GAUDI_QUEUE_ID_TPC_7_0:
8916 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8917 		break;
8918 	case GAUDI_QUEUE_ID_TPC_7_1:
8919 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8920 		break;
8921 	case GAUDI_QUEUE_ID_TPC_7_2:
8922 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8923 		break;
8924 	case GAUDI_QUEUE_ID_TPC_7_3:
8925 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8926 		break;
8927 	case GAUDI_QUEUE_ID_NIC_0_0:
8928 	case GAUDI_QUEUE_ID_NIC_1_0:
8929 	case GAUDI_QUEUE_ID_NIC_2_0:
8930 	case GAUDI_QUEUE_ID_NIC_3_0:
8931 	case GAUDI_QUEUE_ID_NIC_4_0:
8932 	case GAUDI_QUEUE_ID_NIC_5_0:
8933 	case GAUDI_QUEUE_ID_NIC_6_0:
8934 	case GAUDI_QUEUE_ID_NIC_7_0:
8935 	case GAUDI_QUEUE_ID_NIC_8_0:
8936 	case GAUDI_QUEUE_ID_NIC_9_0:
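		/*
		 * NIC queue IDs are spaced four apart per engine (one per
		 * stream), so dividing by four recovers the engine index;
		 * each NIC macro then holds two engines, hence the further
		 * split into macro offset and engine-within-macro offset.
		 */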
8937 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8938 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8939 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8940 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8941 		break;
8942 	case GAUDI_QUEUE_ID_NIC_0_1:
8943 	case GAUDI_QUEUE_ID_NIC_1_1:
8944 	case GAUDI_QUEUE_ID_NIC_2_1:
8945 	case GAUDI_QUEUE_ID_NIC_3_1:
8946 	case GAUDI_QUEUE_ID_NIC_4_1:
8947 	case GAUDI_QUEUE_ID_NIC_5_1:
8948 	case GAUDI_QUEUE_ID_NIC_6_1:
8949 	case GAUDI_QUEUE_ID_NIC_7_1:
8950 	case GAUDI_QUEUE_ID_NIC_8_1:
8951 	case GAUDI_QUEUE_ID_NIC_9_1:
8952 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8953 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8954 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8955 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8956 		break;
8957 	case GAUDI_QUEUE_ID_NIC_0_2:
8958 	case GAUDI_QUEUE_ID_NIC_1_2:
8959 	case GAUDI_QUEUE_ID_NIC_2_2:
8960 	case GAUDI_QUEUE_ID_NIC_3_2:
8961 	case GAUDI_QUEUE_ID_NIC_4_2:
8962 	case GAUDI_QUEUE_ID_NIC_5_2:
8963 	case GAUDI_QUEUE_ID_NIC_6_2:
8964 	case GAUDI_QUEUE_ID_NIC_7_2:
8965 	case GAUDI_QUEUE_ID_NIC_8_2:
8966 	case GAUDI_QUEUE_ID_NIC_9_2:
8967 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8968 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8969 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8970 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8971 		break;
8972 	case GAUDI_QUEUE_ID_NIC_0_3:
8973 	case GAUDI_QUEUE_ID_NIC_1_3:
8974 	case GAUDI_QUEUE_ID_NIC_2_3:
8975 	case GAUDI_QUEUE_ID_NIC_3_3:
8976 	case GAUDI_QUEUE_ID_NIC_4_3:
8977 	case GAUDI_QUEUE_ID_NIC_5_3:
8978 	case GAUDI_QUEUE_ID_NIC_6_3:
8979 	case GAUDI_QUEUE_ID_NIC_7_3:
8980 	case GAUDI_QUEUE_ID_NIC_8_3:
8981 	case GAUDI_QUEUE_ID_NIC_9_3:
8982 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8983 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8984 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8985 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8986 		break;
8987 	default:
8988 		return -EINVAL;
8989 	}
8990 
8991 	*addr = CFG_BASE + offset;
8992 
8993 	return 0;
8994 }
8995 
8996 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8997 {
8998 	u64 monitor_base;
8999 	u32 size = 0;
9000 	u16 msg_addr_offset;
9001 
9002 	/*
9003 	 * monitor_base should be the content of the base0 address registers,
9004 	 * so it will be added to the msg short offsets
9005 	 */
9006 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9007 
9008 	/* First monitor config packet: low address of the sync */
9009 	msg_addr_offset =
9010 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9011 				monitor_base;
9012 
9013 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9014 					msg_addr_offset);
9015 
9016 	/* Second monitor config packet: high address of the sync */
9017 	msg_addr_offset =
9018 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9019 				monitor_base;
9020 
9021 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9022 					msg_addr_offset);
9023 
9024 	/*
9025 	 * Third monitor config packet: the payload, i.e. what to write when the
9026 	 * sync triggers
9027 	 */
9028 	msg_addr_offset =
9029 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9030 				monitor_base;
9031 
9032 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9033 
9034 	return size;
9035 }
9036 
9037 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9038 				struct hl_gen_wait_properties *prop)
9039 {
9040 	struct hl_cb *cb = (struct hl_cb *) prop->data;
9041 	void *buf = cb->kernel_address;
9042 	u64 fence_addr = 0;
9043 	u32 size = prop->size;
9044 
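	/*
	 * Wait CB layout: three monitor-config msg_short packets (payload
	 * address low/high and payload data), an arm-monitor packet that
	 * binds the monitor to the SOB group/value, and a fence packet that
	 * blocks the queue until the monitor writes the payload.
	 */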
9045 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9046 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9047 				prop->q_idx);
9048 		return 0;
9049 	}
9050 
9051 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9052 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9053 			prop->sob_mask, prop->sob_val, prop->mon_id);
9054 	size += gaudi_add_fence_pkt(buf + size);
9055 
9056 	return size;
9057 }
9058 
9059 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9060 {
9061 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9062 
9063 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9064 		hw_sob->sob_id);
9065 
9066 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9067 			hw_sob->sob_id * 4, 0);
9068 
9069 	kref_init(&hw_sob->kref);
9070 }
9071 
9072 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9073 {
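	/*
	 * The boot firmware is presumed to leave HL_POWER9_HOST_MAGIC in this
	 * sticky (non-reset) scratch register when the host supports a full
	 * 64-bit DMA mask; otherwise fall back to 48 bits.
	 */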
9074 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9075 							HL_POWER9_HOST_MAGIC) {
9076 		hdev->power9_64bit_dma_enable = 1;
9077 		hdev->dma_mask = 64;
9078 	} else {
9079 		hdev->power9_64bit_dma_enable = 0;
9080 		hdev->dma_mask = 48;
9081 	}
9082 }
9083 
9084 static u64 gaudi_get_device_time(struct hl_device *hdev)
9085 {
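	/*
	 * Compose the 64-bit timestamp from the upper and lower 32-bit counter
	 * registers. Note that the two halves are read separately, with no
	 * retry if the low word wraps between the reads.
	 */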
9086 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9087 
9088 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9089 }
9090 
9091 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9092 				u32 *block_size, u32 *block_id)
9093 {
9094 	return -EPERM;
9095 }
9096 
9097 static int gaudi_block_mmap(struct hl_device *hdev,
9098 				struct vm_area_struct *vma,
9099 				u32 block_id, u32 block_size)
9100 {
9101 	return -EPERM;
9102 }
9103 
9104 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9105 {
9106 	struct cpu_dyn_regs *dyn_regs =
9107 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9108 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9109 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9110 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
9111 
9112 	WREG32(irq_handler_offset,
9113 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9114 }
9115 
9116 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9117 {
9118 	switch (pll_idx) {
9119 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
9120 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
9121 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
9122 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
9123 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
9124 	case HL_GAUDI_MME_PLL: return MME_PLL;
9125 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
9126 	case HL_GAUDI_IF_PLL: return IF_PLL;
9127 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9128 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
9129 	default: return -EINVAL;
9130 	}
9131 }
9132 
9133 static int gaudi_add_sync_to_engine_map_entry(
9134 	struct hl_sync_to_engine_map *map, u32 reg_value,
9135 	enum hl_sync_engine_type engine_type, u32 engine_id)
9136 {
9137 	struct hl_sync_to_engine_map_entry *entry;
9138 
9139 	/* The register value represents a partial address of a sync object
9140 	 * and is used as a unique identifier. To obtain it, strip the
9141 	 * CFG base bits from the value.
9142 	 */
9143 	if (reg_value == 0 || reg_value == 0xffffffff)
9144 		return 0;
9145 	reg_value -= (u32)CFG_BASE;
9146 
9147 	/* create a new hash entry */
9148 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9149 	if (!entry)
9150 		return -ENOMEM;
9151 	entry->engine_type = engine_type;
9152 	entry->engine_id = engine_id;
9153 	entry->sync_id = reg_value;
9154 	hash_add(map->tb, &entry->node, reg_value);
9155 
9156 	return 0;
9157 }
9158 
9159 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9160 				struct hl_sync_to_engine_map *map)
9161 {
9162 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9163 	struct gaudi_device *gaudi = hdev->asic_specific;
9164 	int i, j, rc;
9165 	u32 reg_value;
9166 
9167 	/* Iterate over TPC engines */
9168 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9169 		/* TPC registers must be accessed with clock gating disabled */
9170 		mutex_lock(&gaudi->clk_gate_mutex);
9171 		hdev->asic_funcs->disable_clock_gating(hdev);
9172 
9173 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9174 					sds->props[SP_NEXT_TPC] * i);
9175 
9176 		/* We can reenable clock_gating */
9177 		hdev->asic_funcs->set_clock_gating(hdev);
9178 		mutex_unlock(&gaudi->clk_gate_mutex);
9179 
9180 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9181 							ENGINE_TPC, i);
9182 		if (rc)
9183 			goto free_sync_to_engine_map;
9184 	}
9185 
9186 	/* Iterate over MME engines */
9187 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9188 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9189 			/* MME registers must be accessed with clock gating
9190 			 * disabled
9191 			 */
9192 			mutex_lock(&gaudi->clk_gate_mutex);
9193 			hdev->asic_funcs->disable_clock_gating(hdev);
9194 
9195 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9196 						sds->props[SP_NEXT_MME] * i +
9197 						j * sizeof(u32));
9198 
9199 			/* We can reenable clock_gating */
9200 			hdev->asic_funcs->set_clock_gating(hdev);
9201 			mutex_unlock(&gaudi->clk_gate_mutex);
9202 
9203 			rc = gaudi_add_sync_to_engine_map_entry(
9204 				map, reg_value, ENGINE_MME,
9205 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9206 			if (rc)
9207 				goto free_sync_to_engine_map;
9208 		}
9209 	}
9210 
9211 	/* Iterate over DMA engines */
9212 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9213 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9214 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
9215 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9216 							ENGINE_DMA, i);
9217 		if (rc)
9218 			goto free_sync_to_engine_map;
9219 	}
9220 
9221 	return 0;
9222 
9223 free_sync_to_engine_map:
9224 	hl_state_dump_free_sync_to_engine_map(map);
9225 
9226 	return rc;
9227 }
9228 
9229 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9230 {
9231 	return FIELD_GET(
9232 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9233 		mon->status);
9234 }
9235 
9236 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9237 {
9238 	const size_t max_write = 10;
9239 	u32 gid, mask, sob;
9240 	int i, offset;
9241 
9242 	/* Sync object ID is calculated as follows:
9243 	 * (8 * group_id + cleared bits in mask)
9244 	 */
9245 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9246 			mon->arm_data);
9247 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9248 			mon->arm_data);
9249 
9250 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9251 		max_write; mask >>= 1, i++) {
9252 		if (!(mask & 1)) {
9253 			sob = gid * MONITOR_MAX_SOBS + i;
9254 
9255 			if (offset > 0)
9256 				offset += snprintf(sobs + offset, max_write,
9257 							", ");
9258 
9259 			offset += snprintf(sobs + offset, max_write, "%u", sob);
9260 		}
9261 	}
9262 }
9263 
9264 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9265 				struct hl_device *hdev,
9266 				struct hl_mon_state_dump *mon)
9267 {
9268 	const char *name;
9269 	char scratch_buf1[BIN_REG_STRING_SIZE],
9270 		scratch_buf2[BIN_REG_STRING_SIZE];
9271 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9272 
9273 	name = hl_state_dump_get_monitor_name(hdev, mon);
9274 	if (!name)
9275 		name = "";
9276 
9277 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9278 
9279 	return hl_snprintf_resize(
9280 		buf, size, offset,
9281 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9282 		mon->id, name,
9283 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9284 				mon->arm_data),
9285 		hl_format_as_binary(
9286 			scratch_buf1, sizeof(scratch_buf1),
9287 			FIELD_GET(
9288 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9289 				mon->arm_data)),
9290 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9291 				mon->arm_data),
9292 		mon->wr_data,
9293 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9294 		hl_format_as_binary(
9295 			scratch_buf2, sizeof(scratch_buf2),
9296 			FIELD_GET(
9297 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9298 				mon->status)),
9299 		monitored_sobs);
9300 }
9301 
9302 
9303 static int gaudi_print_fences_single_engine(
9304 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9305 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9306 	size_t *size, size_t *offset)
9307 {
9308 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9309 	int rc = -ENOMEM, i;
9310 	u32 *statuses, *fences;
9311 
9312 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9313 			sizeof(*statuses), GFP_KERNEL);
9314 	if (!statuses)
9315 		goto out;
9316 
9317 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9318 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9319 			 sizeof(*fences), GFP_KERNEL);
9320 	if (!fences)
9321 		goto free_status;
9322 
9323 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9324 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9325 
9326 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9327 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9328 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9329 
9330 	/* The actual print */
9331 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9332 		u32 fence_id;
9333 		u64 fence_cnt, fence_rdata;
9334 		const char *engine_name;
9335 
9336 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9337 			statuses[i]))
9338 			continue;
9339 
9340 		fence_id =
9341 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9342 		fence_cnt = base_offset + CFG_BASE +
9343 			sizeof(u32) *
9344 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9345 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9346 				sds->props[SP_FENCE0_RDATA_OFFSET];
9347 		engine_name = hl_sync_engine_to_string(engine_type);
9348 
9349 		rc = hl_snprintf_resize(
9350 			buf, size, offset,
9351 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9352 			engine_name, engine_id,
9353 			i, fence_id,
9354 			fence_cnt, engine_name, engine_id, fence_id, i,
9355 			fence_rdata, engine_name, engine_id, fence_id, i,
9356 			fences[fence_id],
9357 			statuses[i]);
9358 		if (rc)
9359 			goto free_fences;
9360 	}
9361 
9362 	rc = 0;
9363 
9364 free_fences:
9365 	kfree(fences);
9366 free_status:
9367 	kfree(statuses);
9368 out:
9369 	return rc;
9370 }
9371 
9372 
9373 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9374 	.monitor_valid = gaudi_monitor_valid,
9375 	.print_single_monitor = gaudi_print_single_monitor,
9376 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9377 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9378 };
9379 
9380 static void gaudi_state_dump_init(struct hl_device *hdev)
9381 {
9382 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9383 	int i;
9384 
9385 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9386 		hash_add(sds->so_id_to_str_tb,
9387 			&gaudi_so_id_to_str[i].node,
9388 			gaudi_so_id_to_str[i].id);
9389 
9390 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9391 		hash_add(sds->monitor_id_to_str_tb,
9392 			&gaudi_monitor_id_to_str[i].node,
9393 			gaudi_monitor_id_to_str[i].id);
9394 
9395 	sds->props = gaudi_state_dump_specs_props;
9396 
9397 	sds->sync_namager_names = gaudi_sync_manager_names;
9398 
9399 	sds->funcs = gaudi_state_dump_funcs;
9400 }
9401 
9402 static u32 *gaudi_get_stream_master_qid_arr(void)
9403 {
9404 	return gaudi_stream_master;
9405 }
9406 
9407 static const struct hl_asic_funcs gaudi_funcs = {
9408 	.early_init = gaudi_early_init,
9409 	.early_fini = gaudi_early_fini,
9410 	.late_init = gaudi_late_init,
9411 	.late_fini = gaudi_late_fini,
9412 	.sw_init = gaudi_sw_init,
9413 	.sw_fini = gaudi_sw_fini,
9414 	.hw_init = gaudi_hw_init,
9415 	.hw_fini = gaudi_hw_fini,
9416 	.halt_engines = gaudi_halt_engines,
9417 	.suspend = gaudi_suspend,
9418 	.resume = gaudi_resume,
9419 	.mmap = gaudi_mmap,
9420 	.ring_doorbell = gaudi_ring_doorbell,
9421 	.pqe_write = gaudi_pqe_write,
9422 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9423 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9424 	.scrub_device_mem = gaudi_scrub_device_mem,
9425 	.get_int_queue_base = gaudi_get_int_queue_base,
9426 	.test_queues = gaudi_test_queues,
9427 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9428 	.asic_dma_pool_free = gaudi_dma_pool_free,
9429 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9430 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9431 	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9432 	.cs_parser = gaudi_cs_parser,
9433 	.asic_dma_map_sg = gaudi_dma_map_sg,
9434 	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9435 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9436 	.update_eq_ci = gaudi_update_eq_ci,
9437 	.context_switch = gaudi_context_switch,
9438 	.restore_phase_topology = gaudi_restore_phase_topology,
9439 	.debugfs_read32 = gaudi_debugfs_read32,
9440 	.debugfs_write32 = gaudi_debugfs_write32,
9441 	.debugfs_read64 = gaudi_debugfs_read64,
9442 	.debugfs_write64 = gaudi_debugfs_write64,
9443 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9444 	.add_device_attr = gaudi_add_device_attr,
9445 	.handle_eqe = gaudi_handle_eqe,
9446 	.set_pll_profile = gaudi_set_pll_profile,
9447 	.get_events_stat = gaudi_get_events_stat,
9448 	.read_pte = gaudi_read_pte,
9449 	.write_pte = gaudi_write_pte,
9450 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9451 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9452 	.send_heartbeat = gaudi_send_heartbeat,
9453 	.set_clock_gating = gaudi_set_clock_gating,
9454 	.disable_clock_gating = gaudi_disable_clock_gating,
9455 	.debug_coresight = gaudi_debug_coresight,
9456 	.is_device_idle = gaudi_is_device_idle,
9457 	.soft_reset_late_init = gaudi_soft_reset_late_init,
9458 	.hw_queues_lock = gaudi_hw_queues_lock,
9459 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9460 	.get_pci_id = gaudi_get_pci_id,
9461 	.get_eeprom_data = gaudi_get_eeprom_data,
9462 	.send_cpu_message = gaudi_send_cpu_message,
9463 	.pci_bars_map = gaudi_pci_bars_map,
9464 	.init_iatu = gaudi_init_iatu,
9465 	.rreg = hl_rreg,
9466 	.wreg = hl_wreg,
9467 	.halt_coresight = gaudi_halt_coresight,
9468 	.ctx_init = gaudi_ctx_init,
9469 	.ctx_fini = gaudi_ctx_fini,
9470 	.get_clk_rate = gaudi_get_clk_rate,
9471 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9472 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9473 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9474 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9475 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9476 	.gen_signal_cb = gaudi_gen_signal_cb,
9477 	.gen_wait_cb = gaudi_gen_wait_cb,
9478 	.reset_sob = gaudi_reset_sob,
9479 	.reset_sob_group = gaudi_reset_sob_group,
9480 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9481 	.get_device_time = gaudi_get_device_time,
9482 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9483 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9484 	.scramble_addr = hl_mmu_scramble_addr,
9485 	.descramble_addr = hl_mmu_descramble_addr,
9486 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9487 	.get_hw_block_id = gaudi_get_hw_block_id,
9488 	.hw_block_mmap = gaudi_block_mmap,
9489 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9490 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9491 	.init_firmware_loader = gaudi_init_firmware_loader,
9492 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9493 	.state_dump_init = gaudi_state_dump_init,
9494 	.get_sob_addr = gaudi_get_sob_addr,
9495 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9496 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9497 };
9498 
9499 /**
9500  * gaudi_set_asic_funcs - set GAUDI function pointers
9501  *
9502  * @hdev: pointer to hl_device structure
9503  *
9504  */
9505 void gaudi_set_asic_funcs(struct hl_device *hdev)
9506 {
9507 	hdev->asic_funcs = &gaudi_funcs;
9508 }
9509