1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: the driver needs to parse the CB, but WREG should be
43 * allowed because of TDMA (tensor DMA). Hence, WREG is never
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
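/*
 * A minimal sketch of the secured-DMA flow described above. The helper
 * names are illustrative assumptions, not actual driver functions; the
 * real flow is implemented around gaudi_send_job_on_qman0() later in
 * this file.
 *
 *	if (!device_is_idle(hdev))
 *		return -EBUSY;
 *	set_dma_ch0_secured(hdev, true);
 *	rc = run_dma_job_on_qman0(hdev, job);
 *	set_dma_ch0_secured(hdev, false);
 */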
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN 20
86
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
97
98 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
101 BIT(GAUDI_ENGINE_ID_MME_0) |\
102 BIT(GAUDI_ENGINE_ID_MME_2) |\
103 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE 256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114 GAUDI_QUEUE_ID_DMA_0_0,
115 GAUDI_QUEUE_ID_DMA_0_1,
116 GAUDI_QUEUE_ID_DMA_0_2,
117 GAUDI_QUEUE_ID_DMA_0_3,
118 GAUDI_QUEUE_ID_DMA_1_0,
119 GAUDI_QUEUE_ID_DMA_1_1,
120 GAUDI_QUEUE_ID_DMA_1_2,
121 GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143 [0] = GAUDI_QUEUE_ID_DMA_0_0,
144 [1] = GAUDI_QUEUE_ID_DMA_0_1,
145 [2] = GAUDI_QUEUE_ID_DMA_0_2,
146 [3] = GAUDI_QUEUE_ID_DMA_0_3,
147 [4] = GAUDI_QUEUE_ID_DMA_1_0,
148 [5] = GAUDI_QUEUE_ID_DMA_1_1,
149 [6] = GAUDI_QUEUE_ID_DMA_1_2,
150 [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
155 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
156 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
157 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
158 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
159 [PACKET_REPEAT] = sizeof(struct packet_repeat),
160 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
161 [PACKET_FENCE] = sizeof(struct packet_fence),
162 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
163 [PACKET_NOP] = sizeof(struct packet_nop),
164 [PACKET_STOP] = sizeof(struct packet_stop),
165 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
166 [PACKET_WAIT] = sizeof(struct packet_wait),
167 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172 switch (id) {
173 case PACKET_WREG_32:
174 case PACKET_WREG_BULK:
175 case PACKET_MSG_LONG:
176 case PACKET_MSG_SHORT:
177 case PACKET_CP_DMA:
178 case PACKET_REPEAT:
179 case PACKET_MSG_PROT:
180 case PACKET_FENCE:
181 case PACKET_LIN_DMA:
182 case PACKET_NOP:
183 case PACKET_STOP:
184 case PACKET_ARB_POINT:
185 case PACKET_WAIT:
186 case PACKET_LOAD_AND_EXE:
187 return true;
188 default:
189 return false;
190 }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195 "tpc_address_exceed_slm",
196 "tpc_div_by_0",
197 "tpc_spu_mac_overflow",
198 "tpc_spu_addsub_overflow",
199 "tpc_spu_abs_overflow",
200 "tpc_spu_fp_dst_nan_inf",
201 "tpc_spu_fp_dst_denorm",
202 "tpc_vpu_mac_overflow",
203 "tpc_vpu_addsub_overflow",
204 "tpc_vpu_abs_overflow",
205 "tpc_vpu_fp_dst_nan_inf",
206 "tpc_vpu_fp_dst_denorm",
207 "tpc_assertions",
208 "tpc_illegal_instruction",
209 "tpc_pc_wrap_around",
210 "tpc_qm_sw_err",
211 "tpc_hbw_rresp_err",
212 "tpc_hbw_bresp_err",
213 "tpc_lbw_rresp_err",
214 "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219 "PQ AXI HBW error",
220 "CQ AXI HBW error",
221 "CP AXI HBW error",
222 "CP error due to undefined OPCODE",
223 "CP encountered STOP OPCODE",
224 "CP AXI LBW error",
225 "CP WRREG32 or WRBULK returned error",
226 "N/A",
227 "FENCE 0 inc over max value and clipped",
228 "FENCE 1 inc over max value and clipped",
229 "FENCE 2 inc over max value and clipped",
230 "FENCE 3 inc over max value and clipped",
231 "FENCE 0 dec under min value and clipped",
232 "FENCE 1 dec under min value and clipped",
233 "FENCE 2 dec under min value and clipped",
234 "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239 "Choice push while full error",
240 "Choice Q watchdog error",
241 "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245 GAUDI_SM_SEI_SO_OVERFLOW,
246 GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247 GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414 [SP_MON_OBJ_WR_ADDR_LOW] =
415 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416 [SP_MON_OBJ_WR_ADDR_HIGH] =
417 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438 [SP_FENCE0_CNT_OFFSET] =
439 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440 [SP_FENCE0_RDATA_OFFSET] =
441 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443 [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448 */
449 static const char * const gaudi_sync_manager_names[] = {
450 "SYNC_MGR_E_N",
451 "SYNC_MGR_W_N",
452 "SYNC_MGR_E_S",
453 "SYNC_MGR_W_S",
454 NULL
455 };
456
457 struct ecc_info_extract_params {
458 u64 block_address;
459 u32 num_memories;
460 bool derr;
461 bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467 struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469 u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471 u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481 struct hl_gen_wait_properties *prop);
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486 return HL_COLLECTIVE_MASTER;
487
488 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490 return HL_COLLECTIVE_SLAVE;
491
492 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494 return HL_COLLECTIVE_SLAVE;
495
496 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498 return HL_COLLECTIVE_SLAVE;
499
500 return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
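/*
 * Examples, derived directly from the checks above: an external queue such
 * as GAUDI_QUEUE_ID_DMA_0_0 maps to HL_COLLECTIVE_MASTER;
 * GAUDI_QUEUE_ID_DMA_5_1, GAUDI_QUEUE_ID_TPC_7_0 and every NIC queue map to
 * HL_COLLECTIVE_SLAVE; all remaining queues are HL_COLLECTIVE_NOT_SUPPORTED.
 */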
502
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505 struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507 if (hdev->card_type == cpucp_card_type_pmc) {
508 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510 if (prop->fw_security_enabled)
511 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512 else
513 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514 } else {
515 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517 }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522 struct asic_fixed_properties *prop = &hdev->asic_prop;
523 u32 num_sync_stream_queues = 0;
524 int i;
525
526 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527 prop->hw_queues_props = kcalloc(prop->max_queues,
528 sizeof(struct hw_queue_properties),
529 GFP_KERNEL);
530
531 if (!prop->hw_queues_props)
532 return -ENOMEM;
533
534 for (i = 0 ; i < prop->max_queues ; i++) {
535 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537 prop->hw_queues_props[i].driver_only = 0;
538 prop->hw_queues_props[i].supports_sync_stream = 1;
539 prop->hw_queues_props[i].cb_alloc_flags =
540 CB_ALLOC_KERNEL;
541 num_sync_stream_queues++;
542 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544 prop->hw_queues_props[i].driver_only = 1;
545 prop->hw_queues_props[i].supports_sync_stream = 0;
546 prop->hw_queues_props[i].cb_alloc_flags =
547 CB_ALLOC_KERNEL;
548 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550 prop->hw_queues_props[i].driver_only = 0;
551 prop->hw_queues_props[i].supports_sync_stream = 0;
552 prop->hw_queues_props[i].cb_alloc_flags =
553 CB_ALLOC_USER;
554
555 }
556 prop->hw_queues_props[i].collective_mode =
557 get_collective_mode(hdev, i);
558 }
559
560 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562 prop->collective_first_sob = 0;
563 prop->collective_first_mon = 0;
564
565 /* 2 SOBs per internal queue stream are reserved for collective */
566 prop->sync_stream_first_sob =
567 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568 * QMAN_STREAMS * HL_RSVD_SOBS;
569
570 /* 1 monitor per internal queue stream is reserved for collective
571 * 2 monitors per external queue stream are reserved for collective
572 */
573 prop->sync_stream_first_mon =
574 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575 (NUMBER_OF_EXT_HW_QUEUES * 2);
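/*
 * Illustrative arithmetic for the SOB reservation above, with assumed
 * values (not taken from the headers): NUMBER_OF_SOBS_IN_GRP = 11,
 * HL_MAX_SOBS_PER_MONITOR = 8, QMAN_STREAMS = 4, HL_RSVD_SOBS = 2:
 *
 *	sync_stream_first_sob = ALIGN(11, 8) * 4 * 2 = 16 * 8 = 128
 *
 * i.e. SOBs 0..127 would be reserved for the collective SOB groups and the
 * regular sync-stream SOBs would start right after them.
 */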
576
577 prop->dram_base_address = DRAM_PHYS_BASE;
578 prop->dram_size = GAUDI_HBM_SIZE_32GB;
579 prop->dram_end_address = prop->dram_base_address +
580 prop->dram_size;
581 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583 prop->sram_base_address = SRAM_BASE_ADDR;
584 prop->sram_size = SRAM_SIZE;
585 prop->sram_end_address = prop->sram_base_address +
586 prop->sram_size;
587 prop->sram_user_base_address = prop->sram_base_address +
588 SRAM_USER_BASE_OFFSET;
589
590 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591 if (hdev->pldm)
592 prop->mmu_pgt_size = 0x800000; /* 8MB */
593 else
594 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595 prop->mmu_pte_size = HL_PTE_SIZE;
596 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598 prop->dram_page_size = PAGE_SIZE_2MB;
599 prop->dram_supports_virtual_memory = false;
600
601 prop->pmmu.hop0_shift = HOP0_SHIFT;
602 prop->pmmu.hop1_shift = HOP1_SHIFT;
603 prop->pmmu.hop2_shift = HOP2_SHIFT;
604 prop->pmmu.hop3_shift = HOP3_SHIFT;
605 prop->pmmu.hop4_shift = HOP4_SHIFT;
606 prop->pmmu.hop0_mask = HOP0_MASK;
607 prop->pmmu.hop1_mask = HOP1_MASK;
608 prop->pmmu.hop2_mask = HOP2_MASK;
609 prop->pmmu.hop3_mask = HOP3_MASK;
610 prop->pmmu.hop4_mask = HOP4_MASK;
611 prop->pmmu.start_addr = VA_HOST_SPACE_START;
612 prop->pmmu.end_addr =
613 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614 prop->pmmu.page_size = PAGE_SIZE_4KB;
615 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616
617 /* PMMU and HPMMU are the same except for the page size */
618 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621 /* shifts and masks are the same in PMMU and DMMU */
622 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624 prop->dmmu.end_addr = VA_HOST_SPACE_END;
625 prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627 prop->cfg_size = CFG_SIZE;
628 prop->max_asid = MAX_ASID;
629 prop->num_of_events = GAUDI_EVENT_SIZE;
630 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632 set_default_power_values(hdev);
633
634 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641 CARD_NAME_MAX_LEN);
642
643 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646 prop->sync_stream_first_sob +
647 (num_sync_stream_queues * HL_RSVD_SOBS);
648 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649 prop->sync_stream_first_mon +
650 (num_sync_stream_queues * HL_RSVD_MONS);
651
652 prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654 for (i = 0 ; i < HL_MAX_DCORES ; i++)
655 prop->first_available_cq[i] = USHRT_MAX;
656
657 prop->fw_cpu_boot_dev_sts0_valid = false;
658 prop->fw_cpu_boot_dev_sts1_valid = false;
659 prop->hard_reset_done_by_fw = false;
660 prop->gic_interrupts_enable = true;
661
662 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664 return 0;
665 }
666
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669 static const char * const name[] = {"SRAM", "CFG", "HBM"};
670 bool is_wc[3] = {false, false, true};
671 int rc;
672
673 rc = hl_pci_bars_map(hdev, name, is_wc);
674 if (rc)
675 return rc;
676
677 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678 (CFG_BASE - SPI_FLASH_BASE_ADDR);
679
680 return 0;
681 }
682
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685 struct gaudi_device *gaudi = hdev->asic_specific;
686 struct hl_inbound_pci_region pci_region;
687 u64 old_addr = addr;
688 int rc;
689
690 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691 return old_addr;
692
693 if (hdev->asic_prop.iatu_done_by_fw)
694 return U64_MAX;
695
696 /* Inbound Region 2 - Bar 4 - Point to HBM */
697 pci_region.mode = PCI_BAR_MATCH_MODE;
698 pci_region.bar = HBM_BAR_ID;
699 pci_region.addr = addr;
700 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701 if (rc)
702 return U64_MAX;
703
704 if (gaudi) {
705 old_addr = gaudi->hbm_bar_cur_addr;
706 gaudi->hbm_bar_cur_addr = addr;
707 }
708
709 return old_addr;
710 }
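/*
 * Typical usage sketch, not a new code path: callers move the HBM BAR
 * window to the region they need, access HBM through the BAR and then
 * restore the previous window (hbm_addr_aligned_to_bar is a placeholder).
 *
 *	u64 old_base = gaudi_set_hbm_bar_base(hdev, hbm_addr_aligned_to_bar);
 *
 *	if (old_base == U64_MAX)
 *		return -EIO;
 *	// ... access HBM through hdev->pcie_bar[HBM_BAR_ID] ...
 *	gaudi_set_hbm_bar_base(hdev, old_base);
 */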
711
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714 struct hl_inbound_pci_region inbound_region;
715 struct hl_outbound_pci_region outbound_region;
716 int rc;
717
718 if (hdev->asic_prop.iatu_done_by_fw)
719 return 0;
720
721 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722 inbound_region.mode = PCI_BAR_MATCH_MODE;
723 inbound_region.bar = SRAM_BAR_ID;
724 inbound_region.addr = SRAM_BASE_ADDR;
725 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726 if (rc)
727 goto done;
728
729 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730 inbound_region.mode = PCI_BAR_MATCH_MODE;
731 inbound_region.bar = CFG_BAR_ID;
732 inbound_region.addr = SPI_FLASH_BASE_ADDR;
733 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734 if (rc)
735 goto done;
736
737 /* Inbound Region 2 - Bar 4 - Point to HBM */
738 inbound_region.mode = PCI_BAR_MATCH_MODE;
739 inbound_region.bar = HBM_BAR_ID;
740 inbound_region.addr = DRAM_PHYS_BASE;
741 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742 if (rc)
743 goto done;
744
745 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746
747 /* Outbound Region 0 - Point to Host */
748 outbound_region.addr = HOST_PHYS_BASE;
749 outbound_region.size = HOST_PHYS_SIZE;
750 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751
752 done:
753 return rc;
754 }
755
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758 return RREG32(mmHW_STATE);
759 }
760
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763 struct asic_fixed_properties *prop = &hdev->asic_prop;
764 struct pci_dev *pdev = hdev->pdev;
765 u32 fw_boot_status;
766 int rc;
767
768 rc = gaudi_set_fixed_properties(hdev);
769 if (rc) {
770 dev_err(hdev->dev, "Failed setting fixed properties\n");
771 return rc;
772 }
773
774 /* Check BAR sizes */
775 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776 dev_err(hdev->dev,
777 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778 SRAM_BAR_ID,
779 (unsigned long long) pci_resource_len(pdev,
780 SRAM_BAR_ID),
781 SRAM_BAR_SIZE);
782 rc = -ENODEV;
783 goto free_queue_props;
784 }
785
786 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787 dev_err(hdev->dev,
788 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789 CFG_BAR_ID,
790 (unsigned long long) pci_resource_len(pdev,
791 CFG_BAR_ID),
792 CFG_BAR_SIZE);
793 rc = -ENODEV;
794 goto free_queue_props;
795 }
796
797 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798
799 /* If FW security is enabled at this point it means no access to ELBI */
800 if (hdev->asic_prop.fw_security_enabled) {
801 hdev->asic_prop.iatu_done_by_fw = true;
802
803 /*
804 * The GIC security bit can ONLY be set by CPUCP, so at this stage the
805 * decision can only be taken based on PCI ID security.
806 */
807 hdev->asic_prop.gic_interrupts_enable = false;
808 goto pci_init;
809 }
810
811 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812 &fw_boot_status);
813 if (rc)
814 goto free_queue_props;
815
816 /* Check whether FW is configuring iATU */
817 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819 hdev->asic_prop.iatu_done_by_fw = true;
820
821 pci_init:
822 rc = hl_pci_init(hdev);
823 if (rc)
824 goto free_queue_props;
825
826 /* Before continuing with the initialization, we need to read the preboot
827 * version to determine whether we run with a security-enabled firmware
828 */
829 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830 mmCPU_BOOT_DEV_STS0,
831 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832 mmCPU_BOOT_ERR1,
833 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834 if (rc) {
835 if (hdev->reset_on_preboot_fail)
836 hdev->asic_funcs->hw_fini(hdev, true, false);
837 goto pci_fini;
838 }
839
840 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841 dev_info(hdev->dev,
842 "H/W state is dirty, must reset before initializing\n");
843 hdev->asic_funcs->hw_fini(hdev, true, false);
844 }
845
846 return 0;
847
848 pci_fini:
849 hl_pci_fini(hdev);
850 free_queue_props:
851 kfree(hdev->asic_prop.hw_queues_props);
852 return rc;
853 }
854
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857 kfree(hdev->asic_prop.hw_queues_props);
858 hl_pci_fini(hdev);
859
860 return 0;
861 }
862
863 /**
864 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865 *
866 * @hdev: pointer to hl_device structure
867 *
868 */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871 struct asic_fixed_properties *prop = &hdev->asic_prop;
872 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874 int rc;
875
876 if (hdev->asic_prop.fw_security_enabled) {
877 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878
879 if (rc)
880 return rc;
881
882 freq = pll_freq_arr[2];
883 } else {
884 /* Backward compatibility */
885 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887 nr = RREG32(mmPSOC_CPU_PLL_NR);
888 nf = RREG32(mmPSOC_CPU_PLL_NF);
889 od = RREG32(mmPSOC_CPU_PLL_OD);
890
891 if (div_sel == DIV_SEL_REF_CLK ||
892 div_sel == DIV_SEL_DIVIDED_REF) {
893 if (div_sel == DIV_SEL_REF_CLK)
894 freq = PLL_REF_CLK;
895 else
896 freq = PLL_REF_CLK / (div_fctr + 1);
897 } else if (div_sel == DIV_SEL_PLL_CLK ||
898 div_sel == DIV_SEL_DIVIDED_PLL) {
899 pll_clk = PLL_REF_CLK * (nf + 1) /
900 ((nr + 1) * (od + 1));
901 if (div_sel == DIV_SEL_PLL_CLK)
902 freq = pll_clk;
903 else
904 freq = pll_clk / (div_fctr + 1);
905 } else {
906 dev_warn(hdev->dev,
907 "Received invalid div select value: %d",
908 div_sel);
909 freq = 0;
910 }
911 }
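/*
 * Worked example for the backward-compatibility path above, assuming a
 * 50 MHz reference clock and sample register values nf = 79, nr = 1,
 * od = 1, div_fctr = 3 (values chosen for illustration only):
 *
 *	pll_clk = 50 * (79 + 1) / ((1 + 1) * (1 + 1)) = 1000 MHz
 *	freq    = pll_clk / (div_fctr + 1)           = 250 MHz
 */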
912
913 prop->psoc_timestamp_frequency = freq;
914 prop->psoc_pci_pll_nr = nr;
915 prop->psoc_pci_pll_nf = nf;
916 prop->psoc_pci_pll_od = od;
917 prop->psoc_pci_pll_div_factor = div_fctr;
918
919 return 0;
920 }
921
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925 struct asic_fixed_properties *prop = &hdev->asic_prop;
926 struct packet_lin_dma *init_tpc_mem_pkt;
927 struct hl_cs_job *job;
928 struct hl_cb *cb;
929 u64 dst_addr;
930 u32 cb_size, ctl;
931 u8 tpc_id;
932 int rc;
933
934 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935 if (!cb)
936 return -EFAULT;
937
938 init_tpc_mem_pkt = cb->kernel_address;
939 cb_size = sizeof(*init_tpc_mem_pkt);
940 memset(init_tpc_mem_pkt, 0, cb_size);
941
942 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943
944 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948
949 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950
951 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952 dst_addr = (prop->sram_user_base_address &
953 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956
957 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958 if (!job) {
959 dev_err(hdev->dev, "Failed to allocate a new job\n");
960 rc = -ENOMEM;
961 goto release_cb;
962 }
963
964 job->id = 0;
965 job->user_cb = cb;
966 atomic_inc(&job->user_cb->cs_cnt);
967 job->user_cb_size = cb_size;
968 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969 job->patched_cb = job->user_cb;
970 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971
972 hl_debugfs_add_job(hdev, job);
973
974 rc = gaudi_send_job_on_qman0(hdev, job);
975
976 if (rc)
977 goto free_job;
978
979 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981 if (rc)
982 break;
983 }
984
985 free_job:
986 hl_userptr_delete_list(hdev, &job->userptr_list);
987 hl_debugfs_remove_job(hdev, job);
988 kfree(job);
989 atomic_dec(&cb->cs_cnt);
990
991 release_cb:
992 hl_cb_put(cb);
993 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994
995 return rc;
996 }
997
998 /*
999 * gaudi_init_tpc_mem() - Initialize TPC memories.
1000 * @hdev: Pointer to hl_device structure.
1001 *
1002 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003 *
1004 * Return: 0 for success, negative value for error.
1005 */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008 const struct firmware *fw;
1009 size_t fw_size;
1010 void *cpu_addr;
1011 dma_addr_t dma_handle;
1012 int rc, count = 5;
1013
1014 again:
1015 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016 if (rc == -EINTR && count-- > 0) {
1017 msleep(50);
1018 goto again;
1019 }
1020
1021 if (rc) {
1022 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023 GAUDI_TPC_FW_FILE);
1024 goto out;
1025 }
1026
1027 fw_size = fw->size;
1028 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029 &dma_handle, GFP_KERNEL | __GFP_ZERO);
1030 if (!cpu_addr) {
1031 dev_err(hdev->dev,
1032 "Failed to allocate %zu of dma memory for TPC kernel\n",
1033 fw_size);
1034 rc = -ENOMEM;
1035 goto out;
1036 }
1037
1038 memcpy(cpu_addr, fw->data, fw_size);
1039
1040 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041
1042 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043 dma_handle);
1044
1045 out:
1046 release_firmware(fw);
1047 return rc;
1048 }
1049
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052 struct gaudi_device *gaudi = hdev->asic_specific;
1053 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054 struct hl_hw_queue *q;
1055 u32 i, sob_id, sob_group_id, queue_id;
1056
1057 /* Iterate through SOB groups and assign a SOB for each slave queue */
1058 sob_group_id =
1059 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061
1062 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1063 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064 q = &hdev->kernel_queues[queue_id + (4 * i)];
1065 q->sync_stream_prop.collective_sob_id = sob_id + i;
1066 }
1067
1068 /* Both DMA5 and TPC7 use the same resources since only a single
1069 * engine needs to participate in the reduction process
1070 */
1071 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072 q = &hdev->kernel_queues[queue_id];
1073 q->sync_stream_prop.collective_sob_id =
1074 sob_id + NIC_NUMBER_OF_ENGINES;
1075
1076 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077 q = &hdev->kernel_queues[queue_id];
1078 q->sync_stream_prop.collective_sob_id =
1079 sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
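/*
 * Illustrative mapping, with HL_RSVD_SOBS assumed to be 2: for stream 1
 * with curr_sob_group_idx[1] == 0, sob_group_id = 1 * 2 + 0 = 2 and
 * sob_id = hw_sob_group[2].base_sob_id. The i-th NIC engine's stream-1
 * queue then gets collective_sob_id = sob_id + i, while the DMA5 and TPC7
 * stream-1 queues share sob_id + NIC_NUMBER_OF_ENGINES.
 */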
1081
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084 struct gaudi_hw_sob_group *hw_sob_group =
1085 container_of(ref, struct gaudi_hw_sob_group, kref);
1086 struct hl_device *hdev = hw_sob_group->hdev;
1087 int i;
1088
1089 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092
1093 kref_init(&hw_sob_group->kref);
1094 }
1095
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098 struct gaudi_hw_sob_group *hw_sob_group =
1099 container_of(ref, struct gaudi_hw_sob_group, kref);
1100 struct hl_device *hdev = hw_sob_group->hdev;
1101
1102 dev_crit(hdev->dev,
1103 "SOB release shouldn't be called here, base_sob_id: %d\n",
1104 hw_sob_group->base_sob_id);
1105 }
1106
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109 struct gaudi_collective_properties *prop;
1110 int i;
1111
1112 prop = &gaudi->collective_props;
1113
1114 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115
1116 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120 /* Set collective engine bit */
1121 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127 u32 i, sob_id, reserved_sobs_per_group;
1128 struct gaudi_collective_properties *prop;
1129 struct gaudi_device *gaudi;
1130
1131 gaudi = hdev->asic_specific;
1132 prop = &gaudi->collective_props;
1133 sob_id = hdev->asic_prop.collective_first_sob;
1134
1135 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136 reserved_sobs_per_group =
1137 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138
1139 /* Init SOB groups */
1140 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141 prop->hw_sob_group[i].hdev = hdev;
1142 prop->hw_sob_group[i].base_sob_id = sob_id;
1143 sob_id += reserved_sobs_per_group;
1144 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145 }
1146
1147 for (i = 0 ; i < QMAN_STREAMS; i++) {
1148 prop->next_sob_group_val[i] = 1;
1149 prop->curr_sob_group_idx[i] = 0;
1150 gaudi_collective_map_sobs(hdev, i);
1151 }
1152
1153 gaudi_collective_mstr_sob_mask_set(gaudi);
1154
1155 return 0;
1156 }
1157
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160 struct gaudi_device *gaudi = hdev->asic_specific;
1161 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162
1163 kref_put(&cprop->hw_sob_group[sob_group].kref,
1164 gaudi_sob_group_hw_reset);
1165 }
1166
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171 struct gaudi_collective_properties *cprop;
1172 struct hl_gen_wait_properties wait_prop;
1173 struct hl_sync_stream_properties *prop;
1174 struct gaudi_device *gaudi;
1175
1176 gaudi = hdev->asic_specific;
1177 cprop = &gaudi->collective_props;
1178 queue_id = job->hw_queue_id;
1179 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180
1181 master_sob_base =
1182 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183 master_monitor = prop->collective_mstr_mon_id[0];
1184
1185 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186
1187 dev_dbg(hdev->dev,
1188 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189 master_sob_base, cprop->mstr_sob_mask[0],
1190 cprop->next_sob_group_val[stream],
1191 master_monitor, queue_id);
1192
1193 wait_prop.data = (void *) job->patched_cb;
1194 wait_prop.sob_base = master_sob_base;
1195 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197 wait_prop.mon_id = master_monitor;
1198 wait_prop.q_idx = queue_id;
1199 wait_prop.size = cb_size;
1200 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201
1202 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203 master_monitor = prop->collective_mstr_mon_id[1];
1204
1205 dev_dbg(hdev->dev,
1206 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207 master_sob_base, cprop->mstr_sob_mask[1],
1208 cprop->next_sob_group_val[stream],
1209 master_monitor, queue_id);
1210
1211 wait_prop.sob_base = master_sob_base;
1212 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213 wait_prop.mon_id = master_monitor;
1214 wait_prop.size = cb_size;
1215 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221 struct hl_gen_wait_properties wait_prop;
1222 struct hl_sync_stream_properties *prop;
1223 u32 queue_id, cb_size = 0;
1224
1225 queue_id = job->hw_queue_id;
1226 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227
1228 if (job->cs->encaps_signals) {
1229 /* use the encaps signal handle stored earlier in the flow
1230 * and set the SOB information from the encaps
1231 * signals handle
1232 */
1233 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234 cs_cmpl);
1235
1236 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1237 job->cs->sequence,
1238 cs_cmpl->hw_sob->sob_id,
1239 cs_cmpl->sob_val);
1240 }
1241
1242 /* Add to wait CBs using slave monitor */
1243 wait_prop.data = (void *) job->user_cb;
1244 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245 wait_prop.sob_mask = 0x1;
1246 wait_prop.sob_val = cs_cmpl->sob_val;
1247 wait_prop.mon_id = prop->collective_slave_mon_id;
1248 wait_prop.q_idx = queue_id;
1249 wait_prop.size = cb_size;
1250
1251 dev_dbg(hdev->dev,
1252 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254 prop->collective_slave_mon_id, queue_id);
1255
1256 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257
1258 dev_dbg(hdev->dev,
1259 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260 prop->collective_sob_id, queue_id);
1261
1262 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263 prop->collective_sob_id, cb_size, false);
1264 }
1265
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268 struct hl_cs_compl *signal_cs_cmpl =
1269 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270 struct hl_cs_compl *cs_cmpl =
1271 container_of(cs->fence, struct hl_cs_compl, base_fence);
1272 struct gaudi_collective_properties *cprop;
1273 u32 stream, queue_id, sob_group_offset;
1274 struct gaudi_device *gaudi;
1275 struct hl_device *hdev;
1276 struct hl_cs_job *job;
1277 struct hl_ctx *ctx;
1278
1279 ctx = cs->ctx;
1280 hdev = ctx->hdev;
1281 gaudi = hdev->asic_specific;
1282 cprop = &gaudi->collective_props;
1283
1284 /* In encaps signals case the SOB info will be retrieved from
1285 * the handle in gaudi_collective_slave_init_job.
1286 */
1287 if (!cs->encaps_signals) {
1288 /* copy the SOB id and value of the signal CS */
1289 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291 }
1292
1293 /* Check again if the signal cs has already completed.
1294 * If so, don't send any wait cs since the hw_sob
1295 * could already be in reset. If the signal is not completed,
1296 * take a refcount on the hw_sob to prevent resetting the sob
1297 * while the wait cs is not yet submitted.
1298 * Note that this check is protected by two locks:
1299 * the hw queue lock and the completion object lock.
1300 * The same completion object lock also protects
1301 * the hw_sob reset handler function.
1302 * The hw_queue lock prevents the hw_sob refcount value, changed by the
1303 * signal/wait flows, from going out of sync.
1304 */
1305 spin_lock(&signal_cs_cmpl->lock);
1306
1307 if (completion_done(&cs->signal_fence->completion)) {
1308 spin_unlock(&signal_cs_cmpl->lock);
1309 return -EINVAL;
1310 }
1311 /* Increment kref since all slave queues are now waiting on it */
1312 kref_get(&cs_cmpl->hw_sob->kref);
1313
1314 spin_unlock(&signal_cs_cmpl->lock);
1315
1316 /* Calculate the stream from collective master queue (1st job) */
1317 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318 stream = job->hw_queue_id % 4;
1319 sob_group_offset =
1320 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321
1322 list_for_each_entry(job, &cs->job_list, cs_node) {
1323 queue_id = job->hw_queue_id;
1324
1325 if (hdev->kernel_queues[queue_id].collective_mode ==
1326 HL_COLLECTIVE_MASTER)
1327 gaudi_collective_master_init_job(hdev, job, stream,
1328 sob_group_offset);
1329 else
1330 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331 }
1332
1333 cs_cmpl->sob_group = sob_group_offset;
1334
1335 /* Handle sob group kref and wraparound */
1336 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337 cprop->next_sob_group_val[stream]++;
1338
1339 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340 /*
1341 * Decrement as we reached the max value.
1342 * The release function won't be called here as we've
1343 * just incremented the refcount.
1344 */
1345 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346 gaudi_sob_group_reset_error);
1347 cprop->next_sob_group_val[stream] = 1;
1348 /* only two SOBs are currently in use */
1349 cprop->curr_sob_group_idx[stream] =
1350 (cprop->curr_sob_group_idx[stream] + 1) &
1351 (HL_RSVD_SOBS - 1);
1352
1353 gaudi_collective_map_sobs(hdev, stream);
1354
1355 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356 cprop->curr_sob_group_idx[stream], stream);
1357 }
1358
1359 mb();
1360 hl_fence_put(cs->signal_fence);
1361 cs->signal_fence = NULL;
1362
1363 return 0;
1364 }
1365
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367 struct hl_ctx *ctx, struct hl_cs *cs,
1368 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369 u32 encaps_signal_offset)
1370 {
1371 struct hw_queue_properties *hw_queue_prop;
1372 struct hl_cs_counters_atomic *cntr;
1373 struct hl_cs_job *job;
1374 struct hl_cb *cb;
1375 u32 cb_size;
1376 bool patched_cb;
1377
1378 cntr = &hdev->aggregated_cs_counters;
1379
1380 if (mode == HL_COLLECTIVE_MASTER) {
1381 /* CB size of collective master queue contains
1382 * 4 msg short packets for monitor 1 configuration
1383 * 1 fence packet
1384 * 4 msg short packets for monitor 2 configuration
1385 * 1 fence packet
1386 * 2 msg prot packets for completion and MSI-X
1387 */
1388 cb_size = sizeof(struct packet_msg_short) * 8 +
1389 sizeof(struct packet_fence) * 2 +
1390 sizeof(struct packet_msg_prot) * 2;
1391 patched_cb = true;
1392 } else {
1393 /* CB size of collective slave queues contains
1394 * 4 msg short packets for monitor configuration
1395 * 1 fence packet
1396 * 1 additional msg short packet for sob signal
1397 */
1398 cb_size = sizeof(struct packet_msg_short) * 5 +
1399 sizeof(struct packet_fence);
1400 patched_cb = false;
1401 }
1402
1403 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405 if (!job) {
1406 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408 dev_err(hdev->dev, "Failed to allocate a new job\n");
1409 return -ENOMEM;
1410 }
1411
1412 /* Allocate internal mapped CB for non patched CBs */
1413 cb = hl_cb_kernel_create(hdev, cb_size,
1414 hdev->mmu_enable && !patched_cb);
1415 if (!cb) {
1416 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418 kfree(job);
1419 return -EFAULT;
1420 }
1421
1422 job->id = 0;
1423 job->cs = cs;
1424 job->user_cb = cb;
1425 atomic_inc(&job->user_cb->cs_cnt);
1426 job->user_cb_size = cb_size;
1427 job->hw_queue_id = queue_id;
1428
1429 /* Since the collective wait cs is guaranteed to have only one chunk,
1430 * we can use this chunk to set the encapsulated signal offset
1431 * in the jobs.
1432 */
1433 if (cs->encaps_signals)
1434 job->encaps_sig_wait_offset = encaps_signal_offset;
1435
1436 /*
1437 * No need for parsing, the user CB is the patched CB.
1438 * We call hl_cb_destroy() for two reasons - we don't need
1439 * the CB in the CB idr anymore, and we need to decrement its refcount
1440 * as it was incremented inside hl_cb_kernel_create().
1441 */
1442 if (patched_cb)
1443 job->patched_cb = job->user_cb;
1444 else
1445 job->patched_cb = NULL;
1446
1447 job->job_cb_size = job->user_cb_size;
1448 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449
1450 /* increment refcount as for external queues we get completion */
1451 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452 cs_get(cs);
1453
1454 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455
1456 list_add_tail(&job->cs_node, &cs->job_list);
1457
1458 hl_debugfs_add_job(hdev, job);
1459
1460 return 0;
1461 }
1462
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464 struct hl_ctx *ctx, struct hl_cs *cs,
1465 u32 wait_queue_id, u32 collective_engine_id,
1466 u32 encaps_signal_offset)
1467 {
1468 struct gaudi_device *gaudi = hdev->asic_specific;
1469 struct hw_queue_properties *hw_queue_prop;
1470 u32 queue_id, collective_queue, num_jobs;
1471 u32 stream, nic_queue, nic_idx = 0;
1472 bool skip;
1473 int i, rc = 0;
1474
1475 /* Verify wait queue id is configured as master */
1476 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1478 dev_err(hdev->dev,
1479 "Queue %d is not configured as collective master\n",
1480 wait_queue_id);
1481 return -EINVAL;
1482 }
1483
1484 /* Verify engine id is supported */
1485 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487 dev_err(hdev->dev,
1488 "Collective wait does not support engine %u\n",
1489 collective_engine_id);
1490 return -EINVAL;
1491 }
1492
1493 stream = wait_queue_id % 4;
1494
1495 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497 else
1498 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499
1500 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502
1503 /* First job goes to the collective master queue, it will wait for
1504 * the collective slave queues to finish execution.
1505 * The synchronization is done using two monitors:
1506 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1507 * reduction engine (DMA5/TPC7).
1508 *
1509 * The rest of the jobs go to the collective slave queues, which will
1510 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511 */
1512 for (i = 0 ; i < num_jobs ; i++) {
1513 if (i == 0) {
1514 queue_id = wait_queue_id;
1515 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516 HL_COLLECTIVE_MASTER, queue_id,
1517 wait_queue_id, encaps_signal_offset);
1518 } else {
1519 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520 if (gaudi->hw_cap_initialized &
1521 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522 skip = false;
1523 else
1524 skip = true;
1525
1526 queue_id = nic_queue;
1527 nic_queue += 4;
1528 nic_idx++;
1529
1530 if (skip)
1531 continue;
1532 } else {
1533 queue_id = collective_queue;
1534 }
1535
1536 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537 HL_COLLECTIVE_SLAVE, queue_id,
1538 wait_queue_id, encaps_signal_offset);
1539 }
1540
1541 if (rc)
1542 return rc;
1543 }
1544
1545 return rc;
1546 }
1547
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550 struct gaudi_device *gaudi = hdev->asic_specific;
1551 int rc;
1552
1553 rc = gaudi->cpucp_info_get(hdev);
1554 if (rc) {
1555 dev_err(hdev->dev, "Failed to get cpucp info\n");
1556 return rc;
1557 }
1558
1559 if ((hdev->card_type == cpucp_card_type_pci) &&
1560 (hdev->nic_ports_mask & 0x3)) {
1561 dev_info(hdev->dev,
1562 "PCI card detected, only 8 ports are enabled\n");
1563 hdev->nic_ports_mask &= ~0x3;
1564
1565 /* Stop and disable unused NIC QMANs */
1566 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569
1570 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573
1574 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576
1577 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578 }
1579
1580 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581 if (rc) {
1582 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583 return rc;
1584 }
1585
1586 /* Scrub both SRAM and DRAM */
1587 rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588 if (rc)
1589 goto disable_pci_access;
1590
1591 rc = gaudi_fetch_psoc_frequency(hdev);
1592 if (rc) {
1593 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594 goto disable_pci_access;
1595 }
1596
1597 rc = gaudi_mmu_clear_pgt_range(hdev);
1598 if (rc) {
1599 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600 goto disable_pci_access;
1601 }
1602
1603 rc = gaudi_init_tpc_mem(hdev);
1604 if (rc) {
1605 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606 goto disable_pci_access;
1607 }
1608
1609 rc = gaudi_collective_init(hdev);
1610 if (rc) {
1611 dev_err(hdev->dev, "Failed to init collective\n");
1612 goto disable_pci_access;
1613 }
1614
1615 /* We only support a single ASID for the user, so for the sake of optimization, just
1616 * initialize the ASID one time during device initialization with the fixed value of 1
1617 */
1618 gaudi_mmu_prepare(hdev, 1);
1619
1620 return 0;
1621
1622 disable_pci_access:
1623 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624
1625 return rc;
1626 }
1627
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630 const struct hwmon_channel_info **channel_info_arr;
1631 int i = 0;
1632
1633 if (!hdev->hl_chip_info->info)
1634 return;
1635
1636 channel_info_arr = hdev->hl_chip_info->info;
1637
1638 while (channel_info_arr[i]) {
1639 kfree(channel_info_arr[i]->config);
1640 kfree(channel_info_arr[i]);
1641 i++;
1642 }
1643
1644 kfree(channel_info_arr);
1645
1646 hdev->hl_chip_info->info = NULL;
1647 }
1648
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653 int i, j, rc = 0;
1654
1655 /*
1656 * The device CPU works with 40-bit addresses, and bit 39 must be set
1657 * to '1' when accessing the host.
1658 * Bits 49:39 of the full host address are saved for a later
1659 * configuration of the HW that extends the address to 50 bits.
1660 * Because a single HW register holds the extension bits, these bits
1661 * must be identical across the entire allocated range.
1662 */
1663
1664 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665 virt_addr_arr[i] =
1666 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667 HL_CPU_ACCESSIBLE_MEM_SIZE,
1668 &dma_addr_arr[i],
1669 GFP_KERNEL | __GFP_ZERO);
1670 if (!virt_addr_arr[i]) {
1671 rc = -ENOMEM;
1672 goto free_dma_mem_arr;
1673 }
1674
1675 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1676 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678 break;
1679 }
1680
1681 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682 dev_err(hdev->dev,
1683 "MSB of CPU accessible DMA memory are not identical in all range\n");
1684 rc = -EFAULT;
1685 goto free_dma_mem_arr;
1686 }
1687
1688 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690 hdev->cpu_pci_msb_addr =
1691 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692
1693 if (!hdev->asic_prop.fw_security_enabled)
1694 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695
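	/* Free any allocations that failed the MSB check; on success the
	 * loop below stops before index 'i', keeping the chosen buffer.
	 */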
1696 free_dma_mem_arr:
1697 for (j = 0 ; j < i ; j++)
1698 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699 HL_CPU_ACCESSIBLE_MEM_SIZE,
1700 virt_addr_arr[j],
1701 dma_addr_arr[j]);
1702
1703 return rc;
1704 }
1705
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708 struct gaudi_device *gaudi = hdev->asic_specific;
1709 struct gaudi_internal_qman_info *q;
1710 u32 i;
1711
1712 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713 q = &gaudi->internal_qmans[i];
1714 if (!q->pq_kernel_addr)
1715 continue;
1716 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717 q->pq_kernel_addr,
1718 q->pq_dma_addr);
1719 }
1720 }
1721
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724 struct gaudi_device *gaudi = hdev->asic_specific;
1725 struct gaudi_internal_qman_info *q;
1726 int rc, i;
1727
1728 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730 continue;
1731
1732 q = &gaudi->internal_qmans[i];
1733
1734 switch (i) {
1735 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737 break;
1738 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740 break;
1741 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743 break;
1744 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746 break;
1747 default:
1748 dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1749 rc = -EINVAL;
1750 goto free_internal_qmans_pq_mem;
1751 }
1752
1753 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754 hdev, q->pq_size,
1755 &q->pq_dma_addr,
1756 GFP_KERNEL | __GFP_ZERO);
1757 if (!q->pq_kernel_addr) {
1758 rc = -ENOMEM;
1759 goto free_internal_qmans_pq_mem;
1760 }
1761 }
1762
1763 return 0;
1764
1765 free_internal_qmans_pq_mem:
1766 gaudi_free_internal_qmans_pq_mem(hdev);
1767 return rc;
1768 }
1769
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772 struct asic_fixed_properties *prop = &hdev->asic_prop;
1773 struct pci_mem_region *region;
1774
1775 /* CFG */
1776 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777 region->region_base = CFG_BASE;
1778 region->region_size = CFG_SIZE;
1779 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780 region->bar_size = CFG_BAR_SIZE;
1781 region->bar_id = CFG_BAR_ID;
1782 region->used = 1;
1783
1784 /* SRAM */
1785 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786 region->region_base = SRAM_BASE_ADDR;
1787 region->region_size = SRAM_SIZE;
1788 region->offset_in_bar = 0;
1789 region->bar_size = SRAM_BAR_SIZE;
1790 region->bar_id = SRAM_BAR_ID;
1791 region->used = 1;
1792
1793 /* DRAM */
1794 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795 region->region_base = DRAM_PHYS_BASE;
1796 region->region_size = hdev->asic_prop.dram_size;
1797 region->offset_in_bar = 0;
1798 region->bar_size = prop->dram_pci_bar_size;
1799 region->bar_id = HBM_BAR_ID;
1800 region->used = 1;
1801
1802 /* SP SRAM */
1803 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804 region->region_base = PSOC_SCRATCHPAD_ADDR;
1805 region->region_size = PSOC_SCRATCHPAD_SIZE;
1806 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807 region->bar_size = CFG_BAR_SIZE;
1808 region->bar_id = CFG_BAR_ID;
1809 region->used = 1;
1810 }
1811
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814 struct gaudi_device *gaudi;
1815 u32 i, event_id = 0;
1816 int rc;
1817
1818 /* Allocate device structure */
1819 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820 if (!gaudi)
1821 return -ENOMEM;
1822
1823 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824 if (gaudi_irq_map_table[i].valid) {
1825 if (event_id == GAUDI_EVENT_SIZE) {
1826 dev_err(hdev->dev,
1827 "Event array exceeds the limit of %u events\n",
1828 GAUDI_EVENT_SIZE);
1829 rc = -EINVAL;
1830 goto free_gaudi_device;
1831 }
1832
1833 gaudi->events[event_id++] =
1834 gaudi_irq_map_table[i].fc_id;
1835 }
1836 }
1837
1838 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839
1840 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841
1842 hdev->asic_specific = gaudi;
1843
1844 /* Create DMA pool for small allocations */
1845 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847 if (!hdev->dma_pool) {
1848 dev_err(hdev->dev, "failed to create DMA pool\n");
1849 rc = -ENOMEM;
1850 goto free_gaudi_device;
1851 }
1852
1853 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854 if (rc)
1855 goto free_dma_pool;
1856
1857 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858 if (!hdev->cpu_accessible_dma_pool) {
1859 dev_err(hdev->dev,
1860 "Failed to create CPU accessible DMA pool\n");
1861 rc = -ENOMEM;
1862 goto free_cpu_dma_mem;
1863 }
1864
1865 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866 (uintptr_t) hdev->cpu_accessible_dma_mem,
1867 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868 if (rc) {
1869 dev_err(hdev->dev,
1870 "Failed to add memory to CPU accessible DMA pool\n");
1871 rc = -EFAULT;
1872 goto free_cpu_accessible_dma_pool;
1873 }
1874
1875 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876 if (rc)
1877 goto free_cpu_accessible_dma_pool;
1878
1879 spin_lock_init(&gaudi->hw_queues_lock);
1880 mutex_init(&gaudi->clk_gate_mutex);
1881
1882 hdev->supports_sync_stream = true;
1883 hdev->supports_coresight = true;
1884 hdev->supports_staged_submission = true;
1885 hdev->supports_wait_for_multi_cs = true;
1886
1887 hdev->asic_funcs->set_pci_memory_regions(hdev);
1888 hdev->stream_master_qid_arr =
1889 hdev->asic_funcs->get_stream_master_qid_arr();
1890 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891
1892 return 0;
1893
1894 free_cpu_accessible_dma_pool:
1895 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897 if (!hdev->asic_prop.fw_security_enabled)
1898 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899 hdev->cpu_pci_msb_addr);
1900 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901 HL_CPU_ACCESSIBLE_MEM_SIZE,
1902 hdev->cpu_accessible_dma_mem,
1903 hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905 dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907 kfree(gaudi);
1908 return rc;
1909 }
1910
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913 struct gaudi_device *gaudi = hdev->asic_specific;
1914
1915 gaudi_free_internal_qmans_pq_mem(hdev);
1916
1917 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918
1919 if (!hdev->asic_prop.fw_security_enabled)
1920 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921 hdev->cpu_pci_msb_addr);
1922
1923 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924 HL_CPU_ACCESSIBLE_MEM_SIZE,
1925 hdev->cpu_accessible_dma_mem,
1926 hdev->cpu_accessible_dma_address);
1927
1928 dma_pool_destroy(hdev->dma_pool);
1929
1930 mutex_destroy(&gaudi->clk_gate_mutex);
1931
1932 kfree(gaudi);
1933
1934 return 0;
1935 }
1936
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939 struct hl_device *hdev = arg;
1940 int i;
1941
1942 if (hdev->disabled)
1943 return IRQ_HANDLED;
1944
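	/* In single MSI mode all CQs and the EQ share one vector, so poll
	 * every queue on each interrupt.
	 */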
1945 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947
1948 hl_irq_handler_eq(irq, &hdev->event_queue);
1949
1950 return IRQ_HANDLED;
1951 }
1952
1953 /*
1954 * For backward compatibility, new MSI interrupts should be set after the
1955 * existing CPU and NIC interrupts.
1956 */
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958 bool cpu_eq)
1959 {
1960 int msi_vec;
1961
1962 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964 GAUDI_EVENT_QUEUE_MSI_IDX);
1965
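	/* Vectors up to and including the CPU EQ map 1:1; higher indices skip
	 * over the per-NIC vectors and the CPU EQ vector that precede them.
	 */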
1966 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967 (nr + NIC_NUMBER_OF_ENGINES + 1);
1968
1969 return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
1971
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974 int rc, irq;
1975
1976 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977
1978 irq = gaudi_pci_irq_vector(hdev, 0, false);
1979 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980 "gaudi single msi", hdev);
1981 if (rc)
1982 dev_err(hdev->dev,
1983 "Failed to request single MSI IRQ\n");
1984
1985 return rc;
1986 }
1987
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990 int cq_cnt = hdev->asic_prop.completion_queues_count;
1991 int rc, i, irq_cnt_init, irq;
1992
1993 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994 irq = gaudi_pci_irq_vector(hdev, i, false);
1995 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996 &hdev->completion_queue[i]);
1997 if (rc) {
1998 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1999 goto free_irqs;
2000 }
2001 }
2002
2003 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005 &hdev->event_queue);
2006 if (rc) {
2007 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2008 goto free_irqs;
2009 }
2010
2011 return 0;
2012
2013 free_irqs:
2014 for (i = 0 ; i < irq_cnt_init ; i++)
2015 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016 &hdev->completion_queue[i]);
2017 return rc;
2018 }
2019
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022 struct gaudi_device *gaudi = hdev->asic_specific;
2023 int rc;
2024
2025 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026 return 0;
2027
2028 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029 if (rc < 0) {
2030 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031 return rc;
2032 }
2033
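	/* rc holds the number of vectors actually allocated; fall back to
	 * single MSI mode when fewer than NUMBER_OF_INTERRUPTS are available.
	 */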
2034 if (rc < NUMBER_OF_INTERRUPTS) {
2035 gaudi->multi_msi_mode = false;
2036 rc = gaudi_enable_msi_single(hdev);
2037 } else {
2038 gaudi->multi_msi_mode = true;
2039 rc = gaudi_enable_msi_multi(hdev);
2040 }
2041
2042 if (rc)
2043 goto free_pci_irq_vectors;
2044
2045 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046
2047 return 0;
2048
2049 free_pci_irq_vectors:
2050 pci_free_irq_vectors(hdev->pdev);
2051 return rc;
2052 }
2053
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056 struct gaudi_device *gaudi = hdev->asic_specific;
2057 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058
2059 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060 return;
2061
2062 /* Wait for all pending IRQs to be finished */
2063 if (gaudi->multi_msi_mode) {
2064 for (i = 0 ; i < cq_cnt ; i++)
2065 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066
2067 synchronize_irq(gaudi_pci_irq_vector(hdev,
2068 GAUDI_EVENT_QUEUE_MSI_IDX,
2069 true));
2070 } else {
2071 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072 }
2073 }
2074
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077 struct gaudi_device *gaudi = hdev->asic_specific;
2078 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079
2080 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081 return;
2082
2083 gaudi_sync_irqs(hdev);
2084
2085 if (gaudi->multi_msi_mode) {
2086 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087 true);
2088 free_irq(irq, &hdev->event_queue);
2089
2090 for (i = 0 ; i < cq_cnt ; i++) {
2091 irq = gaudi_pci_irq_vector(hdev, i, false);
2092 free_irq(irq, &hdev->completion_queue[i]);
2093 }
2094 } else {
2095 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096 }
2097
2098 pci_free_irq_vectors(hdev->pdev);
2099
2100 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105 struct gaudi_device *gaudi = hdev->asic_specific;
2106
2107 if (hdev->asic_prop.fw_security_enabled)
2108 return;
2109
2110 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112 return;
2113
2114 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115 return;
2116
2117 if (!hdev->sram_scrambler_enable)
2118 return;
2119
2120 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136
2137 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153
2154 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170
2171 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176 struct gaudi_device *gaudi = hdev->asic_specific;
2177
2178 if (hdev->asic_prop.fw_security_enabled)
2179 return;
2180
2181 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183 return;
2184
2185 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186 return;
2187
2188 if (!hdev->dram_scrambler_enable)
2189 return;
2190
2191 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207
2208 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224
2225 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241
2242 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247 if (hdev->asic_prop.fw_security_enabled)
2248 return;
2249
2250 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252 return;
2253
2254 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258
2259 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263
2264 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268
2269 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273
2274 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278
2279 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283
2284 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288
2289 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293
2294 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298
2299 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303
2304 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308
2309 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313
2314 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318
2319 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323
2324 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328
2329 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333
2334 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338
2339 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343
2344 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348
2349 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353
2354 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358
2359 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363
2364 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368
2369 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373
2374 if (!hdev->dram_scrambler_enable) {
2375 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379
2380 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384
2385 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389
2390 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394
2395 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399
2400 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404
2405 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409
2410 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414
2415 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419
2420 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424
2425 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429
2430 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434
2435 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439
2440 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444
2445 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449
2450 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454
2455 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459
2460 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464
2465 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469
2470 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474
2475 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479
2480 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484
2485 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489
2490 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494 }
2495
2496 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500
2501 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505
2506 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510
2511 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560
2561 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565
2566 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570
2571 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575
2576 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580
2581 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585
2586 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590
2591 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595
2596 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600
2601 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605
2606 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610
2611 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620
2621 if (hdev->asic_prop.fw_security_enabled)
2622 return;
2623
2624 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626 return;
2627
2628 hbm0_wr = 0x33333333;
2629 hbm0_rd = 0x77777777;
2630 hbm1_wr = 0x55555555;
2631 hbm1_rd = 0xDDDDDDDD;
2632
2633 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637
2638 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642
2643 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647
2648 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652
2653 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665
2666 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682 u32 tpc_offset;
2683 int tpc_id, i;
2684
2685 gaudi_init_e2e(hdev);
2686 gaudi_init_hbm_cred(hdev);
2687
2688 for (tpc_id = 0, tpc_offset = 0;
2689 tpc_id < TPC_NUMBER_OF_ENGINES;
2690 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691 /* Mask all arithmetic interrupts from TPC */
2692 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693 /* Set 16 cache lines */
2694 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695 ICACHE_FETCH_LINE_NUM, 2);
2696 }
2697
2698 /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2699 for (i = 0 ; i < 128 ; i += 8)
2700 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701
2702 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709 int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711 struct cpu_dyn_regs *dyn_regs =
2712 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715 u32 q_off, dma_qm_offset;
2716 u32 dma_qm_err_cfg, irq_handler_offset;
2717
2718 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719
2720 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724 so_base_en_lo = lower_32_bits(CFG_BASE +
2725 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726 so_base_en_hi = upper_32_bits(CFG_BASE +
2727 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732 so_base_ws_lo = lower_32_bits(CFG_BASE +
2733 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734 so_base_ws_hi = upper_32_bits(CFG_BASE +
2735 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736
2737 q_off = dma_qm_offset + qman_id * 4;
2738
2739 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741
2742 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745
2746 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748 QMAN_LDMA_SRC_OFFSET);
2749 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750 QMAN_LDMA_DST_OFFSET);
2751
2752 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760
2761 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762
2763 /* The following configuration is needed only once per QMAN */
2764 if (qman_id == 0) {
2765 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768
2769 /* Configure RAZWI IRQ */
2770 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771 if (hdev->stop_on_err)
2772 dma_qm_err_cfg |=
2773 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774
2775 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776
2777 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778 lower_32_bits(CFG_BASE + irq_handler_offset));
2779 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780 upper_32_bits(CFG_BASE + irq_handler_offset));
2781
2782 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784 dma_id);
2785
2786 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787 QM_ARB_ERR_MSG_EN_MASK);
2788
2789 /* Increase ARB WDT to support streams architecture */
2790 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791 GAUDI_ARB_WDT_TIMEOUT);
2792
2793 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794 QMAN_EXTERNAL_MAKE_TRUSTED);
2795
2796 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797 }
2798 }
2799
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802 struct cpu_dyn_regs *dyn_regs =
2803 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806 u32 irq_handler_offset;
2807
2808 /* Set to maximum possible according to physical size */
2809 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811
2812 /* WA for H/W bug H3-2116 */
2813 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814
2815 /* The STOP_ON bit implies no completion is sent for the operation in case of RAZWI */
2816 if (hdev->stop_on_err)
2817 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818
2819 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820
2821 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824
2825 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826 lower_32_bits(CFG_BASE + irq_handler_offset));
2827 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828 upper_32_bits(CFG_BASE + irq_handler_offset));
2829
2830 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832 WREG32(mmDMA0_CORE_PROT + dma_offset,
2833 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834 /* If the channel is secured, it should be in MMU bypass mode */
2835 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841 u32 enable_mask)
2842 {
2843 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844
2845 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850 struct gaudi_device *gaudi = hdev->asic_specific;
2851 struct hl_hw_queue *q;
2852 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853
2854 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855 return;
2856
2857 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858 dma_id = gaudi_dma_assignment[i];
2859 /*
2860 * For queues after the CPU queue, add 1 to get the correct
2861 * queue index. In addition, add the CPU EQ and NIC IRQs in
2862 * order to get the correct MSI register.
2863 */
2864 if (dma_id > 1) {
2865 cpu_skip = 1;
2866 nic_skip = NIC_NUMBER_OF_ENGINES;
2867 } else {
2868 cpu_skip = 0;
2869 nic_skip = 0;
2870 }
2871
2872 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873 q_idx = 4 * dma_id + j + cpu_skip;
2874 q = &hdev->kernel_queues[q_idx];
2875 q->cq_id = cq_id++;
2876 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878 q->bus_address);
2879 }
2880
2881 gaudi_init_dma_core(hdev, dma_id);
2882
2883 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884 }
2885
2886 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890 int qman_id, u64 qman_base_addr)
2891 {
2892 struct cpu_dyn_regs *dyn_regs =
2893 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896 u32 dma_qm_err_cfg, irq_handler_offset;
2897 u32 q_off, dma_qm_offset;
2898
2899 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900
2901 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905 so_base_en_lo = lower_32_bits(CFG_BASE +
2906 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907 so_base_en_hi = upper_32_bits(CFG_BASE +
2908 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913 so_base_ws_lo = lower_32_bits(CFG_BASE +
2914 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915 so_base_ws_hi = upper_32_bits(CFG_BASE +
2916 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917
2918 q_off = dma_qm_offset + qman_id * 4;
2919
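	/* qman_ids 0-3 are the per-stream upper CPs and each gets a PQ;
	 * qman_id 4 is the lower CP, which has no PQ and instead gets the
	 * error and ARB configuration.
	 */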
2920 if (qman_id < 4) {
2921 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922 lower_32_bits(qman_base_addr));
2923 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924 upper_32_bits(qman_base_addr));
2925
2926 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929
2930 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931 QMAN_CPDMA_SIZE_OFFSET);
2932 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933 QMAN_CPDMA_SRC_OFFSET);
2934 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935 QMAN_CPDMA_DST_OFFSET);
2936 } else {
2937 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940
2941 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942 QMAN_LDMA_SIZE_OFFSET);
2943 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944 QMAN_LDMA_SRC_OFFSET);
2945 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946 QMAN_LDMA_DST_OFFSET);
2947
2948 /* Configure RAZWI IRQ */
2949 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950 if (hdev->stop_on_err)
2951 dma_qm_err_cfg |=
2952 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953
2954 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955
2956 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957 lower_32_bits(CFG_BASE + irq_handler_offset));
2958 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959 upper_32_bits(CFG_BASE + irq_handler_offset));
2960
2961 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963 dma_id);
2964
2965 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966 QM_ARB_ERR_MSG_EN_MASK);
2967
2968 /* Increase ARB WDT to support streams architecture */
2969 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970 GAUDI_ARB_WDT_TIMEOUT);
2971
2972 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974 QMAN_INTERNAL_MAKE_TRUSTED);
2975 }
2976
2977 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981
2982 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985 mtr_base_ws_lo);
2986 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987 mtr_base_ws_hi);
2988 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989 so_base_ws_lo);
2990 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991 so_base_ws_hi);
2992 }
2993 }
2994
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997 struct gaudi_device *gaudi = hdev->asic_specific;
2998 struct gaudi_internal_qman_info *q;
2999 u64 qman_base_addr;
3000 int i, j, dma_id, internal_q_index;
3001
3002 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003 return;
3004
3005 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007
3008 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009 /*
3010 * Add 1 for the CPU queue in order to get the correct queue
3011 * index, as all internal queues are placed after it
3012 */
3013 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014
3015 q = &gaudi->internal_qmans[internal_q_index];
3016 qman_base_addr = (u64) q->pq_dma_addr;
3017 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018 qman_base_addr);
3019 }
3020
3021 /* Initializing lower CP for HBM DMA QMAN */
3022 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023
3024 gaudi_init_dma_core(hdev, dma_id);
3025
3026 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027 }
3028
3029 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033 int qman_id, u64 qman_base_addr)
3034 {
3035 struct cpu_dyn_regs *dyn_regs =
3036 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037 u32 mtr_base_lo, mtr_base_hi;
3038 u32 so_base_lo, so_base_hi;
3039 u32 irq_handler_offset;
3040 u32 q_off, mme_id;
3041 u32 mme_qm_err_cfg;
3042
3043 mtr_base_lo = lower_32_bits(CFG_BASE +
3044 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045 mtr_base_hi = upper_32_bits(CFG_BASE +
3046 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047 so_base_lo = lower_32_bits(CFG_BASE +
3048 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049 so_base_hi = upper_32_bits(CFG_BASE +
3050 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051
3052 q_off = mme_offset + qman_id * 4;
3053
3054 if (qman_id < 4) {
3055 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056 lower_32_bits(qman_base_addr));
3057 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058 upper_32_bits(qman_base_addr));
3059
3060 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063
3064 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065 QMAN_CPDMA_SIZE_OFFSET);
3066 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067 QMAN_CPDMA_SRC_OFFSET);
3068 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069 QMAN_CPDMA_DST_OFFSET);
3070 } else {
3071 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074
3075 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076 QMAN_LDMA_SIZE_OFFSET);
3077 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078 QMAN_LDMA_SRC_OFFSET);
3079 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080 QMAN_LDMA_DST_OFFSET);
3081
3082 /* Configure RAZWI IRQ */
3083 mme_id = mme_offset /
3084 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085
3086 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087 if (hdev->stop_on_err)
3088 mme_qm_err_cfg |=
3089 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090
3091 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092
3093 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094 lower_32_bits(CFG_BASE + irq_handler_offset));
3095 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096 upper_32_bits(CFG_BASE + irq_handler_offset));
3097
3098 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100 mme_id);
3101
3102 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103 QM_ARB_ERR_MSG_EN_MASK);
3104
3105 /* Increase ARB WDT to support streams architecture */
3106 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107 GAUDI_ARB_WDT_TIMEOUT);
3108
3109 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111 QMAN_INTERNAL_MAKE_TRUSTED);
3112 }
3113
3114 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122 struct gaudi_device *gaudi = hdev->asic_specific;
3123 struct gaudi_internal_qman_info *q;
3124 u64 qman_base_addr;
3125 u32 mme_offset;
3126 int i, internal_q_index;
3127
3128 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129 return;
3130
3131 /*
3132 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134 */
3135
3136 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137
3138 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140 q = &gaudi->internal_qmans[internal_q_index];
3141 qman_base_addr = (u64) q->pq_dma_addr;
3142 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143 qman_base_addr);
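		/* After the four MME_0_* streams, switch to the MME0 block
		 * (offset 0) for the MME_1_* streams.
		 */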
3144 if (i == 3)
3145 mme_offset = 0;
3146 }
3147
3148 /* Initializing lower CP for MME QMANs */
3149 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151 gaudi_init_mme_qman(hdev, 0, 4, 0);
3152
3153 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155
3156 gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160 int qman_id, u64 qman_base_addr)
3161 {
3162 struct cpu_dyn_regs *dyn_regs =
3163 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166 u32 tpc_qm_err_cfg, irq_handler_offset;
3167 u32 q_off, tpc_id;
3168
3169 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 so_base_en_lo = lower_32_bits(CFG_BASE +
3174 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175 so_base_en_hi = upper_32_bits(CFG_BASE +
3176 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181 so_base_ws_lo = lower_32_bits(CFG_BASE +
3182 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183 so_base_ws_hi = upper_32_bits(CFG_BASE +
3184 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185
3186 q_off = tpc_offset + qman_id * 4;
3187
3188 tpc_id = tpc_offset /
3189 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190
3191 if (qman_id < 4) {
3192 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193 lower_32_bits(qman_base_addr));
3194 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195 upper_32_bits(qman_base_addr));
3196
3197 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200
3201 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202 QMAN_CPDMA_SIZE_OFFSET);
3203 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204 QMAN_CPDMA_SRC_OFFSET);
3205 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206 QMAN_CPDMA_DST_OFFSET);
3207 } else {
3208 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211
3212 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213 QMAN_LDMA_SIZE_OFFSET);
3214 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215 QMAN_LDMA_SRC_OFFSET);
3216 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217 QMAN_LDMA_DST_OFFSET);
3218
3219 /* Configure RAZWI IRQ */
3220 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221 if (hdev->stop_on_err)
3222 tpc_qm_err_cfg |=
3223 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224
3225 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226
3227 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228 lower_32_bits(CFG_BASE + irq_handler_offset));
3229 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230 upper_32_bits(CFG_BASE + irq_handler_offset));
3231
3232 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234 tpc_id);
3235
3236 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237 QM_ARB_ERR_MSG_EN_MASK);
3238
3239 /* Increase ARB WDT to support streams architecture */
3240 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241 GAUDI_ARB_WDT_TIMEOUT);
3242
3243 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245 QMAN_INTERNAL_MAKE_TRUSTED);
3246 }
3247
3248 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252
3253 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254 if (tpc_id == 6) {
3255 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256 mtr_base_ws_lo);
3257 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258 mtr_base_ws_hi);
3259 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260 so_base_ws_lo);
3261 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262 so_base_ws_hi);
3263 }
3264 }
3265
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268 struct gaudi_device *gaudi = hdev->asic_specific;
3269 struct gaudi_internal_qman_info *q;
3270 u64 qman_base_addr;
3271 u32 so_base_hi, tpc_offset = 0;
3272 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274 int i, tpc_id, internal_q_index;
3275
3276 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277 return;
3278
3279 so_base_hi = upper_32_bits(CFG_BASE +
3280 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281
3282 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285 tpc_id * QMAN_STREAMS + i;
3286 q = &gaudi->internal_qmans[internal_q_index];
3287 qman_base_addr = (u64) q->pq_dma_addr;
3288 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289 qman_base_addr);
3290
3291 if (i == 3) {
3292 /* Initializing lower CP for TPC QMAN */
3293 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294
3295 /* Enable the QMAN and TPC channel */
3296 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297 QMAN_TPC_ENABLE);
3298 }
3299 }
3300
3301 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302 so_base_hi);
3303
3304 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305
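/*
 * Record this TPC's bit inside HW_CAP_TPC_MASK so the stop, disable
 * and reset paths know exactly which TPC QMANs were brought up.
 */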
3306 gaudi->hw_cap_initialized |=
3307 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308 }
3309 }
3310
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312 int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314 struct cpu_dyn_regs *dyn_regs =
3315 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318 u32 nic_qm_err_cfg, irq_handler_offset;
3319 u32 q_off;
3320
3321 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3322 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3326 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327 so_base_en_hi = upper_32_bits(CFG_BASE +
3328 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3330 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3334 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335 so_base_ws_hi = upper_32_bits(CFG_BASE +
3336 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337
3338 q_off = nic_offset + qman_id * 4;
3339
3340 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342
3343 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346
3347 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348 QMAN_LDMA_SIZE_OFFSET);
3349 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350 QMAN_LDMA_SRC_OFFSET);
3351 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352 QMAN_LDMA_DST_OFFSET);
3353
3354 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358
3359 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364
3365 if (qman_id == 0) {
3366 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369
3370 /* Configure RAZWI IRQ */
3371 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372 if (hdev->stop_on_err)
3373 nic_qm_err_cfg |=
3374 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375
3376 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377
3378 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379 lower_32_bits(CFG_BASE + irq_handler_offset));
3380 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381 upper_32_bits(CFG_BASE + irq_handler_offset));
3382
3383 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385 nic_id);
3386
3387 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388 QM_ARB_ERR_MSG_EN_MASK);
3389
3390 /* Increase ARB WDT to support streams architecture */
3391 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392 GAUDI_ARB_WDT_TIMEOUT);
3393
3394 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396 QMAN_INTERNAL_MAKE_TRUSTED);
3397 }
3398 }
3399
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402 struct gaudi_device *gaudi = hdev->asic_specific;
3403 struct gaudi_internal_qman_info *q;
3404 u64 qman_base_addr;
3405 u32 nic_offset = 0;
3406 u32 nic_delta_between_qmans =
3407 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408 u32 nic_delta_between_nics =
3409 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 int i, nic_id, internal_q_index;
3411
3412 if (!hdev->nic_ports_mask)
3413 return;
3414
3415 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416 return;
3417
3418 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419
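/*
 * Register-offset walk: each NIC macro hosts two QMANs, so after every
 * QMAN we advance by the per-QMAN stride, and after the odd (second)
 * QMAN of a macro we rewind the two per-QMAN steps and jump by the
 * per-NIC stride instead. The walk therefore visits NIC0/QM0, NIC0/QM1,
 * NIC1/QM0 and so on. The same bookkeeping is applied to ports that
 * are masked out, to keep the offsets in sync with nic_id.
 */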
3420 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422 nic_offset += nic_delta_between_qmans;
3423 if (nic_id & 1) {
3424 nic_offset -= (nic_delta_between_qmans * 2);
3425 nic_offset += nic_delta_between_nics;
3426 }
3427 continue;
3428 }
3429
3430 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432 nic_id * QMAN_STREAMS + i;
3433 q = &gaudi->internal_qmans[internal_q_index];
3434 qman_base_addr = (u64) q->pq_dma_addr;
3435 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436 qman_base_addr, nic_id);
3437 }
3438
3439 /* Enable the QMAN */
3440 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441
3442 nic_offset += nic_delta_between_qmans;
3443 if (nic_id & 1) {
3444 nic_offset -= (nic_delta_between_qmans * 2);
3445 nic_offset += nic_delta_between_nics;
3446 }
3447
3448 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449 }
3450 }
3451
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454 struct gaudi_device *gaudi = hdev->asic_specific;
3455
3456 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457 return;
3458
3459 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466 struct gaudi_device *gaudi = hdev->asic_specific;
3467
3468 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469 return;
3470
3471 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480 struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483 return;
3484
3485 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491 struct gaudi_device *gaudi = hdev->asic_specific;
3492 u32 tpc_offset = 0;
3493 int tpc_id;
3494
3495 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496 return;
3497
3498 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501 }
3502 }
3503
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506 struct gaudi_device *gaudi = hdev->asic_specific;
3507 u32 nic_mask, nic_offset = 0;
3508 u32 nic_delta_between_qmans =
3509 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510 u32 nic_delta_between_nics =
3511 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512 int nic_id;
3513
3514 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516
3517 if (gaudi->hw_cap_initialized & nic_mask)
3518 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519
3520 nic_offset += nic_delta_between_qmans;
3521 if (nic_id & 1) {
3522 nic_offset -= (nic_delta_between_qmans * 2);
3523 nic_offset += nic_delta_between_nics;
3524 }
3525 }
3526 }
3527
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530 struct gaudi_device *gaudi = hdev->asic_specific;
3531
3532 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533 return;
3534
3535 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543 struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546 return;
3547
3548 /* Stop CPs of HBM DMA QMANs */
3549
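/*
 * 0x1F covers all five CPs of an internal QMAN (the four upper/stream
 * CPs plus the lower CP), unlike the 0xF used above for the PCI DMA
 * QMANs, which stops only the upper CPs.
 */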
3550 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559 struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562 return;
3563
3564 /* Stop CPs of MME QMANs */
3565 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571 struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574 return;
3575
3576 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588 struct gaudi_device *gaudi = hdev->asic_specific;
3589
3590 /* Stop upper CPs of QMANs */
3591
3592 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593 WREG32(mmNIC0_QM0_GLBL_CFG1,
3594 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597
3598 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599 WREG32(mmNIC0_QM1_GLBL_CFG1,
3600 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603
3604 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605 WREG32(mmNIC1_QM0_GLBL_CFG1,
3606 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609
3610 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611 WREG32(mmNIC1_QM1_GLBL_CFG1,
3612 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617 WREG32(mmNIC2_QM0_GLBL_CFG1,
3618 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623 WREG32(mmNIC2_QM1_GLBL_CFG1,
3624 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629 WREG32(mmNIC3_QM0_GLBL_CFG1,
3630 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633
3634 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635 WREG32(mmNIC3_QM1_GLBL_CFG1,
3636 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639
3640 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641 WREG32(mmNIC4_QM0_GLBL_CFG1,
3642 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645
3646 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647 WREG32(mmNIC4_QM1_GLBL_CFG1,
3648 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655 struct gaudi_device *gaudi = hdev->asic_specific;
3656
3657 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658 return;
3659
3660 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667 struct gaudi_device *gaudi = hdev->asic_specific;
3668
3669 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670 return;
3671
3672 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681 struct gaudi_device *gaudi = hdev->asic_specific;
3682
3683 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684 return;
3685
3686 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707 struct gaudi_device *gaudi = hdev->asic_specific;
3708
3709 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710 return;
3711
3712 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724 struct gaudi_device *gaudi = hdev->asic_specific;
3725 u32 qman_offset;
3726 bool enable;
3727 int i;
3728
3729 /* If a debug session is in progress, don't enable clock gating
3730 * as it may interfere with the debug functionality
3731 */
3732 if (hdev->in_debug)
3733 return;
3734
3735 if (hdev->asic_prop.fw_security_enabled)
3736 return;
3737
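/*
 * hdev->clock_gating_mask holds one bit per engine id; gating is
 * enabled for an engine only if its bit is set. The mask can be
 * narrowed at runtime (e.g. through debugfs) to keep specific engines
 * ungated.
 */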
3738 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739 enable = !!(hdev->clock_gating_mask &
3740 (BIT_ULL(gaudi_dma_assignment[i])));
3741
3742 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747 }
3748
3749 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750 enable = !!(hdev->clock_gating_mask &
3751 (BIT_ULL(gaudi_dma_assignment[i])));
3752
3753 /* GC sends work to the DMA engine through the upper CP in DMA5,
3754 * so we must not enable clock gating for that DMA
3755 */
3756 if (i == GAUDI_HBM_DMA_4)
3757 enable = 0;
3758
3759 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764 }
3765
3766 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769
3770 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773
3774 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775 enable = !!(hdev->clock_gating_mask &
3776 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777
3778 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782
3783 qman_offset += TPC_QMAN_OFFSET;
3784 }
3785
3786 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791 struct gaudi_device *gaudi = hdev->asic_specific;
3792 u32 qman_offset;
3793 int i;
3794
3795 if (hdev->asic_prop.fw_security_enabled)
3796 return;
3797
3798 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801
3802 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803 }
3804
3805 WREG32(mmMME0_QM_CGM_CFG, 0);
3806 WREG32(mmMME0_QM_CGM_CFG1, 0);
3807 WREG32(mmMME2_QM_CGM_CFG, 0);
3808 WREG32(mmMME2_QM_CGM_CFG1, 0);
3809
3810 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813
3814 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815 }
3816
3817 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822 /* Disable the timestamp counter */
3823 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824
3825 /* Zero the lower/upper parts of the 64-bit counter */
3826 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828
3829 /* Enable the counter */
3830 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835 /* Disable the timestamp counter */
3836 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841 u32 wait_timeout_ms;
3842
3843 dev_info(hdev->dev,
3844 "Halting compute engines and disabling interrupts\n");
3845
3846 if (hdev->pldm)
3847 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848 else
3849 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850
3851 if (fw_reset)
3852 goto skip_engines;
3853
3854 gaudi_stop_nic_qmans(hdev);
3855 gaudi_stop_mme_qmans(hdev);
3856 gaudi_stop_tpc_qmans(hdev);
3857 gaudi_stop_hbm_dma_qmans(hdev);
3858 gaudi_stop_pci_dma_qmans(hdev);
3859
3860 hdev->asic_funcs->disable_clock_gating(hdev);
3861
3862 msleep(wait_timeout_ms);
3863
3864 gaudi_pci_dma_stall(hdev);
3865 gaudi_hbm_dma_stall(hdev);
3866 gaudi_tpc_stall(hdev);
3867 gaudi_mme_stall(hdev);
3868
3869 msleep(wait_timeout_ms);
3870
3871 gaudi_disable_nic_qmans(hdev);
3872 gaudi_disable_mme_qmans(hdev);
3873 gaudi_disable_tpc_qmans(hdev);
3874 gaudi_disable_hbm_dma_qmans(hdev);
3875 gaudi_disable_pci_dma_qmans(hdev);
3876
3877 gaudi_disable_timestamp(hdev);
3878
3879 skip_engines:
3880 gaudi_disable_msi(hdev);
3881 }
3882
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885 struct asic_fixed_properties *prop = &hdev->asic_prop;
3886 struct gaudi_device *gaudi = hdev->asic_specific;
3887 u64 hop0_addr;
3888 int rc, i;
3889
3890 if (!hdev->mmu_enable)
3891 return 0;
3892
3893 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894 return 0;
3895
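/*
 * Each ASID gets its own hop0 page table; the tables are laid out
 * consecutively in the MMU page-table area, one mmu_hop_table_size
 * apart, and the H/W is told where each one starts.
 */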
3896 for (i = 0 ; i < prop->max_asid ; i++) {
3897 hop0_addr = prop->mmu_pgt_addr +
3898 (i * prop->mmu_hop_table_size);
3899
3900 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901 if (rc) {
3902 dev_err(hdev->dev,
3903 "failed to set hop0 addr for asid %d\n", i);
3904 goto err;
3905 }
3906 }
3907
3908 /* init MMU cache manage page */
3909 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911
3912 /* mem cache invalidation */
3913 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914
3915 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916
3917 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919
3920 WREG32(mmSTLB_HOP_CONFIGURATION,
3921 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922
3923 /*
3924 * The H/W expects the first PI after init to be 1. After wraparound
3925 * we'll write 0.
3926 */
3927 gaudi->mmu_cache_inv_pi = 1;
3928
3929 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930
3931 return 0;
3932
3933 err:
3934 return rc;
3935 }
3936
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939 void __iomem *dst;
3940
3941 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942
3943 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948 void __iomem *dst;
3949
3950 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951
3952 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957 struct dynamic_fw_load_mgr *dynamic_loader;
3958 struct cpu_dyn_regs *dyn_regs;
3959
3960 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961
3962 /*
3963 * Set initial values for a few specific dynamic registers. Before the
3964 * first descriptor is read from the FW, these values must be
3965 * hard-coded; in later stages of the protocol they are updated
3966 * automatically by reading the FW descriptor, so the data there is
3967 * always up-to-date.
3968 */
3969 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970 dyn_regs->kmd_msg_to_cpu =
3971 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972 dyn_regs->cpu_cmd_status_to_host =
3973 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974
3975 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980 struct static_fw_load_mgr *static_loader;
3981
3982 static_loader = &hdev->fw_loader.static_loader;
3983
3984 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997 GAUDI_PLDM_RESET_WAIT_MSEC :
3998 GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003 struct asic_fixed_properties *prop = &hdev->asic_prop;
4004 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005
4006 /* fill common fields */
4007 fw_loader->linux_loaded = false;
4008 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012 fw_loader->skip_bmc = !hdev->bmc_enable;
4013 fw_loader->sram_bar_id = SRAM_BAR_ID;
4014 fw_loader->dram_bar_id = HBM_BAR_ID;
4015
4016 if (prop->dynamic_fw_load)
4017 gaudi_init_dynamic_firmware_loader(hdev);
4018 else
4019 gaudi_init_static_firmware_loader(hdev);
4020 }
4021
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024 struct gaudi_device *gaudi = hdev->asic_specific;
4025 int rc;
4026
4027 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028 return 0;
4029
4030 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031 return 0;
4032
4033 /*
4034 * The device CPU works with 40-bit addresses.
4035 * This register sets the extension to 50 bits.
4036 */
4037 if (!hdev->asic_prop.fw_security_enabled)
4038 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039
4040 rc = hl_fw_init_cpu(hdev);
4041
4042 if (rc)
4043 return rc;
4044
4045 gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046
4047 return 0;
4048 }
4049
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052 struct cpu_dyn_regs *dyn_regs =
4053 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054 struct asic_fixed_properties *prop = &hdev->asic_prop;
4055 struct gaudi_device *gaudi = hdev->asic_specific;
4056 u32 status, irq_handler_offset;
4057 struct hl_eq *eq;
4058 struct hl_hw_queue *cpu_pq =
4059 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060 int err;
4061
4062 if (!hdev->cpu_queues_enable)
4063 return 0;
4064
4065 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066 return 0;
4067
4068 eq = &hdev->event_queue;
4069
4070 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072
4073 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075
4076 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077 lower_32_bits(hdev->cpu_accessible_dma_address));
4078 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079 upper_32_bits(hdev->cpu_accessible_dma_address));
4080
4081 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084
4085 /* Used for EQ CI */
4086 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087
4088 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089
4090 if (gaudi->multi_msi_mode)
4091 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092 else
4093 WREG32(mmCPU_IF_QUEUE_INIT,
4094 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095
4096 irq_handler_offset = prop->gic_interrupts_enable ?
4097 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099
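/*
 * Kick the device CPU so it samples the queue configuration written
 * above; the PI-update event is raised either through the GIC (when
 * enabled) or through the legacy interrupt register taken from the
 * dynamic regs descriptor.
 */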
4100 WREG32(irq_handler_offset,
4101 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102
4103 err = hl_poll_timeout(
4104 hdev,
4105 mmCPU_IF_QUEUE_INIT,
4106 status,
4107 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4108 1000,
4109 cpu_timeout);
4110
4111 if (err) {
4112 dev_err(hdev->dev,
4113 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114 return -EIO;
4115 }
4116
4117 /* update FW application security bits */
4118 if (prop->fw_cpu_boot_dev_sts0_valid)
4119 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120 if (prop->fw_cpu_boot_dev_sts1_valid)
4121 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122
4123 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124 return 0;
4125 }
4126
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129 /* Perform read from the device to make sure device is up */
4130 RREG32(mmHW_STATE);
4131
4132 if (!hdev->asic_prop.fw_security_enabled) {
4133 /* Set the access through PCI bars (Linux driver only) as
4134 * secured
4135 */
4136 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139
4140 /* Perform read to flush the waiting writes to ensure
4141 * configuration was set in the device
4142 */
4143 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144 }
4145
4146 /*
4147 * Let's mark in the H/W that we have reached this point. We check
4148 * this value in the reset_before_init function to understand whether
4149 * we need to reset the chip before doing H/W init. This register is
4150 * cleared by the H/W upon H/W reset
4151 */
4152 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157 struct gaudi_device *gaudi = hdev->asic_specific;
4158 int rc;
4159
4160 gaudi_pre_hw_init(hdev);
4161
4162 /* If the iATU is configured by FW, the HBM bar ALWAYS points to
4163 * DRAM_PHYS_BASE. We record that here, so any later attempt to move
4164 * it to a different address will fail with an error
4165 */
4166 if (hdev->asic_prop.iatu_done_by_fw)
4167 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168
4169 /*
4170 * Before pushing u-boot/linux to device, need to set the hbm bar to
4171 * base address of dram
4172 */
4173 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174 dev_err(hdev->dev,
4175 "failed to map HBM bar to DRAM base address\n");
4176 return -EIO;
4177 }
4178
4179 rc = gaudi_init_cpu(hdev);
4180 if (rc) {
4181 dev_err(hdev->dev, "failed to initialize CPU\n");
4182 return rc;
4183 }
4184
4185 /* If clock gating was enabled in preboot, we need to disable it here
4186 * before touching the MME/TPC registers.
4187 * There is no need to take the clock gating mutex because no other
4188 * relevant code can run while this function executes
4189 */
4190 hdev->asic_funcs->disable_clock_gating(hdev);
4191
4192 /* SRAM scrambler must be initialized after CPU is running from HBM */
4193 gaudi_init_scrambler_sram(hdev);
4194
4195 /* This is here just in case we are working without CPU */
4196 gaudi_init_scrambler_hbm(hdev);
4197
4198 gaudi_init_golden_registers(hdev);
4199
4200 rc = gaudi_mmu_init(hdev);
4201 if (rc)
4202 return rc;
4203
4204 gaudi_init_security(hdev);
4205
4206 gaudi_init_pci_dma_qmans(hdev);
4207
4208 gaudi_init_hbm_dma_qmans(hdev);
4209
4210 gaudi_init_mme_qmans(hdev);
4211
4212 gaudi_init_tpc_qmans(hdev);
4213
4214 gaudi_init_nic_qmans(hdev);
4215
4216 hdev->asic_funcs->set_clock_gating(hdev);
4217
4218 gaudi_enable_timestamp(hdev);
4219
4220 /* MSI must be enabled before CPU queues and NIC are initialized */
4221 rc = gaudi_enable_msi(hdev);
4222 if (rc)
4223 goto disable_queues;
4224
4225 /* must be called after MSI was enabled */
4226 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227 if (rc) {
4228 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229 rc);
4230 goto disable_msi;
4231 }
4232
4233 /* Perform read from the device to flush all configuration */
4234 RREG32(mmHW_STATE);
4235
4236 return 0;
4237
4238 disable_msi:
4239 gaudi_disable_msi(hdev);
4240 disable_queues:
4241 gaudi_disable_mme_qmans(hdev);
4242 gaudi_disable_pci_dma_qmans(hdev);
4243
4244 return rc;
4245 }
4246
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249 struct cpu_dyn_regs *dyn_regs =
4250 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252 struct gaudi_device *gaudi = hdev->asic_specific;
4253 bool driver_performs_reset;
4254
4255 if (!hard_reset) {
4256 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257 return;
4258 }
4259
4260 if (hdev->pldm) {
4261 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263 } else {
4264 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266 }
4267
4268 if (fw_reset) {
4269 dev_info(hdev->dev,
4270 "Firmware performs HARD reset, going to wait %dms\n",
4271 reset_timeout_ms);
4272
4273 goto skip_reset;
4274 }
4275
4276 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277 !hdev->asic_prop.hard_reset_done_by_fw);
4278
4279 /* Set device to handle FLR by H/W as we will put the device CPU to
4280 * halt mode
4281 */
4282 if (driver_performs_reset)
4283 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285
4286 /* If Linux is loaded on the device CPU, we need to communicate with
4287 * it via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
4288 * registers in case of old F/Ws
4289 */
4290 if (hdev->fw_loader.linux_loaded) {
4291 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294
4295 WREG32(irq_handler_offset,
4296 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297 } else {
4298 if (hdev->asic_prop.hard_reset_done_by_fw)
4299 hl_fw_ask_hard_reset_without_linux(hdev);
4300 else
4301 hl_fw_ask_halt_machine_without_linux(hdev);
4302 }
4303
4304 if (driver_performs_reset) {
4305
4306 /* Configure the reset registers. Must be done as early as
4307 * possible in case we fail during H/W initialization
4308 */
4309 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310 (CFG_RST_H_DMA_MASK |
4311 CFG_RST_H_MME_MASK |
4312 CFG_RST_H_SM_MASK |
4313 CFG_RST_H_TPC_7_MASK));
4314
4315 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316
4317 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318 (CFG_RST_H_HBM_MASK |
4319 CFG_RST_H_TPC_7_MASK |
4320 CFG_RST_H_NIC_MASK |
4321 CFG_RST_H_SM_MASK |
4322 CFG_RST_H_DMA_MASK |
4323 CFG_RST_H_MME_MASK |
4324 CFG_RST_H_CPU_MASK |
4325 CFG_RST_H_MMU_MASK));
4326
4327 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328 (CFG_RST_L_IF_MASK |
4329 CFG_RST_L_PSOC_MASK |
4330 CFG_RST_L_TPC_MASK));
4331
4332 msleep(cpu_timeout_ms);
4333
4334 /* Tell ASIC not to re-initialize PCIe */
4335 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336
4337 /* Restart BTL/BLR upon hard-reset */
4338 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339
4340 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342
4343 dev_info(hdev->dev,
4344 "Issued HARD reset command, going to wait %dms\n",
4345 reset_timeout_ms);
4346 } else {
4347 dev_info(hdev->dev,
4348 "Firmware performs HARD reset, going to wait %dms\n",
4349 reset_timeout_ms);
4350 }
4351
4352 skip_reset:
4353 /*
4354 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355 * itself is in reset. Need to wait until the reset is deasserted
4356 */
4357 msleep(reset_timeout_ms);
4358
4359 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361 dev_err(hdev->dev,
4362 "Timeout while waiting for device to reset 0x%x\n",
4363 status);
4364
4365 if (gaudi) {
4366 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367 HW_CAP_HBM | HW_CAP_PCI_DMA |
4368 HW_CAP_MME | HW_CAP_TPC_MASK |
4369 HW_CAP_HBM_DMA | HW_CAP_PLL |
4370 HW_CAP_NIC_MASK | HW_CAP_MMU |
4371 HW_CAP_SRAM_SCRAMBLER |
4372 HW_CAP_HBM_SCRAMBLER |
4373 HW_CAP_CLK_GATE);
4374
4375 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376
4377 hdev->device_cpu_is_halted = false;
4378 }
4379 }
4380
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383 int rc;
4384
4385 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386 if (rc)
4387 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388
4389 return rc;
4390 }
4391
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394 return gaudi_init_iatu(hdev);
4395 }
4396
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400 int rc;
4401
4402 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403 VM_DONTCOPY | VM_NORESERVE;
4404
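/*
 * The stored dma_addr includes the device-side HOST_PHYS_BASE offset
 * added in gaudi_dma_alloc_coherent(), so strip it before handing the
 * address back to the DMA API.
 */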
4405 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406 (dma_addr - HOST_PHYS_BASE), size);
4407 if (rc)
4408 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409
4410 return rc;
4411 }
4412
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415 struct cpu_dyn_regs *dyn_regs =
4416 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418 struct gaudi_device *gaudi = hdev->asic_specific;
4419 bool invalid_queue = false;
4420 int dma_id;
4421
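/*
 * Each QMAN exposes four per-stream PQ_PI registers laid out 4 bytes
 * apart, so the masked queue id below selects the stream slot within
 * the engine's register block. The "- 1" variants compensate for the
 * single CPU PQ id that sits between the PCI DMA and the internal
 * queue id ranges, which shifts the 4-queue alignment of every id
 * after it.
 */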
4422 switch (hw_queue_id) {
4423 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428 break;
4429
4430 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435 break;
4436
4437 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4440 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442 break;
4443
4444 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449 break;
4450
4451 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456 break;
4457
4458 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463 break;
4464
4465 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470 break;
4471
4472 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477 break;
4478
4479 case GAUDI_QUEUE_ID_CPU_PQ:
4480 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482 else
4483 invalid_queue = true;
4484 break;
4485
4486 case GAUDI_QUEUE_ID_MME_0_0:
4487 db_reg_offset = mmMME2_QM_PQ_PI_0;
4488 break;
4489
4490 case GAUDI_QUEUE_ID_MME_0_1:
4491 db_reg_offset = mmMME2_QM_PQ_PI_1;
4492 break;
4493
4494 case GAUDI_QUEUE_ID_MME_0_2:
4495 db_reg_offset = mmMME2_QM_PQ_PI_2;
4496 break;
4497
4498 case GAUDI_QUEUE_ID_MME_0_3:
4499 db_reg_offset = mmMME2_QM_PQ_PI_3;
4500 break;
4501
4502 case GAUDI_QUEUE_ID_MME_1_0:
4503 db_reg_offset = mmMME0_QM_PQ_PI_0;
4504 break;
4505
4506 case GAUDI_QUEUE_ID_MME_1_1:
4507 db_reg_offset = mmMME0_QM_PQ_PI_1;
4508 break;
4509
4510 case GAUDI_QUEUE_ID_MME_1_2:
4511 db_reg_offset = mmMME0_QM_PQ_PI_2;
4512 break;
4513
4514 case GAUDI_QUEUE_ID_MME_1_3:
4515 db_reg_offset = mmMME0_QM_PQ_PI_3;
4516 break;
4517
4518 case GAUDI_QUEUE_ID_TPC_0_0:
4519 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520 break;
4521
4522 case GAUDI_QUEUE_ID_TPC_0_1:
4523 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524 break;
4525
4526 case GAUDI_QUEUE_ID_TPC_0_2:
4527 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528 break;
4529
4530 case GAUDI_QUEUE_ID_TPC_0_3:
4531 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532 break;
4533
4534 case GAUDI_QUEUE_ID_TPC_1_0:
4535 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536 break;
4537
4538 case GAUDI_QUEUE_ID_TPC_1_1:
4539 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540 break;
4541
4542 case GAUDI_QUEUE_ID_TPC_1_2:
4543 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544 break;
4545
4546 case GAUDI_QUEUE_ID_TPC_1_3:
4547 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548 break;
4549
4550 case GAUDI_QUEUE_ID_TPC_2_0:
4551 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552 break;
4553
4554 case GAUDI_QUEUE_ID_TPC_2_1:
4555 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556 break;
4557
4558 case GAUDI_QUEUE_ID_TPC_2_2:
4559 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560 break;
4561
4562 case GAUDI_QUEUE_ID_TPC_2_3:
4563 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564 break;
4565
4566 case GAUDI_QUEUE_ID_TPC_3_0:
4567 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568 break;
4569
4570 case GAUDI_QUEUE_ID_TPC_3_1:
4571 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572 break;
4573
4574 case GAUDI_QUEUE_ID_TPC_3_2:
4575 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576 break;
4577
4578 case GAUDI_QUEUE_ID_TPC_3_3:
4579 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580 break;
4581
4582 case GAUDI_QUEUE_ID_TPC_4_0:
4583 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584 break;
4585
4586 case GAUDI_QUEUE_ID_TPC_4_1:
4587 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588 break;
4589
4590 case GAUDI_QUEUE_ID_TPC_4_2:
4591 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592 break;
4593
4594 case GAUDI_QUEUE_ID_TPC_4_3:
4595 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596 break;
4597
4598 case GAUDI_QUEUE_ID_TPC_5_0:
4599 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600 break;
4601
4602 case GAUDI_QUEUE_ID_TPC_5_1:
4603 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604 break;
4605
4606 case GAUDI_QUEUE_ID_TPC_5_2:
4607 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608 break;
4609
4610 case GAUDI_QUEUE_ID_TPC_5_3:
4611 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612 break;
4613
4614 case GAUDI_QUEUE_ID_TPC_6_0:
4615 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616 break;
4617
4618 case GAUDI_QUEUE_ID_TPC_6_1:
4619 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620 break;
4621
4622 case GAUDI_QUEUE_ID_TPC_6_2:
4623 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624 break;
4625
4626 case GAUDI_QUEUE_ID_TPC_6_3:
4627 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628 break;
4629
4630 case GAUDI_QUEUE_ID_TPC_7_0:
4631 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632 break;
4633
4634 case GAUDI_QUEUE_ID_TPC_7_1:
4635 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636 break;
4637
4638 case GAUDI_QUEUE_ID_TPC_7_2:
4639 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640 break;
4641
4642 case GAUDI_QUEUE_ID_TPC_7_3:
4643 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644 break;
4645
4646 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648 invalid_queue = true;
4649
4650 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652 break;
4653
4654 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656 invalid_queue = true;
4657
4658 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660 break;
4661
4662 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664 invalid_queue = true;
4665
4666 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668 break;
4669
4670 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672 invalid_queue = true;
4673
4674 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676 break;
4677
4678 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680 invalid_queue = true;
4681
4682 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684 break;
4685
4686 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688 invalid_queue = true;
4689
4690 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692 break;
4693
4694 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696 invalid_queue = true;
4697
4698 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700 break;
4701
4702 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704 invalid_queue = true;
4705
4706 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708 break;
4709
4710 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712 invalid_queue = true;
4713
4714 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716 break;
4717
4718 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720 invalid_queue = true;
4721
4722 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724 break;
4725
4726 default:
4727 invalid_queue = true;
4728 }
4729
4730 if (invalid_queue) {
4731 /* Should never get here */
4732 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733 hw_queue_id);
4734 return;
4735 }
4736
4737 db_value = pi;
4738
4739 /* ring the doorbell */
4740 WREG32(db_reg_offset, db_value);
4741
4742 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743 /* make sure device CPU will read latest data from host */
4744 mb();
4745
4746 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749
4750 WREG32(irq_handler_offset,
4751 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752 }
4753 }
4754
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756 struct hl_bd *bd)
4757 {
4758 __le64 *pbd = (__le64 *) bd;
4759
4760 /* The QMANs are on host memory, so a simple copy suffices */
4761 pqe[0] = pbd[0];
4762 pqe[1] = pbd[1];
4763 }
4764
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766 dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769 dma_handle, flags);
4770
4771 /* Shift to the device's base physical address of host memory */
4772 if (kernel_addr)
4773 *dma_handle += HOST_PHYS_BASE;
4774
4775 return kernel_addr;
4776 }
4777
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779 void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781 /* Strip the device's base physical address of host memory */
4782 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783
4784 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789 struct asic_fixed_properties *prop = &hdev->asic_prop;
4790 u64 cur_addr = DRAM_BASE_ADDR_USER;
4791 u32 val;
4792 u32 chunk_size;
4793 int rc, dma_id;
4794
4795 while (cur_addr < prop->dram_end_address) {
4796 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798
4799 chunk_size =
4800 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801
4802 dev_dbg(hdev->dev,
4803 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804 cur_addr, cur_addr + chunk_size);
4805
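/*
 * The commit below sets the MEM_SET bit, which puts the DMA core into
 * memory-set mode: SRC_BASE LO/HI are then treated as the 64-bit fill
 * pattern (hence the constant programmed into them) rather than a
 * source address, so no read traffic is generated while scrubbing.
 */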
4806 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809 lower_32_bits(cur_addr));
4810 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811 upper_32_bits(cur_addr));
4812 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813 chunk_size);
4814 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817
4818 cur_addr += chunk_size;
4819
4820 if (cur_addr == prop->dram_end_address)
4821 break;
4822 }
4823
4824 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826
4827 rc = hl_poll_timeout(
4828 hdev,
4829 mmDMA0_CORE_STS0 + dma_offset,
4830 val,
4831 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832 1000,
4833 HBM_SCRUBBING_TIMEOUT_US);
4834
4835 if (rc) {
4836 dev_err(hdev->dev,
4837 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4838 dma_id);
4839 return -EIO;
4840 }
4841 }
4842 }
4843
4844 return 0;
4845 }
4846
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849 struct asic_fixed_properties *prop = &hdev->asic_prop;
4850 struct gaudi_device *gaudi = hdev->asic_specific;
4851 int rc = 0;
4852 u64 val = 0;
4853
4854 if (!hdev->memory_scrub)
4855 return 0;
4856
4857 if (!addr && !size) {
4858 /* Wait till device is idle */
4859 rc = hl_poll_timeout(
4860 hdev,
4861 mmDMA0_CORE_STS0/* dummy */,
4862 val/* dummy */,
4863 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4864 0, NULL)),
4865 1000,
4866 HBM_SCRUBBING_TIMEOUT_US);
4867 if (rc) {
4868 dev_err(hdev->dev, "waiting for idle timeout\n");
4869 return -EIO;
4870 }
4871
4872 /* Scrub SRAM */
4873 addr = prop->sram_user_base_address;
4874 size = hdev->pldm ? 0x10000 :
4875 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4876 val = 0x7777777777777777ull;
4877
4878 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879 if (rc) {
4880 dev_err(hdev->dev,
4881 "Failed to clear SRAM in mem scrub all\n");
4882 return rc;
4883 }
4884
4885 mutex_lock(&gaudi->clk_gate_mutex);
4886 hdev->asic_funcs->disable_clock_gating(hdev);
4887
4888 /* Scrub HBM using all DMA channels in parallel */
4889 rc = gaudi_hbm_scrubbing(hdev);
4890 if (rc)
4891 dev_err(hdev->dev,
4892 "Failed to clear HBM in mem scrub all\n");
4893
4894 hdev->asic_funcs->set_clock_gating(hdev);
4895 mutex_unlock(&gaudi->clk_gate_mutex);
4896 }
4897
4898 return rc;
4899 }
4900
4901 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4902 u32 queue_id, dma_addr_t *dma_handle,
4903 u16 *queue_len)
4904 {
4905 struct gaudi_device *gaudi = hdev->asic_specific;
4906 struct gaudi_internal_qman_info *q;
4907
4908 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911 return NULL;
4912 }
4913
4914 q = &gaudi->internal_qmans[queue_id];
4915 *dma_handle = q->pq_dma_addr;
4916 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917
4918 return q->pq_kernel_addr;
4919 }
4920
4921 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4922 u16 len, u32 timeout, u64 *result)
4923 {
4924 struct gaudi_device *gaudi = hdev->asic_specific;
4925
4926 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927 if (result)
4928 *result = 0;
4929 return 0;
4930 }
4931
4932 if (!timeout)
4933 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934
4935 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936 timeout, result);
4937 }
4938
4939 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941 struct packet_msg_prot *fence_pkt;
4942 dma_addr_t pkt_dma_addr;
4943 u32 fence_val, tmp, timeout_usec;
4944 dma_addr_t fence_dma_addr;
4945 u32 *fence_ptr;
4946 int rc;
4947
4948 if (hdev->pldm)
4949 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950 else
4951 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952
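/*
 * The test sends a MSG_PROT packet that writes a known fence value to a host
 * buffer, then polls that buffer to verify the queue actually executed the
 * packet.
 */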
4953 fence_val = GAUDI_QMAN0_FENCE_VAL;
4954
4955 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956 &fence_dma_addr);
4957 if (!fence_ptr) {
4958 dev_err(hdev->dev,
4959 "Failed to allocate memory for H/W queue %d testing\n",
4960 hw_queue_id);
4961 return -ENOMEM;
4962 }
4963
4964 *fence_ptr = 0;
4965
4966 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967 sizeof(struct packet_msg_prot),
4968 GFP_KERNEL, &pkt_dma_addr);
4969 if (!fence_pkt) {
4970 dev_err(hdev->dev,
4971 "Failed to allocate packet for H/W queue %d testing\n",
4972 hw_queue_id);
4973 rc = -ENOMEM;
4974 goto free_fence_ptr;
4975 }
4976
4977 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980
4981 fence_pkt->ctl = cpu_to_le32(tmp);
4982 fence_pkt->value = cpu_to_le32(fence_val);
4983 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984
4985 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986 sizeof(struct packet_msg_prot),
4987 pkt_dma_addr);
4988 if (rc) {
4989 dev_err(hdev->dev,
4990 "Failed to send fence packet to H/W queue %d\n",
4991 hw_queue_id);
4992 goto free_pkt;
4993 }
4994
4995 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996 1000, timeout_usec, true);
4997
4998 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999
5000 if (rc == -ETIMEDOUT) {
5001 dev_err(hdev->dev,
5002 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004 rc = -EIO;
5005 }
5006
5007 free_pkt:
5008 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009 pkt_dma_addr);
5010 free_fence_ptr:
5011 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012 fence_dma_addr);
5013 return rc;
5014 }
5015
5016 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018 struct gaudi_device *gaudi = hdev->asic_specific;
5019
5020 /*
5021 * check capability here as send_cpu_message() won't update the result
5022 * value if no capability
5023 */
5024 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025 return 0;
5026
5027 return hl_fw_test_cpu_queue(hdev);
5028 }
5029
5030 static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032 int i, rc, ret_val = 0;
5033
5034 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036 rc = gaudi_test_queue(hdev, i);
5037 if (rc)
5038 ret_val = -EINVAL;
5039 }
5040 }
5041
5042 rc = gaudi_test_cpu_queue(hdev);
5043 if (rc)
5044 ret_val = -EINVAL;
5045
5046 return ret_val;
5047 }
5048
5049 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5050 gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052 void *kernel_addr;
5053
5054 if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055 return NULL;
5056
5057 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058
5059 /* Shift to the device's base physical address of host memory */
5060 if (kernel_addr)
5061 *dma_handle += HOST_PHYS_BASE;
5062
5063 return kernel_addr;
5064 }
5065
5066 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5067 dma_addr_t dma_addr)
5068 {
5069 /* Cancel the device's base physical address of host memory */
5070 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071
5072 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074
5075 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5076 size_t size, dma_addr_t *dma_handle)
5077 {
5078 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080
5081 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5082 size_t size, void *vaddr)
5083 {
5084 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086
5087 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5088 int nents, enum dma_data_direction dir)
5089 {
5090 struct scatterlist *sg;
5091 int i;
5092
5093 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094 return -ENOMEM;
5095
5096 /* Shift to the device's base physical address of host memory */
5097 for_each_sg(sgl, sg, nents, i)
5098 sg->dma_address += HOST_PHYS_BASE;
5099
5100 return 0;
5101 }
5102
5103 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5104 int nents, enum dma_data_direction dir)
5105 {
5106 struct scatterlist *sg;
5107 int i;
5108
5109 /* Cancel the device's base physical address of host memory */
5110 for_each_sg(sgl, sg, nents, i)
5111 sg->dma_address -= HOST_PHYS_BASE;
5112
5113 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115
5116 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5117 struct sg_table *sgt)
5118 {
5119 struct scatterlist *sg, *sg_next_iter;
5120 u32 count, dma_desc_cnt;
5121 u64 len, len_next;
5122 dma_addr_t addr, addr_next;
5123
5124 dma_desc_cnt = 0;
5125
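/*
 * Count how many LIN_DMA descriptors are needed: physically contiguous SG
 * entries are merged into a single descriptor as long as the combined length
 * does not exceed DMA_MAX_TRANSFER_SIZE.
 */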
5126 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127
5128 len = sg_dma_len(sg);
5129 addr = sg_dma_address(sg);
5130
5131 if (len == 0)
5132 break;
5133
5134 while ((count + 1) < sgt->nents) {
5135 sg_next_iter = sg_next(sg);
5136 len_next = sg_dma_len(sg_next_iter);
5137 addr_next = sg_dma_address(sg_next_iter);
5138
5139 if (len_next == 0)
5140 break;
5141
5142 if ((addr + len == addr_next) &&
5143 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144 len += len_next;
5145 count++;
5146 sg = sg_next_iter;
5147 } else {
5148 break;
5149 }
5150 }
5151
5152 dma_desc_cnt++;
5153 }
5154
5155 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
5157
5158 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5159 struct hl_cs_parser *parser,
5160 struct packet_lin_dma *user_dma_pkt,
5161 u64 addr, enum dma_data_direction dir)
5162 {
5163 struct hl_userptr *userptr;
5164 int rc;
5165
5166 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167 parser->job_userptr_list, &userptr))
5168 goto already_pinned;
5169
5170 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171 if (!userptr)
5172 return -ENOMEM;
5173
5174 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175 userptr);
5176 if (rc)
5177 goto free_userptr;
5178
5179 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180
5181 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182 userptr->sgt->nents, dir);
5183 if (rc) {
5184 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185 goto unpin_memory;
5186 }
5187
5188 userptr->dma_mapped = true;
5189 userptr->dir = dir;
5190
5191 already_pinned:
5192 parser->patched_cb_size +=
5193 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194
5195 return 0;
5196
5197 unpin_memory:
5198 list_del(&userptr->job_node);
5199 hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201 kfree(userptr);
5202 return rc;
5203 }
5204
5205 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5206 struct hl_cs_parser *parser,
5207 struct packet_lin_dma *user_dma_pkt,
5208 bool src_in_host)
5209 {
5210 enum dma_data_direction dir;
5211 bool skip_host_mem_pin = false, user_memset;
5212 u64 addr;
5213 int rc = 0;
5214
5215 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218
5219 if (src_in_host) {
5220 if (user_memset)
5221 skip_host_mem_pin = true;
5222
5223 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224 dir = DMA_TO_DEVICE;
5225 addr = le64_to_cpu(user_dma_pkt->src_addr);
5226 } else {
5227 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228 dir = DMA_FROM_DEVICE;
5229 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232 }
5233
5234 if (skip_host_mem_pin)
5235 parser->patched_cb_size += sizeof(*user_dma_pkt);
5236 else
5237 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238 addr, dir);
5239
5240 return rc;
5241 }
5242
5243 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5244 struct hl_cs_parser *parser,
5245 struct packet_lin_dma *user_dma_pkt)
5246 {
5247 bool src_in_host = false;
5248 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251
5252 dev_dbg(hdev->dev, "DMA packet details:\n");
5253 dev_dbg(hdev->dev, "source == 0x%llx\n",
5254 le64_to_cpu(user_dma_pkt->src_addr));
5255 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257
5258 /*
5259 * Special handling for DMA with size 0. Bypass all validations
5260 * because no transactions will be done except for WR_COMP, which
5261 * is not a security issue
5262 */
5263 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264 parser->patched_cb_size += sizeof(*user_dma_pkt);
5265 return 0;
5266 }
5267
5268 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269 src_in_host = true;
5270
5271 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272 src_in_host);
5273 }
5274
5275 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5276 struct hl_cs_parser *parser,
5277 struct packet_load_and_exe *user_pkt)
5278 {
5279 u32 cfg;
5280
5281 cfg = le32_to_cpu(user_pkt->cfg);
5282
5283 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284 dev_err(hdev->dev,
5285 "User not allowed to use Load and Execute\n");
5286 return -EPERM;
5287 }
5288
5289 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290
5291 return 0;
5292 }
5293
5294 static int gaudi_validate_cb(struct hl_device *hdev,
5295 struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297 u32 cb_parsed_length = 0;
5298 int rc = 0;
5299
5300 parser->patched_cb_size = 0;
5301
5302 /* user_cb_size is more than 0 so the loop will always be executed */
5303 while (cb_parsed_length < parser->user_cb_size) {
5304 enum packet_id pkt_id;
5305 u16 pkt_size;
5306 struct gaudi_packet *user_pkt;
5307
5308 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309
5310 pkt_id = (enum packet_id) (
5311 (le64_to_cpu(user_pkt->header) &
5312 PACKET_HEADER_PACKET_ID_MASK) >>
5313 PACKET_HEADER_PACKET_ID_SHIFT);
5314
5315 if (!validate_packet_id(pkt_id)) {
5316 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317 rc = -EINVAL;
5318 break;
5319 }
5320
5321 pkt_size = gaudi_packet_sizes[pkt_id];
5322 cb_parsed_length += pkt_size;
5323 if (cb_parsed_length > parser->user_cb_size) {
5324 dev_err(hdev->dev,
5325 "packet 0x%x is out of CB boundary\n", pkt_id);
5326 rc = -EINVAL;
5327 break;
5328 }
5329
5330 switch (pkt_id) {
5331 case PACKET_MSG_PROT:
5332 dev_err(hdev->dev,
5333 "User not allowed to use MSG_PROT\n");
5334 rc = -EPERM;
5335 break;
5336
5337 case PACKET_CP_DMA:
5338 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339 rc = -EPERM;
5340 break;
5341
5342 case PACKET_STOP:
5343 dev_err(hdev->dev, "User not allowed to use STOP\n");
5344 rc = -EPERM;
5345 break;
5346
5347 case PACKET_WREG_BULK:
5348 dev_err(hdev->dev,
5349 "User not allowed to use WREG_BULK\n");
5350 rc = -EPERM;
5351 break;
5352
5353 case PACKET_LOAD_AND_EXE:
5354 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355 (struct packet_load_and_exe *) user_pkt);
5356 break;
5357
5358 case PACKET_LIN_DMA:
5359 parser->contains_dma_pkt = true;
5360 if (is_mmu)
5361 parser->patched_cb_size += pkt_size;
5362 else
5363 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364 (struct packet_lin_dma *) user_pkt);
5365 break;
5366
5367 case PACKET_WREG_32:
5368 case PACKET_MSG_LONG:
5369 case PACKET_MSG_SHORT:
5370 case PACKET_REPEAT:
5371 case PACKET_FENCE:
5372 case PACKET_NOP:
5373 case PACKET_ARB_POINT:
5374 parser->patched_cb_size += pkt_size;
5375 break;
5376
5377 default:
5378 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379 pkt_id);
5380 rc = -EINVAL;
5381 break;
5382 }
5383
5384 if (rc)
5385 break;
5386 }
5387
5388 /*
5389 * The new CB should have space at the end for two MSG_PROT packets:
5390 * 1. A packet that will act as a completion packet
5391 * 2. A packet that will generate MSI-X interrupt
5392 */
5393 if (parser->completion)
5394 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395
5396 return rc;
5397 }
5398
5399 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5400 struct hl_cs_parser *parser,
5401 struct packet_lin_dma *user_dma_pkt,
5402 struct packet_lin_dma *new_dma_pkt,
5403 u32 *new_dma_pkt_size)
5404 {
5405 struct hl_userptr *userptr;
5406 struct scatterlist *sg, *sg_next_iter;
5407 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408 u64 len, len_next;
5409 dma_addr_t dma_addr, dma_addr_next;
5410 u64 device_memory_addr, addr;
5411 enum dma_data_direction dir;
5412 struct sg_table *sgt;
5413 bool src_in_host = false;
5414 bool skip_host_mem_pin = false;
5415 bool user_memset;
5416
5417 ctl = le32_to_cpu(user_dma_pkt->ctl);
5418
5419 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420 src_in_host = true;
5421
5422 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424
5425 if (src_in_host) {
5426 addr = le64_to_cpu(user_dma_pkt->src_addr);
5427 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428 dir = DMA_TO_DEVICE;
5429 if (user_memset)
5430 skip_host_mem_pin = true;
5431 } else {
5432 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434 dir = DMA_FROM_DEVICE;
5435 }
5436
5437 if ((!skip_host_mem_pin) &&
5438 (!hl_userptr_is_pinned(hdev, addr,
5439 le32_to_cpu(user_dma_pkt->tsize),
5440 parser->job_userptr_list, &userptr))) {
5441 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5442 addr, le32_to_cpu(user_dma_pkt->tsize));
5443 return -EFAULT;
5444 }
5445
5446 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5449 return 0;
5450 }
5451
5452 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453
5454 sgt = userptr->sgt;
5455 dma_desc_cnt = 0;
5456
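/*
 * Split the user's single LIN_DMA packet into one packet per merged SG chunk.
 * EB is kept only on the first descriptor and WR_COMP is re-enabled only on
 * the last one, further below.
 */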
5457 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458 len = sg_dma_len(sg);
5459 dma_addr = sg_dma_address(sg);
5460
5461 if (len == 0)
5462 break;
5463
5464 while ((count + 1) < sgt->nents) {
5465 sg_next_iter = sg_next(sg);
5466 len_next = sg_dma_len(sg_next_iter);
5467 dma_addr_next = sg_dma_address(sg_next_iter);
5468
5469 if (len_next == 0)
5470 break;
5471
5472 if ((dma_addr + len == dma_addr_next) &&
5473 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474 len += len_next;
5475 count++;
5476 sg = sg_next_iter;
5477 } else {
5478 break;
5479 }
5480 }
5481
5482 ctl = le32_to_cpu(user_dma_pkt->ctl);
5483 if (likely(dma_desc_cnt))
5484 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486 new_dma_pkt->ctl = cpu_to_le32(ctl);
5487 new_dma_pkt->tsize = cpu_to_le32(len);
5488
5489 if (dir == DMA_TO_DEVICE) {
5490 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492 } else {
5493 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495 }
5496
5497 if (!user_memset)
5498 device_memory_addr += len;
5499 dma_desc_cnt++;
5500 new_dma_pkt++;
5501 }
5502
5503 if (!dma_desc_cnt) {
5504 dev_err(hdev->dev,
5505 "Error of 0 SG entries when patching DMA packet\n");
5506 return -EFAULT;
5507 }
5508
5509 /* Fix the last dma packet - wrcomp must be as user set it */
5510 new_dma_pkt--;
5511 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512
5513 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514
5515 return 0;
5516 }
5517
5518 static int gaudi_patch_cb(struct hl_device *hdev,
5519 struct hl_cs_parser *parser)
5520 {
5521 u32 cb_parsed_length = 0;
5522 u32 cb_patched_cur_length = 0;
5523 int rc = 0;
5524
5525 /* user_cb_size is more than 0 so the loop will always be executed */
5526 while (cb_parsed_length < parser->user_cb_size) {
5527 enum packet_id pkt_id;
5528 u16 pkt_size;
5529 u32 new_pkt_size = 0;
5530 struct gaudi_packet *user_pkt, *kernel_pkt;
5531
5532 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533 kernel_pkt = parser->patched_cb->kernel_address +
5534 cb_patched_cur_length;
5535
5536 pkt_id = (enum packet_id) (
5537 (le64_to_cpu(user_pkt->header) &
5538 PACKET_HEADER_PACKET_ID_MASK) >>
5539 PACKET_HEADER_PACKET_ID_SHIFT);
5540
5541 if (!validate_packet_id(pkt_id)) {
5542 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543 rc = -EINVAL;
5544 break;
5545 }
5546
5547 pkt_size = gaudi_packet_sizes[pkt_id];
5548 cb_parsed_length += pkt_size;
5549 if (cb_parsed_length > parser->user_cb_size) {
5550 dev_err(hdev->dev,
5551 "packet 0x%x is out of CB boundary\n", pkt_id);
5552 rc = -EINVAL;
5553 break;
5554 }
5555
5556 switch (pkt_id) {
5557 case PACKET_LIN_DMA:
5558 rc = gaudi_patch_dma_packet(hdev, parser,
5559 (struct packet_lin_dma *) user_pkt,
5560 (struct packet_lin_dma *) kernel_pkt,
5561 &new_pkt_size);
5562 cb_patched_cur_length += new_pkt_size;
5563 break;
5564
5565 case PACKET_MSG_PROT:
5566 dev_err(hdev->dev,
5567 "User not allowed to use MSG_PROT\n");
5568 rc = -EPERM;
5569 break;
5570
5571 case PACKET_CP_DMA:
5572 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573 rc = -EPERM;
5574 break;
5575
5576 case PACKET_STOP:
5577 dev_err(hdev->dev, "User not allowed to use STOP\n");
5578 rc = -EPERM;
5579 break;
5580
5581 case PACKET_WREG_32:
5582 case PACKET_WREG_BULK:
5583 case PACKET_MSG_LONG:
5584 case PACKET_MSG_SHORT:
5585 case PACKET_REPEAT:
5586 case PACKET_FENCE:
5587 case PACKET_NOP:
5588 case PACKET_ARB_POINT:
5589 case PACKET_LOAD_AND_EXE:
5590 memcpy(kernel_pkt, user_pkt, pkt_size);
5591 cb_patched_cur_length += pkt_size;
5592 break;
5593
5594 default:
5595 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596 pkt_id);
5597 rc = -EINVAL;
5598 break;
5599 }
5600
5601 if (rc)
5602 break;
5603 }
5604
5605 return rc;
5606 }
5607
5608 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5609 struct hl_cs_parser *parser)
5610 {
5611 u64 patched_cb_handle;
5612 u32 patched_cb_size;
5613 struct hl_cb *user_cb;
5614 int rc;
5615
5616 /*
5617 * The new CB should have space at the end for two MSG_PROT pkt:
5618 * 1. A packet that will act as a completion packet
5619 * 2. A packet that will generate MSI interrupt
5620 */
5621 if (parser->completion)
5622 parser->patched_cb_size = parser->user_cb_size +
5623 sizeof(struct packet_msg_prot) * 2;
5624 else
5625 parser->patched_cb_size = parser->user_cb_size;
5626
5627 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628 parser->patched_cb_size, false, false,
5629 &patched_cb_handle);
5630
5631 if (rc) {
5632 dev_err(hdev->dev,
5633 "Failed to allocate patched CB for DMA CS %d\n",
5634 rc);
5635 return rc;
5636 }
5637
5638 patched_cb_handle >>= PAGE_SHIFT;
5639 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640 (u32) patched_cb_handle);
5641 /* hl_cb_get should never fail */
5642 if (!parser->patched_cb) {
5643 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644 (u32) patched_cb_handle);
5645 rc = -EFAULT;
5646 goto out;
5647 }
5648
5649 /*
5650 * The check that parser->user_cb_size <= parser->user_cb->size was done
5651 * in validate_queue_index().
5652 */
5653 memcpy(parser->patched_cb->kernel_address,
5654 parser->user_cb->kernel_address,
5655 parser->user_cb_size);
5656
5657 patched_cb_size = parser->patched_cb_size;
5658
5659 /* Validate patched CB instead of user CB */
5660 user_cb = parser->user_cb;
5661 parser->user_cb = parser->patched_cb;
5662 rc = gaudi_validate_cb(hdev, parser, true);
5663 parser->user_cb = user_cb;
5664
5665 if (rc) {
5666 hl_cb_put(parser->patched_cb);
5667 goto out;
5668 }
5669
5670 if (patched_cb_size != parser->patched_cb_size) {
5671 dev_err(hdev->dev, "user CB size mismatch\n");
5672 hl_cb_put(parser->patched_cb);
5673 rc = -EINVAL;
5674 goto out;
5675 }
5676
5677 out:
5678 /*
5679 * Always call cb destroy here because we still have 1 reference
5680 * to it by calling cb_get earlier. After the job will be completed,
5681 * cb_put will release it, but here we want to remove it from the
5682 * idr
5683 */
5684 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685 patched_cb_handle << PAGE_SHIFT);
5686
5687 return rc;
5688 }
5689
5690 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5691 struct hl_cs_parser *parser)
5692 {
5693 u64 patched_cb_handle;
5694 int rc;
5695
5696 rc = gaudi_validate_cb(hdev, parser, false);
5697
5698 if (rc)
5699 goto free_userptr;
5700
5701 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702 parser->patched_cb_size, false, false,
5703 &patched_cb_handle);
5704 if (rc) {
5705 dev_err(hdev->dev,
5706 "Failed to allocate patched CB for DMA CS %d\n", rc);
5707 goto free_userptr;
5708 }
5709
5710 patched_cb_handle >>= PAGE_SHIFT;
5711 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712 (u32) patched_cb_handle);
5713 /* hl_cb_get should never fail here */
5714 if (!parser->patched_cb) {
5715 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716 (u32) patched_cb_handle);
5717 rc = -EFAULT;
5718 goto out;
5719 }
5720
5721 rc = gaudi_patch_cb(hdev, parser);
5722
5723 if (rc)
5724 hl_cb_put(parser->patched_cb);
5725
5726 out:
5727 /*
5728 * Always call cb destroy here because we still have 1 reference
5729 * to it by calling cb_get earlier. After the job will be completed,
5730 * cb_put will release it, but here we want to remove it from the
5731 * idr
5732 */
5733 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734 patched_cb_handle << PAGE_SHIFT);
5735
5736 free_userptr:
5737 if (rc)
5738 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739 return rc;
5740 }
5741
5742 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5743 struct hl_cs_parser *parser)
5744 {
5745 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746 struct gaudi_device *gaudi = hdev->asic_specific;
5747 u32 nic_queue_offset, nic_mask_q_id;
5748
5749 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5750 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5751 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5752 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5753
5754 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5755 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5756 return -EINVAL;
5757 }
5758 }
5759
5760 /* For internal queue jobs just check if CB address is valid */
5761 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5762 parser->user_cb_size,
5763 asic_prop->sram_user_base_address,
5764 asic_prop->sram_end_address))
5765 return 0;
5766
5767 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5768 parser->user_cb_size,
5769 asic_prop->dram_user_base_address,
5770 asic_prop->dram_end_address))
5771 return 0;
5772
5773 /* PMMU and HPMMU addresses are equal, check only one of them */
5774 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5775 parser->user_cb_size,
5776 asic_prop->pmmu.start_addr,
5777 asic_prop->pmmu.end_addr))
5778 return 0;
5779
5780 dev_err(hdev->dev,
5781 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5782 parser->user_cb, parser->user_cb_size);
5783
5784 return -EFAULT;
5785 }
5786
5787 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5788 {
5789 struct gaudi_device *gaudi = hdev->asic_specific;
5790
5791 if (parser->queue_type == QUEUE_TYPE_INT)
5792 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5793
5794 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5795 return gaudi_parse_cb_mmu(hdev, parser);
5796 else
5797 return gaudi_parse_cb_no_mmu(hdev, parser);
5798 }
5799
5800 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5801 void *kernel_address, u32 len,
5802 u64 cq_addr, u32 cq_val, u32 msi_vec,
5803 bool eb)
5804 {
5805 struct gaudi_device *gaudi = hdev->asic_specific;
5806 struct packet_msg_prot *cq_pkt;
5807 u64 msi_addr;
5808 u32 tmp;
5809
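/*
 * Fill the two MSG_PROT packets that were reserved at the end of the CB:
 * the first updates the completion queue, the second triggers the MSI/MSI-X
 * interrupt.
 */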
5810 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5811
5812 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5813 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5814
5815 if (eb)
5816 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5817
5818 cq_pkt->ctl = cpu_to_le32(tmp);
5819 cq_pkt->value = cpu_to_le32(cq_val);
5820 cq_pkt->addr = cpu_to_le64(cq_addr);
5821
5822 cq_pkt++;
5823
5824 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5825 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5826 cq_pkt->ctl = cpu_to_le32(tmp);
5827 cq_pkt->value = cpu_to_le32(1);
5828
5829 if (gaudi->multi_msi_mode)
5830 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5831 else
5832 msi_addr = mmPCIE_CORE_MSI_REQ;
5833
5834 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5835 }
5836
5837 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5838 {
5839 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5840 }
5841
5842 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5843 u32 size, u64 val)
5844 {
5845 struct packet_lin_dma *lin_dma_pkt;
5846 struct hl_cs_job *job;
5847 u32 cb_size, ctl, err_cause;
5848 struct hl_cb *cb;
5849 u64 id;
5850 int rc;
5851
5852 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5853 if (!cb)
5854 return -EFAULT;
5855
5856 lin_dma_pkt = cb->kernel_address;
5857 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5858 cb_size = sizeof(*lin_dma_pkt);
5859
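/*
 * Build a single LIN_DMA packet in memset mode: the value to write is carried
 * in the source-address field and the packet is executed on DMA channel 0
 * via gaudi_send_job_on_qman0().
 */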
5860 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5861 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5862 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5863 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5864 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5865
5866 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5867 lin_dma_pkt->src_addr = cpu_to_le64(val);
5868 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5869 lin_dma_pkt->tsize = cpu_to_le32(size);
5870
5871 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5872 if (!job) {
5873 dev_err(hdev->dev, "Failed to allocate a new job\n");
5874 rc = -ENOMEM;
5875 goto release_cb;
5876 }
5877
5878 /* Verify DMA is OK */
5879 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5880 if (err_cause && !hdev->init_done) {
5881 dev_dbg(hdev->dev,
5882 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5883 err_cause);
5884 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5885 }
5886
5887 job->id = 0;
5888 job->user_cb = cb;
5889 atomic_inc(&job->user_cb->cs_cnt);
5890 job->user_cb_size = cb_size;
5891 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5892 job->patched_cb = job->user_cb;
5893 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5894
5895 hl_debugfs_add_job(hdev, job);
5896
5897 rc = gaudi_send_job_on_qman0(hdev, job);
5898 hl_debugfs_remove_job(hdev, job);
5899 kfree(job);
5900 atomic_dec(&cb->cs_cnt);
5901
5902 /* Verify DMA is OK */
5903 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5904 if (err_cause) {
5905 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906 rc = -EIO;
5907 if (!hdev->init_done) {
5908 dev_dbg(hdev->dev,
5909 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5910 err_cause);
5911 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5912 }
5913 }
5914
5915 release_cb:
5916 id = cb->id;
5917 hl_cb_put(cb);
5918 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5919
5920 return rc;
5921 }
5922
5923 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5924 u32 num_regs, u32 val)
5925 {
5926 struct packet_msg_long *pkt;
5927 struct hl_cs_job *job;
5928 u32 cb_size, ctl;
5929 struct hl_cb *cb;
5930 int i, rc;
5931
5932 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5933
5934 if (cb_size > SZ_2M) {
5935 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M / SZ_1M);
5936 return -ENOMEM;
5937 }
5938
5939 cb = hl_cb_kernel_create(hdev, cb_size, false);
5940 if (!cb)
5941 return -EFAULT;
5942
5943 pkt = cb->kernel_address;
5944
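/*
 * One MSG_LONG packet per register: each packet writes 'val' to a consecutive
 * 4-byte register address starting at reg_base.
 */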
5945 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5946 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5947 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5948 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5949 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5950
5951 for (i = 0; i < num_regs ; i++, pkt++) {
5952 pkt->ctl = cpu_to_le32(ctl);
5953 pkt->value = cpu_to_le32(val);
5954 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5955 }
5956
5957 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5958 if (!job) {
5959 dev_err(hdev->dev, "Failed to allocate a new job\n");
5960 rc = -ENOMEM;
5961 goto release_cb;
5962 }
5963
5964 job->id = 0;
5965 job->user_cb = cb;
5966 atomic_inc(&job->user_cb->cs_cnt);
5967 job->user_cb_size = cb_size;
5968 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5969 job->patched_cb = job->user_cb;
5970 job->job_cb_size = cb_size;
5971
5972 hl_debugfs_add_job(hdev, job);
5973
5974 rc = gaudi_send_job_on_qman0(hdev, job);
5975 hl_debugfs_remove_job(hdev, job);
5976 kfree(job);
5977 atomic_dec(&cb->cs_cnt);
5978
5979 release_cb:
5980 hl_cb_put(cb);
5981 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5982
5983 return rc;
5984 }
5985
5986 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5987 {
5988 u64 base_addr;
5989 u32 num_regs;
5990 int rc;
5991
5992 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5993 num_regs = NUM_OF_SOB_IN_BLOCK;
5994 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5995 if (rc) {
5996 dev_err(hdev->dev, "failed resetting SM registers");
5997 return -ENOMEM;
5998 }
5999
6000 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
6001 num_regs = NUM_OF_SOB_IN_BLOCK;
6002 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6003 if (rc) {
6004 dev_err(hdev->dev, "failed resetting SM registers");
6005 return -ENOMEM;
6006 }
6007
6008 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6009 num_regs = NUM_OF_SOB_IN_BLOCK;
6010 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6011 if (rc) {
6012 dev_err(hdev->dev, "failed resetting SM registers");
6013 return -ENOMEM;
6014 }
6015
6016 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6017 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6018 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6019 if (rc) {
6020 dev_err(hdev->dev, "failed resetting SM registers");
6021 return -ENOMEM;
6022 }
6023
6024 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6025 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6026 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6027 if (rc) {
6028 dev_err(hdev->dev, "failed resetting SM registers");
6029 return -ENOMEM;
6030 }
6031
6032 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6033 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6034 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6035 if (rc) {
6036 dev_err(hdev->dev, "failed resetting SM registers");
6037 return -ENOMEM;
6038 }
6039
6040 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6041 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6042 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6043 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6044 if (rc) {
6045 dev_err(hdev->dev, "failed resetting SM registers");
6046 return -ENOMEM;
6047 }
6048
6049 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6050 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6051 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6052 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6053 if (rc) {
6054 dev_err(hdev->dev, "failed resetting SM registers");
6055 return -ENOMEM;
6056 }
6057
6058 return 0;
6059 }
6060
6061 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6062 {
6063 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6064 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6065 int i;
6066
6067 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6068 u64 sob_addr = CFG_BASE +
6069 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6070 (i * sob_delta);
6071 u32 dma_offset = i * DMA_CORE_OFFSET;
6072
6073 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6074 lower_32_bits(sob_addr));
6075 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6076 upper_32_bits(sob_addr));
6077 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6078
6079 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6080 * modified by the user for SRAM reduction
6081 */
6082 if (i > 1)
6083 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6084 0x00000001);
6085 }
6086 }
6087
6088 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6089 {
6090 u32 qman_offset;
6091 int i;
6092
6093 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6094 qman_offset = i * DMA_QMAN_OFFSET;
6095 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6096 }
6097
6098 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6099 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6100 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6101 }
6102
6103 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6104 qman_offset = i * TPC_QMAN_OFFSET;
6105 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6106 }
6107
6108 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6109 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6110 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6111 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6112 }
6113 }
6114
6115 static int gaudi_restore_user_registers(struct hl_device *hdev)
6116 {
6117 int rc;
6118
6119 rc = gaudi_restore_sm_registers(hdev);
6120 if (rc)
6121 return rc;
6122
6123 gaudi_restore_dma_registers(hdev);
6124 gaudi_restore_qm_registers(hdev);
6125
6126 return 0;
6127 }
6128
6129 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6130 {
6131 return 0;
6132 }
6133
6134 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6135 {
6136 struct asic_fixed_properties *prop = &hdev->asic_prop;
6137 struct gaudi_device *gaudi = hdev->asic_specific;
6138 u64 addr = prop->mmu_pgt_addr;
6139 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6140
6141 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6142 return 0;
6143
6144 return gaudi_memset_device_memory(hdev, addr, size, 0);
6145 }
6146
6147 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6148 {
6149
6150 }
6151
6152 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6153 bool user_address, u32 *val)
6154 {
6155 struct asic_fixed_properties *prop = &hdev->asic_prop;
6156 struct gaudi_device *gaudi = hdev->asic_specific;
6157 u64 hbm_bar_addr, host_phys_end;
6158 int rc = 0;
6159
6160 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6161
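/*
 * Route the access by address range: CFG space via register access, SRAM via
 * its PCI BAR, HBM via a re-positioned HBM BAR window, and host physical
 * memory only for user-supplied addresses when no IOMMU is present.
 */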
6162 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6163
6164 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6165 (hdev->clock_gating_mask &
6166 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6167
6168 dev_err_ratelimited(hdev->dev,
6169 "Can't read register - clock gating is enabled!\n");
6170 rc = -EFAULT;
6171 } else {
6172 *val = RREG32(addr - CFG_BASE);
6173 }
6174
6175 } else if ((addr >= SRAM_BASE_ADDR) &&
6176 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6177 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6178 (addr - SRAM_BASE_ADDR));
6179 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6180 u64 bar_base_addr = DRAM_PHYS_BASE +
6181 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6182
6183 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6184 if (hbm_bar_addr != U64_MAX) {
6185 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6186 (addr - bar_base_addr));
6187
6188 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6189 hbm_bar_addr);
6190 }
6191 if (hbm_bar_addr == U64_MAX)
6192 rc = -EIO;
6193 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6194 user_address && !iommu_present(&pci_bus_type)) {
6195 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6196 } else {
6197 rc = -EFAULT;
6198 }
6199
6200 return rc;
6201 }
6202
6203 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6204 bool user_address, u32 val)
6205 {
6206 struct asic_fixed_properties *prop = &hdev->asic_prop;
6207 struct gaudi_device *gaudi = hdev->asic_specific;
6208 u64 hbm_bar_addr, host_phys_end;
6209 int rc = 0;
6210
6211 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6212
6213 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6214
6215 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6216 (hdev->clock_gating_mask &
6217 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6218
6219 dev_err_ratelimited(hdev->dev,
6220 "Can't write register - clock gating is enabled!\n");
6221 rc = -EFAULT;
6222 } else {
6223 WREG32(addr - CFG_BASE, val);
6224 }
6225
6226 } else if ((addr >= SRAM_BASE_ADDR) &&
6227 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6228 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6229 (addr - SRAM_BASE_ADDR));
6230 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6231 u64 bar_base_addr = DRAM_PHYS_BASE +
6232 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6233
6234 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6235 if (hbm_bar_addr != U64_MAX) {
6236 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6237 (addr - bar_base_addr));
6238
6239 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6240 hbm_bar_addr);
6241 }
6242 if (hbm_bar_addr == U64_MAX)
6243 rc = -EIO;
6244 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6245 user_address && !iommu_present(&pci_bus_type)) {
6246 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6247 } else {
6248 rc = -EFAULT;
6249 }
6250
6251 return rc;
6252 }
6253
6254 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6255 bool user_address, u64 *val)
6256 {
6257 struct asic_fixed_properties *prop = &hdev->asic_prop;
6258 struct gaudi_device *gaudi = hdev->asic_specific;
6259 u64 hbm_bar_addr, host_phys_end;
6260 int rc = 0;
6261
6262 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6263
6264 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6265
6266 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6267 (hdev->clock_gating_mask &
6268 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6269
6270 dev_err_ratelimited(hdev->dev,
6271 "Can't read register - clock gating is enabled!\n");
6272 rc = -EFAULT;
6273 } else {
6274 u32 val_l = RREG32(addr - CFG_BASE);
6275 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6276
6277 *val = (((u64) val_h) << 32) | val_l;
6278 }
6279
6280 } else if ((addr >= SRAM_BASE_ADDR) &&
6281 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6282 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6283 (addr - SRAM_BASE_ADDR));
6284 } else if (addr <=
6285 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6286 u64 bar_base_addr = DRAM_PHYS_BASE +
6287 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6288
6289 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6290 if (hbm_bar_addr != U64_MAX) {
6291 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6292 (addr - bar_base_addr));
6293
6294 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6295 hbm_bar_addr);
6296 }
6297 if (hbm_bar_addr == U64_MAX)
6298 rc = -EIO;
6299 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6300 user_address && !iommu_present(&pci_bus_type)) {
6301 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6302 } else {
6303 rc = -EFAULT;
6304 }
6305
6306 return rc;
6307 }
6308
6309 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6310 bool user_address, u64 val)
6311 {
6312 struct asic_fixed_properties *prop = &hdev->asic_prop;
6313 struct gaudi_device *gaudi = hdev->asic_specific;
6314 u64 hbm_bar_addr, host_phys_end;
6315 int rc = 0;
6316
6317 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6318
6319 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6320
6321 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6322 (hdev->clock_gating_mask &
6323 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6324
6325 dev_err_ratelimited(hdev->dev,
6326 "Can't write register - clock gating is enabled!\n");
6327 rc = -EFAULT;
6328 } else {
6329 WREG32(addr - CFG_BASE, lower_32_bits(val));
6330 WREG32(addr + sizeof(u32) - CFG_BASE,
6331 upper_32_bits(val));
6332 }
6333
6334 } else if ((addr >= SRAM_BASE_ADDR) &&
6335 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6336 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6337 (addr - SRAM_BASE_ADDR));
6338 } else if (addr <=
6339 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6340 u64 bar_base_addr = DRAM_PHYS_BASE +
6341 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6342
6343 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6344 if (hbm_bar_addr != U64_MAX) {
6345 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6346 (addr - bar_base_addr));
6347
6348 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6349 hbm_bar_addr);
6350 }
6351 if (hbm_bar_addr == U64_MAX)
6352 rc = -EIO;
6353 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6354 user_address && !iommu_present(&pci_bus_type)) {
6355 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6356 } else {
6357 rc = -EFAULT;
6358 }
6359
6360 return rc;
6361 }
6362
6363 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6364 u32 size_to_dma, dma_addr_t dma_addr)
6365 {
6366 u32 err_cause, val;
6367 u64 dma_offset;
6368 int rc;
6369
6370 dma_offset = dma_id * DMA_CORE_OFFSET;
6371
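/*
 * Program the DMA core directly: copy size_to_dma bytes from device address
 * 'addr' into the host buffer at 'dma_addr', then poll the core status until
 * it is no longer busy.
 */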
6372 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6373 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6374 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6375 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6376 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6377 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6378 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6379
6380 rc = hl_poll_timeout(
6381 hdev,
6382 mmDMA0_CORE_STS0 + dma_offset,
6383 val,
6384 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6385 0,
6386 1000000);
6387
6388 if (rc) {
6389 dev_err(hdev->dev,
6390 "DMA %d timed-out during reading of 0x%llx\n",
6391 dma_id, addr);
6392 return -EIO;
6393 }
6394
6395 /* Verify DMA is OK */
6396 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6397 if (err_cause) {
6398 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6399 dev_dbg(hdev->dev,
6400 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6401 err_cause);
6402 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6403
6404 return -EIO;
6405 }
6406
6407 return 0;
6408 }
6409
6410 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6411 void *blob_addr)
6412 {
6413 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6414 struct gaudi_device *gaudi = hdev->asic_specific;
6415 u64 dma_offset, qm_offset;
6416 dma_addr_t dma_addr;
6417 void *kernel_addr;
6418 bool is_eng_idle;
6419 int rc = 0, dma_id;
6420
6421 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6422 hdev, SZ_2M,
6423 &dma_addr,
6424 GFP_KERNEL | __GFP_ZERO);
6425
6426 if (!kernel_addr)
6427 return -ENOMEM;
6428
6429 mutex_lock(&gaudi->clk_gate_mutex);
6430
6431 hdev->asic_funcs->disable_clock_gating(hdev);
6432
6433 hdev->asic_funcs->hw_queues_lock(hdev);
6434
6435 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6436 dma_offset = dma_id * DMA_CORE_OFFSET;
6437 qm_offset = dma_id * DMA_QMAN_OFFSET;
6438 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6439 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6440
6441 if (!is_eng_idle) {
6442 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6443 dma_offset = dma_id * DMA_CORE_OFFSET;
6444 qm_offset = dma_id * DMA_QMAN_OFFSET;
6445 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6446 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6447
6448 if (!is_eng_idle) {
6449 dev_err_ratelimited(hdev->dev,
6450 "Can't read via DMA because it is BUSY\n");
6451 rc = -EAGAIN;
6452 goto out;
6453 }
6454 }
6455
6456 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6457 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6458 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6459
6460 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6461 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6462 * ASID
6463 */
6464 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465
6466 /* Verify DMA is OK */
6467 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6468 if (err_cause) {
6469 dev_dbg(hdev->dev,
6470 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6471 err_cause);
6472 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6473 }
6474
6475 pos = 0;
6476 size_left = size;
6477 size_to_dma = SZ_2M;
6478
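/*
 * Read the requested region in chunks of up to 2MB, copying each chunk from
 * the intermediate DMA buffer into the caller's blob.
 */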
6479 while (size_left > 0) {
6480
6481 if (size_left < SZ_2M)
6482 size_to_dma = size_left;
6483
6484 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6485 dma_addr);
6486 if (rc)
6487 break;
6488
6489 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6490
6491 if (size_left <= SZ_2M)
6492 break;
6493
6494 pos += SZ_2M;
6495 addr += SZ_2M;
6496 size_left -= SZ_2M;
6497 }
6498
6499 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6500 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6501 * ASID
6502 */
6503 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6504 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6505
6506 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6507
6508 out:
6509 hdev->asic_funcs->hw_queues_unlock(hdev);
6510
6511 hdev->asic_funcs->set_clock_gating(hdev);
6512
6513 mutex_unlock(&gaudi->clk_gate_mutex);
6514
6515 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6516 dma_addr);
6517
6518 return rc;
6519 }
6520
6521 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6522 {
6523 struct gaudi_device *gaudi = hdev->asic_specific;
6524
6525 if (hdev->hard_reset_pending)
6526 return U64_MAX;
6527
6528 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6529 (addr - gaudi->hbm_bar_cur_addr));
6530 }
6531
6532 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6533 {
6534 struct gaudi_device *gaudi = hdev->asic_specific;
6535
6536 if (hdev->hard_reset_pending)
6537 return;
6538
6539 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6540 (addr - gaudi->hbm_bar_cur_addr));
6541 }
6542
6543 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6544 {
6545 /* mask to zero the MMBP and ASID bits */
6546 WREG32_AND(reg, ~0x7FF);
6547 WREG32_OR(reg, asid);
6548 }
6549
6550 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6551 {
6552 struct gaudi_device *gaudi = hdev->asic_specific;
6553
6554 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6555 return;
6556
6557 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6558 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6559 return;
6560 }
6561
6562 mutex_lock(&gaudi->clk_gate_mutex);
6563
6564 hdev->asic_funcs->disable_clock_gating(hdev);
6565
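/*
 * Write the ASID into the NON_SECURE_PROPS and ARUSER/AWUSER registers of the
 * engines below (gaudi_mmu_prepare_reg also clears the MMU-bypass bit), so
 * their transactions go through the MMU with this ASID.
 */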
6566 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6568 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6571
6572 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577
6578 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6580 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6583
6584 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6586 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6587 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6589
6590 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6594 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6595
6596 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601
6602 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6606 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607
6608 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6611 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6612 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6613
6614 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6619 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6620 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6621 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6622
6623 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6625 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6626 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6627 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6628 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6629 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6630
6631 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6634 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6635 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6636 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6637 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6638
6639 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6641 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6643 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6644 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6645 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6646
6647 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6649 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6650 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6651 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6652 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6653 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6654
6655 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6658 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6659 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6660 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6661 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6662
6663 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6665 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6668 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6669 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6670
6671 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6676 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6677 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6678
6679 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6682 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6683 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6684 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6685 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6686
6687 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6688 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6689 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6690 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6691 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6692 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6693 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6694 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6695 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6696 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6697
6698 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6699 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6700 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6701 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6702 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6703 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6704 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6705 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6706 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6707 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6708 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6709 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6710
6711 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6712 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6713 asid);
6714 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6715 asid);
6716 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6717 asid);
6718 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6719 asid);
6720 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6721 asid);
6722 }
6723
6724 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6725 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6726 asid);
6727 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6728 asid);
6729 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6730 asid);
6731 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6732 asid);
6733 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6734 asid);
6735 }
6736
6737 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6738 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6739 asid);
6740 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6741 asid);
6742 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6743 asid);
6744 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6745 asid);
6746 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6747 asid);
6748 }
6749
6750 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6751 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6752 asid);
6753 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6754 asid);
6755 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6756 asid);
6757 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6758 asid);
6759 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6760 asid);
6761 }
6762
6763 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6764 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6765 asid);
6766 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6767 asid);
6768 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6769 asid);
6770 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6771 asid);
6772 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6773 asid);
6774 }
6775
6776 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6777 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6778 asid);
6779 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6780 asid);
6781 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6782 asid);
6783 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6784 asid);
6785 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6786 asid);
6787 }
6788
6789 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6790 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6791 asid);
6792 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6793 asid);
6794 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6795 asid);
6796 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6797 asid);
6798 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6799 asid);
6800 }
6801
6802 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6803 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6804 asid);
6805 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6806 asid);
6807 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6808 asid);
6809 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6810 asid);
6811 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6812 asid);
6813 }
6814
6815 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6816 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6817 asid);
6818 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6819 asid);
6820 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6821 asid);
6822 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6823 asid);
6824 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6825 asid);
6826 }
6827
6828 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6829 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6830 asid);
6831 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6832 asid);
6833 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6834 asid);
6835 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6836 asid);
6837 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6838 asid);
6839 }
6840
6841 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6842 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6843
6844 hdev->asic_funcs->set_clock_gating(hdev);
6845
6846 mutex_unlock(&gaudi->clk_gate_mutex);
6847 }
6848
6849 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6850 struct hl_cs_job *job)
6851 {
6852 struct packet_msg_prot *fence_pkt;
6853 u32 *fence_ptr;
6854 dma_addr_t fence_dma_addr;
6855 struct hl_cb *cb;
6856 u32 tmp, timeout, dma_offset;
6857 int rc;
6858
6859 if (hdev->pldm)
6860 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6861 else
6862 timeout = HL_DEVICE_TIMEOUT_USEC;
6863
6864 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6865 dev_err_ratelimited(hdev->dev,
6866 "Can't send driver job on QMAN0 because the device is not idle\n");
6867 return -EBUSY;
6868 }
6869
6870 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6871 &fence_dma_addr);
6872 if (!fence_ptr) {
6873 dev_err(hdev->dev,
6874 "Failed to allocate fence memory for QMAN0\n");
6875 return -ENOMEM;
6876 }
6877
6878 cb = job->patched_cb;
6879
6880 fence_pkt = cb->kernel_address +
6881 job->job_cb_size - sizeof(struct packet_msg_prot);
6882
6883 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6884 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6885 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6886
6887 fence_pkt->ctl = cpu_to_le32(tmp);
6888 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6889 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
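/*
 * Note on the fence mechanism above: the last packet slot of the patched CB
 * is rewritten as a MSG_PROT packet that makes QMAN0 write
 * GAUDI_QMAN0_FENCE_VAL to fence_dma_addr when the job completes; the driver
 * polls that value further below to detect completion.
 */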
6890
6891 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6892
6893 WREG32(mmDMA0_CORE_PROT + dma_offset,
6894 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
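/*
 * Set the DMA core protection bits for the duration of this driver-only job;
 * the register is written back to its previous value at the free_fence_ptr
 * label below.
 */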
6895
6896 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6897 job->job_cb_size, cb->bus_address);
6898 if (rc) {
6899 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6900 goto free_fence_ptr;
6901 }
6902
6903 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6904 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6905 timeout, true);
6906
6907 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6908
6909 if (rc == -ETIMEDOUT) {
6910 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6911 goto free_fence_ptr;
6912 }
6913
6914 free_fence_ptr:
6915 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6916
6917 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6918 fence_dma_addr);
6919 return rc;
6920 }
6921
6922 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6923 {
6924 if (event_type >= GAUDI_EVENT_SIZE)
6925 goto event_not_supported;
6926
6927 if (!gaudi_irq_map_table[event_type].valid)
6928 goto event_not_supported;
6929
6930 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6931
6932 return;
6933
6934 event_not_supported:
6935 snprintf(desc, size, "N/A");
6936 }
6937
6938 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6939 u32 x_y, bool is_write)
6940 {
6941 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6942
6943 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6944 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6945
6946 switch (x_y) {
6947 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6948 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6949 dma_id[0] = 0;
6950 dma_id[1] = 2;
6951 break;
6952 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6953 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6954 dma_id[0] = 1;
6955 dma_id[1] = 3;
6956 break;
6957 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6958 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6959 dma_id[0] = 4;
6960 dma_id[1] = 6;
6961 break;
6962 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6963 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6964 dma_id[0] = 5;
6965 dma_id[1] = 7;
6966 break;
6967 default:
6968 goto unknown_initiator;
6969 }
6970
6971 for (i = 0 ; i < 2 ; i++) {
6972 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6973 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6974 }
6975
6976 switch (x_y) {
6977 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6978 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6979 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6980 return "DMA0";
6981 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6982 return "DMA2";
6983 else
6984 return "DMA0 or DMA2";
6985 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6986 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6987 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6988 return "DMA1";
6989 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6990 return "DMA3";
6991 else
6992 return "DMA1 or DMA3";
6993 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6994 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6995 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6996 return "DMA4";
6997 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6998 return "DMA6";
6999 else
7000 return "DMA4 or DMA6";
7001 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7002 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7003 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7004 return "DMA5";
7005 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7006 return "DMA7";
7007 else
7008 return "DMA5 or DMA7";
7009 }
7010
7011 unknown_initiator:
7012 return "unknown initiator";
7013 }
7014
7015 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7016 bool is_write)
7017 {
7018 u32 val, x_y, axi_id;
7019
7020 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7021 RREG32(mmMMU_UP_RAZWI_READ_ID);
7022 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7023 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7024 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7025 RAZWI_INITIATOR_AXI_ID_SHIFT);
7026
7027 switch (x_y) {
7028 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7029 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7030 return "TPC0";
7031 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7032 return "NIC0";
7033 break;
7034 case RAZWI_INITIATOR_ID_X_Y_TPC1:
7035 return "TPC1";
7036 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7037 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7038 return "MME0";
7039 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7040 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7041 return "MME1";
7042 case RAZWI_INITIATOR_ID_X_Y_TPC2:
7043 return "TPC2";
7044 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7045 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7046 return "TPC3";
7047 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7048 return "PCI";
7049 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7050 return "CPU";
7051 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7052 return "PSOC";
7053 break;
7054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7056 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7057 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7058 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7059 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7060 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7061 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7062 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7063 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7064 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7065 return "TPC4";
7066 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7067 return "NIC1";
7068 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7069 return "NIC2";
7070 break;
7071 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7072 return "TPC5";
7073 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7074 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7075 return "MME2";
7076 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7077 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7078 return "MME3";
7079 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7080 return "TPC6";
7081 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7082 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7083 return "TPC7";
7084 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7085 return "NIC4";
7086 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7087 return "NIC5";
7088 break;
7089 default:
7090 break;
7091 }
7092
7093 dev_err(hdev->dev,
7094 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7095 val,
7096 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7097 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7098 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7099 RAZWI_INITIATOR_AXI_ID_MASK);
7100
7101 return "unknown initiator";
7102 }
7103
7104 static void gaudi_print_razwi_info(struct hl_device *hdev)
7105 {
7106 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7107 dev_err_ratelimited(hdev->dev,
7108 "RAZWI event caused by illegal write of %s\n",
7109 gaudi_get_razwi_initiator_name(hdev, true));
7110 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7111 }
7112
7113 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7114 dev_err_ratelimited(hdev->dev,
7115 "RAZWI event caused by illegal read of %s\n",
7116 gaudi_get_razwi_initiator_name(hdev, false));
7117 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7118 }
7119 }
7120
7121 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7122 {
7123 struct gaudi_device *gaudi = hdev->asic_specific;
7124 u64 addr;
7125 u32 val;
7126
7127 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7128 return;
7129
7130 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7131 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7132 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7133 addr <<= 32;
7134 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7135
7136 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7137 addr);
7138
7139 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7140 }
7141
7142 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7143 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7144 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7145 addr <<= 32;
7146 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7147
7148 dev_err_ratelimited(hdev->dev,
7149 "MMU access error on va 0x%llx\n", addr);
7150
7151 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7152 }
7153 }
7154
7155 /*
7156 * +-------------------+------------------------------------------------------+
7157 * | Configuration Reg | Description |
7158 * | Address | |
7159 * +-------------------+------------------------------------------------------+
7160 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7161 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7162 * | |0xF34 memory wrappers 63:32 |
7163 * | |0xF38 memory wrappers 95:64 |
7164 * | |0xF3C memory wrappers 127:96 |
7165 * +-------------------+------------------------------------------------------+
7166 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7167 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7168 * | |0xF44 memory wrappers 63:32 |
7169 * | |0xF48 memory wrappers 95:64 |
7170 * | |0xF4C memory wrappers 127:96 |
7171 * +-------------------+------------------------------------------------------+
7172 */
7173 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7174 struct ecc_info_extract_params *params, u64 *ecc_address,
7175 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7176 {
7177 struct gaudi_device *gaudi = hdev->asic_specific;
7178 u32 i, num_mem_regs, reg, err_bit;
7179 u64 err_addr, err_word = 0;
7180 int rc = 0;
7181
7182 num_mem_regs = params->num_memories / 32 +
7183 ((params->num_memories % 32) ? 1 : 0);
7184
7185 if (params->block_address >= CFG_BASE)
7186 params->block_address -= CFG_BASE;
7187
7188 if (params->derr)
7189 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7190 else
7191 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7192
7193 if (params->disable_clock_gating) {
7194 mutex_lock(&gaudi->clk_gate_mutex);
7195 hdev->asic_funcs->disable_clock_gating(hdev);
7196 }
7197
7198 /* Set invalid wrapper index */
7199 *memory_wrapper_idx = 0xFF;
7200
7201 /* Iterate through memory wrappers, a single bit must be set */
7202 for (i = 0 ; i < num_mem_regs ; i++) {
7203 		/* the indication registers are consecutive 32-bit registers */
7204 		err_word = RREG32(err_addr + i * 4);
7205 if (err_word) {
7206 err_bit = __ffs(err_word);
7207 *memory_wrapper_idx = err_bit + (32 * i);
7208 break;
7209 }
7210 }
7211
7212 if (*memory_wrapper_idx == 0xFF) {
7213 dev_err(hdev->dev, "ECC error information cannot be found\n");
7214 rc = -EINVAL;
7215 goto enable_clk_gate;
7216 }
7217
7218 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7219 *memory_wrapper_idx);
7220
7221 *ecc_address =
7222 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7223 *ecc_syndrom =
7224 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7225
7226 /* Clear error indication */
7227 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7228 if (params->derr)
7229 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7230 else
7231 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7232
7233 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7234
7235 enable_clk_gate:
7236 if (params->disable_clock_gating) {
7237 hdev->asic_funcs->set_clock_gating(hdev);
7238
7239 mutex_unlock(&gaudi->clk_gate_mutex);
7240 }
7241
7242 return rc;
7243 }
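/*
 * Worked example of the wrapper scan above, using the parameters the TPC
 * handlers pass in (num_memories = 90): num_mem_regs is 3, so up to three
 * 32-bit indication registers are scanned, and a bit k found set in
 * indication register i maps to memory wrapper index (32 * i) + k, which is
 * then written to the MEM_SEL register to latch the failing address and
 * syndrome.
 */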
7244
7245 /*
7246 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7247 *
7248 * @idx: the current pi/ci value
7249 * @q_len: the queue length (power of 2)
7250 *
7251 * @return the cyclically decremented index
7252 */
7253 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7254 {
7255 u32 mask = q_len - 1;
7256
7257 /*
7258 	 * modular decrement is equivalent to adding (queue_size - 1);
7259 	 * we then take the LSBs to make sure the value is in the
7260 	 * range [0, queue_len - 1]
7261 */
7262 return (idx + q_len - 1) & mask;
7263 }
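/*
 * Worked example: for a queue of length 8, gaudi_queue_idx_dec(0, 8) returns
 * (0 + 7) & 7 = 7, i.e. the index wraps around to the last entry instead of
 * underflowing.
 */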
7264
7265 /**
7266 * gaudi_print_sw_config_stream_data - print SW config stream data
7267 *
7268 * @hdev: pointer to the habanalabs device structure
7269 * @stream: the QMAN's stream
7270 * @qman_base: base address of QMAN registers block
7271 */
7272 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7273 u64 qman_base)
7274 {
7275 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7276 u32 cq_ptr_lo_off, size;
7277
7278 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7279
7280 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7281 stream * cq_ptr_lo_off;
7282 cq_ptr_hi = cq_ptr_lo +
7283 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284 cq_tsize = cq_ptr_lo +
7285 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7286
7287 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7288 size = RREG32(cq_tsize);
7289 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7290 stream, cq_ptr, size);
7291 }
7292
7293 /**
7294 * gaudi_print_last_pqes_on_err - print last PQEs on error
7295 *
7296 * @hdev: pointer to the habanalabs device structure
7297 * @qid_base: first QID of the QMAN (out of 4 streams)
7298 * @stream: the QMAN's stream
7299 * @qman_base: base address of QMAN registers block
7300 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7301 */
7302 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7303 u32 stream, u64 qman_base,
7304 bool pr_sw_conf)
7305 {
7306 u32 ci, qm_ci_stream_off, queue_len;
7307 struct hl_hw_queue *q;
7308 u64 pq_ci;
7309 int i;
7310
7311 q = &hdev->kernel_queues[qid_base + stream];
7312
7313 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7314 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7315 stream * qm_ci_stream_off;
7316
7317 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7318 q->int_queue_len : HL_QUEUE_LENGTH;
7319
7320 hdev->asic_funcs->hw_queues_lock(hdev);
7321
7322 if (pr_sw_conf)
7323 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7324
7325 ci = RREG32(pq_ci);
7326
7327 	/* we should start printing from ci - 1 */
7328 ci = gaudi_queue_idx_dec(ci, queue_len);
7329
7330 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7331 struct hl_bd *bd;
7332 u64 addr;
7333 u32 len;
7334
7335 bd = q->kernel_address;
7336 bd += ci;
7337
7338 len = le32_to_cpu(bd->len);
7339 		/* len 0 means an uninitialized entry - break */
7340 if (!len)
7341 break;
7342
7343 addr = le64_to_cpu(bd->ptr);
7344
7345 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7346 stream, ci, addr, len);
7347
7348 /* get previous ci, wrap if needed */
7349 ci = gaudi_queue_idx_dec(ci, queue_len);
7350 }
7351
7352 hdev->asic_funcs->hw_queues_unlock(hdev);
7353 }
7354
7355 /**
7356 * print_qman_data_on_err - extract QMAN data on error
7357 *
7358 * @hdev: pointer to the habanalabs device structure
7359 * @qid_base: first QID of the QMAN (out of 4 streams)
7360 * @stream: the QMAN's stream
7361 * @qman_base: base address of QMAN registers block
7362 *
7363  * This function attempts to extract as much data as possible on a QMAN error.
7364 * On upper CP print the SW config stream data and last 8 PQEs.
7365 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7366 */
7367 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7368 u32 stream, u64 qman_base)
7369 {
7370 u32 i;
7371
7372 if (stream != QMAN_STREAMS) {
7373 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7374 true);
7375 return;
7376 }
7377
7378 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7379
7380 for (i = 0; i < QMAN_STREAMS; i++)
7381 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7382 false);
7383 }
7384
7385 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7386 const char *qm_name,
7387 u64 qman_base,
7388 u32 qid_base)
7389 {
7390 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7391 u64 glbl_sts_addr, arb_err_addr;
7392 char reg_desc[32];
7393
7394 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7395 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
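/*
 * The loop below relies on the four per-stream GLBL_STS1 registers and the
 * lower CP status register being consecutive 32-bit registers, hence the
 * 4-byte stride per iteration.
 */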
7396
7397 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7398 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7399 glbl_sts_clr_val = 0;
7400 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7401
7402 if (!glbl_sts_val)
7403 continue;
7404
7405 if (i == QMAN_STREAMS)
7406 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7407 else
7408 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7409
7410 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7411 if (glbl_sts_val & BIT(j)) {
7412 dev_err_ratelimited(hdev->dev,
7413 "%s %s. err cause: %s\n",
7414 qm_name, reg_desc,
7415 gaudi_qman_error_cause[j]);
7416 glbl_sts_clr_val |= BIT(j);
7417 }
7418 }
7419
7420 		/* Write 1 to clear errors */
7421 if (!hdev->stop_on_err)
7422 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7423 else
7424 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7425 }
7426
7427 arb_err_val = RREG32(arb_err_addr);
7428
7429 if (!arb_err_val)
7430 return;
7431
7432 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7433 if (arb_err_val & BIT(j)) {
7434 dev_err_ratelimited(hdev->dev,
7435 "%s ARB_ERR. err cause: %s\n",
7436 qm_name,
7437 gaudi_qman_arb_error_cause[j]);
7438 }
7439 }
7440 }
7441
7442 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7443 struct hl_eq_sm_sei_data *sei_data)
7444 {
7445 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7446
7447 /* Flip the bits as the enum is ordered in the opposite way */
7448 index = (index ^ 0x3) & 0x3;
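/*
 * Worked example of the flip above: DMA_IF_SEI_0 (index 0) maps to sync
 * manager entry 3 and DMA_IF_SEI_3 (index 3) maps to entry 0, since the enum
 * is ordered in the opposite direction of gaudi_sync_manager_names.
 */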
7449
7450 switch (sei_data->sei_cause) {
7451 case SM_SEI_SO_OVERFLOW:
7452 dev_err_ratelimited(hdev->dev,
7453 "%s SEI Error: SOB Group %u overflow/underflow",
7454 gaudi_sync_manager_names[index],
7455 le32_to_cpu(sei_data->sei_log));
7456 break;
7457 case SM_SEI_LBW_4B_UNALIGNED:
7458 dev_err_ratelimited(hdev->dev,
7459 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7460 gaudi_sync_manager_names[index],
7461 le32_to_cpu(sei_data->sei_log));
7462 break;
7463 case SM_SEI_AXI_RESPONSE_ERR:
7464 dev_err_ratelimited(hdev->dev,
7465 "%s SEI Error: AXI ID %u response error",
7466 gaudi_sync_manager_names[index],
7467 le32_to_cpu(sei_data->sei_log));
7468 break;
7469 default:
7470 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7471 le32_to_cpu(sei_data->sei_log));
7472 break;
7473 }
7474 }
7475
7476 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7477 struct hl_eq_ecc_data *ecc_data)
7478 {
7479 struct ecc_info_extract_params params;
7480 u64 ecc_address = 0, ecc_syndrom = 0;
7481 u8 index, memory_wrapper_idx = 0;
7482 bool extract_info_from_fw;
7483 int rc;
7484
7485 if (hdev->asic_prop.fw_security_enabled) {
7486 extract_info_from_fw = true;
7487 goto extract_ecc_info;
7488 }
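/*
 * When the firmware controls security the driver cannot read the ECC capture
 * registers itself, so the information carried in the event queue entry is
 * used as-is (see the extract_ecc_info label below).
 */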
7489
7490 switch (event_type) {
7491 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7492 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7493 extract_info_from_fw = true;
7494 break;
7495 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7496 index = event_type - GAUDI_EVENT_TPC0_SERR;
7497 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7498 params.num_memories = 90;
7499 params.derr = false;
7500 params.disable_clock_gating = true;
7501 extract_info_from_fw = false;
7502 break;
7503 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7504 index = event_type - GAUDI_EVENT_TPC0_DERR;
7505 params.block_address =
7506 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7507 params.num_memories = 90;
7508 params.derr = true;
7509 params.disable_clock_gating = true;
7510 extract_info_from_fw = false;
7511 break;
7512 case GAUDI_EVENT_MME0_ACC_SERR:
7513 case GAUDI_EVENT_MME1_ACC_SERR:
7514 case GAUDI_EVENT_MME2_ACC_SERR:
7515 case GAUDI_EVENT_MME3_ACC_SERR:
7516 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7517 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7518 params.num_memories = 128;
7519 params.derr = false;
7520 params.disable_clock_gating = true;
7521 extract_info_from_fw = false;
7522 break;
7523 case GAUDI_EVENT_MME0_ACC_DERR:
7524 case GAUDI_EVENT_MME1_ACC_DERR:
7525 case GAUDI_EVENT_MME2_ACC_DERR:
7526 case GAUDI_EVENT_MME3_ACC_DERR:
7527 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7528 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7529 params.num_memories = 128;
7530 params.derr = true;
7531 params.disable_clock_gating = true;
7532 extract_info_from_fw = false;
7533 break;
7534 case GAUDI_EVENT_MME0_SBAB_SERR:
7535 case GAUDI_EVENT_MME1_SBAB_SERR:
7536 case GAUDI_EVENT_MME2_SBAB_SERR:
7537 case GAUDI_EVENT_MME3_SBAB_SERR:
7538 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7539 params.block_address =
7540 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7541 params.num_memories = 33;
7542 params.derr = false;
7543 params.disable_clock_gating = true;
7544 extract_info_from_fw = false;
7545 break;
7546 case GAUDI_EVENT_MME0_SBAB_DERR:
7547 case GAUDI_EVENT_MME1_SBAB_DERR:
7548 case GAUDI_EVENT_MME2_SBAB_DERR:
7549 case GAUDI_EVENT_MME3_SBAB_DERR:
7550 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7551 params.block_address =
7552 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7553 params.num_memories = 33;
7554 params.derr = true;
7555 params.disable_clock_gating = true;
7556 extract_info_from_fw = false;
7557 break;
7558 default:
7559 return;
7560 }
7561
7562 extract_ecc_info:
7563 if (extract_info_from_fw) {
7564 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7565 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7566 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7567 } else {
7568 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7569 &ecc_syndrom, &memory_wrapper_idx);
7570 if (rc)
7571 return;
7572 }
7573
7574 dev_err(hdev->dev,
7575 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7576 ecc_address, ecc_syndrom, memory_wrapper_idx);
7577 }
7578
7579 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7580 {
7581 u64 qman_base;
7582 char desc[32];
7583 u32 qid_base;
7584 u8 index;
7585
7586 switch (event_type) {
7587 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7588 index = event_type - GAUDI_EVENT_TPC0_QM;
7589 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7590 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7591 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7592 break;
7593 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7594 index = event_type - GAUDI_EVENT_MME0_QM;
7595 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7596 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7597 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7598 break;
7599 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7600 index = event_type - GAUDI_EVENT_DMA0_QM;
7601 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7602 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7603 if (index > 1)
7604 qid_base++;
7605 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7606 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7607 break;
7608 case GAUDI_EVENT_NIC0_QM0:
7609 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7610 qman_base = mmNIC0_QM0_BASE;
7611 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7612 break;
7613 case GAUDI_EVENT_NIC0_QM1:
7614 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7615 qman_base = mmNIC0_QM1_BASE;
7616 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7617 break;
7618 case GAUDI_EVENT_NIC1_QM0:
7619 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7620 qman_base = mmNIC1_QM0_BASE;
7621 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7622 break;
7623 case GAUDI_EVENT_NIC1_QM1:
7624 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7625 qman_base = mmNIC1_QM1_BASE;
7626 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7627 break;
7628 case GAUDI_EVENT_NIC2_QM0:
7629 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7630 qman_base = mmNIC2_QM0_BASE;
7631 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7632 break;
7633 case GAUDI_EVENT_NIC2_QM1:
7634 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7635 qman_base = mmNIC2_QM1_BASE;
7636 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7637 break;
7638 case GAUDI_EVENT_NIC3_QM0:
7639 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7640 qman_base = mmNIC3_QM0_BASE;
7641 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7642 break;
7643 case GAUDI_EVENT_NIC3_QM1:
7644 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7645 qman_base = mmNIC3_QM1_BASE;
7646 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7647 break;
7648 case GAUDI_EVENT_NIC4_QM0:
7649 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7650 qman_base = mmNIC4_QM0_BASE;
7651 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7652 break;
7653 case GAUDI_EVENT_NIC4_QM1:
7654 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7655 qman_base = mmNIC4_QM1_BASE;
7656 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7657 break;
7658 default:
7659 return;
7660 }
7661
7662 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7663 }
7664
7665 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7666 bool razwi)
7667 {
7668 char desc[64] = "";
7669
7670 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7671 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7672 event_type, desc);
7673
7674 if (razwi) {
7675 gaudi_print_razwi_info(hdev);
7676 gaudi_print_mmu_error_info(hdev);
7677 }
7678 }
7679
7680 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7681 struct cpucp_pkt_sync_err *sync_err)
7682 {
7683 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7684
7685 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7686 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7687 }
7688
7689 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7690 struct hl_eq_fw_alive *fw_alive)
7691 {
7692 dev_err(hdev->dev,
7693 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7694 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7695 "Minor" : "Critical", fw_alive->process_id,
7696 fw_alive->thread_id, fw_alive->uptime_seconds);
7697 }
7698
7699 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7700 {
7701 struct gaudi_device *gaudi = hdev->asic_specific;
7702
7703 /* Unmask all IRQs since some could have been received
7704 * during the soft reset
7705 */
7706 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7707 }
7708
7709 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7710 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7711 {
7712 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7713 int rc = 0;
7714
7715 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7716 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7717 if (!hbm_ecc_data) {
7718 dev_err(hdev->dev, "No FW ECC data");
7719 return 0;
7720 }
7721
7722 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7723 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7725 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7727 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7729 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7731 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7733 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7735 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7736
7737 dev_err(hdev->dev,
7738 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739 device, ch, wr_par, rd_par, ca_par, serr, derr);
7740 dev_err(hdev->dev,
7741 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7742 device, ch, hbm_ecc_data->first_addr, type,
7743 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7744 hbm_ecc_data->dec_cnt);
7745 return 0;
7746 }
7747
7748 if (hdev->asic_prop.fw_security_enabled) {
7749 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7750 return 0;
7751 }
7752
7753 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
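/*
 * Per-channel register usage below: offsets 0x06C and 0x07C hold the
 * interrupt status for the two pseudo channels, 0x060/0x070 hold the ECC
 * counters and error type that are decoded into the prints, 0x064/0x074 hold
 * the first error address, and the writes at the end of the loop clear the
 * latched state.
 */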
7754 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7755 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7756 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7757 if (val) {
7758 rc = -EIO;
7759 dev_err(hdev->dev,
7760 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7761 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7762 (val >> 2) & 0x1, (val >> 3) & 0x1,
7763 (val >> 4) & 0x1);
7764
7765 val2 = RREG32(base + ch * 0x1000 + 0x060);
7766 dev_err(hdev->dev,
7767 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7768 device, ch * 2,
7769 RREG32(base + ch * 0x1000 + 0x064),
7770 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7771 (val2 & 0xFF0000) >> 16,
7772 (val2 & 0xFF000000) >> 24);
7773 }
7774
7775 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7776 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7777 if (val) {
7778 rc = -EIO;
7779 dev_err(hdev->dev,
7780 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7781 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7782 (val >> 2) & 0x1, (val >> 3) & 0x1,
7783 (val >> 4) & 0x1);
7784
7785 val2 = RREG32(base + ch * 0x1000 + 0x070);
7786 dev_err(hdev->dev,
7787 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7788 device, ch * 2 + 1,
7789 RREG32(base + ch * 0x1000 + 0x074),
7790 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7791 (val2 & 0xFF0000) >> 16,
7792 (val2 & 0xFF000000) >> 24);
7793 }
7794
7795 /* Clear interrupts */
7796 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7797 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7798 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7799 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7800 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7801 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7802 }
7803
7804 val = RREG32(base + 0x8F30);
7805 val2 = RREG32(base + 0x8F34);
7806 if (val | val2) {
7807 rc = -EIO;
7808 dev_err(hdev->dev,
7809 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7810 device, val, val2);
7811 }
7812 val = RREG32(base + 0x8F40);
7813 val2 = RREG32(base + 0x8F44);
7814 if (val | val2) {
7815 rc = -EIO;
7816 dev_err(hdev->dev,
7817 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7818 device, val, val2);
7819 }
7820
7821 return rc;
7822 }
7823
7824 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7825 {
7826 switch (hbm_event_type) {
7827 case GAUDI_EVENT_HBM0_SPI_0:
7828 case GAUDI_EVENT_HBM0_SPI_1:
7829 return 0;
7830 case GAUDI_EVENT_HBM1_SPI_0:
7831 case GAUDI_EVENT_HBM1_SPI_1:
7832 return 1;
7833 case GAUDI_EVENT_HBM2_SPI_0:
7834 case GAUDI_EVENT_HBM2_SPI_1:
7835 return 2;
7836 case GAUDI_EVENT_HBM3_SPI_0:
7837 case GAUDI_EVENT_HBM3_SPI_1:
7838 return 3;
7839 default:
7840 break;
7841 }
7842
7843 /* Should never happen */
7844 return 0;
7845 }
7846
7847 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7848 char *interrupt_name)
7849 {
7850 struct gaudi_device *gaudi = hdev->asic_specific;
7851 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7852 bool soft_reset_required = false;
7853
7854 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7855 	 * gating, so it cannot be done by the CPU-CP firmware and is done by
7856 	 * the driver instead.
7857 */
7858
7859 mutex_lock(&gaudi->clk_gate_mutex);
7860
7861 hdev->asic_funcs->disable_clock_gating(hdev);
7862
7863 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7864 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7865
7866 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7867 if (tpc_interrupts_cause & BIT(i)) {
7868 dev_err_ratelimited(hdev->dev,
7869 "TPC%d_%s interrupt cause: %s\n",
7870 tpc_id, interrupt_name,
7871 gaudi_tpc_interrupts_cause[i]);
7872 /* If this is QM error, we need to soft-reset */
7873 if (i == 15)
7874 soft_reset_required = true;
7875 }
7876
7877 /* Clear interrupts */
7878 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7879
7880 hdev->asic_funcs->set_clock_gating(hdev);
7881
7882 mutex_unlock(&gaudi->clk_gate_mutex);
7883
7884 return soft_reset_required;
7885 }
7886
7887 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7888 {
7889 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7890 }
7891
7892 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7893 {
7894 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7895 }
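/*
 * Both helpers above derive the TPC index from the event spacing implied by
 * their divisors: consecutive TPCs are two entries apart for the DEC events
 * and six entries apart for the KRN_ERR events, so e.g. an event two entries
 * after GAUDI_EVENT_TPC0_DEC resolves to TPC 1.
 */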
7896
7897 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7898 u16 event_type)
7899 {
7900 switch (event_type) {
7901 case GAUDI_EVENT_FIX_POWER_ENV_S:
7902 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7903 dev_info_ratelimited(hdev->dev,
7904 "Clock throttling due to power consumption\n");
7905 break;
7906
7907 case GAUDI_EVENT_FIX_POWER_ENV_E:
7908 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7909 dev_info_ratelimited(hdev->dev,
7910 "Power envelop is safe, back to optimal clock\n");
7911 break;
7912
7913 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7914 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7915 dev_info_ratelimited(hdev->dev,
7916 "Clock throttling due to overheating\n");
7917 break;
7918
7919 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7920 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7921 dev_info_ratelimited(hdev->dev,
7922 "Thermal envelop is safe, back to optimal clock\n");
7923 break;
7924
7925 default:
7926 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7927 event_type);
7928 break;
7929 }
7930 }
7931
7932 static void gaudi_handle_eqe(struct hl_device *hdev,
7933 struct hl_eq_entry *eq_entry)
7934 {
7935 struct gaudi_device *gaudi = hdev->asic_specific;
7936 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7937 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7938 >> EQ_CTL_EVENT_TYPE_SHIFT);
7939 bool reset_required;
7940 u8 cause;
7941 int rc;
7942
7943 if (event_type >= GAUDI_EVENT_SIZE) {
7944 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7945 event_type, GAUDI_EVENT_SIZE - 1);
7946 return;
7947 }
7948
7949 gaudi->events_stat[event_type]++;
7950 gaudi->events_stat_aggregate[event_type]++;
7951
7952 switch (event_type) {
7953 case GAUDI_EVENT_PCIE_CORE_DERR:
7954 case GAUDI_EVENT_PCIE_IF_DERR:
7955 case GAUDI_EVENT_PCIE_PHY_DERR:
7956 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7957 case GAUDI_EVENT_MME0_ACC_DERR:
7958 case GAUDI_EVENT_MME0_SBAB_DERR:
7959 case GAUDI_EVENT_MME1_ACC_DERR:
7960 case GAUDI_EVENT_MME1_SBAB_DERR:
7961 case GAUDI_EVENT_MME2_ACC_DERR:
7962 case GAUDI_EVENT_MME2_SBAB_DERR:
7963 case GAUDI_EVENT_MME3_ACC_DERR:
7964 case GAUDI_EVENT_MME3_SBAB_DERR:
7965 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7966 fallthrough;
7967 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7968 case GAUDI_EVENT_PSOC_MEM_DERR:
7969 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7970 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7971 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7972 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7973 case GAUDI_EVENT_MMU_DERR:
7974 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7975 gaudi_print_irq_info(hdev, event_type, true);
7976 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7977 goto reset_device;
7978
7979 case GAUDI_EVENT_GIC500:
7980 case GAUDI_EVENT_AXI_ECC:
7981 case GAUDI_EVENT_L2_RAM_ECC:
7982 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7983 gaudi_print_irq_info(hdev, event_type, false);
7984 goto reset_device;
7985
7986 case GAUDI_EVENT_HBM0_SPI_0:
7987 case GAUDI_EVENT_HBM1_SPI_0:
7988 case GAUDI_EVENT_HBM2_SPI_0:
7989 case GAUDI_EVENT_HBM3_SPI_0:
7990 gaudi_print_irq_info(hdev, event_type, false);
7991 gaudi_hbm_read_interrupts(hdev,
7992 gaudi_hbm_event_to_dev(event_type),
7993 &eq_entry->hbm_ecc_data);
7994 goto reset_device;
7995
7996 case GAUDI_EVENT_HBM0_SPI_1:
7997 case GAUDI_EVENT_HBM1_SPI_1:
7998 case GAUDI_EVENT_HBM2_SPI_1:
7999 case GAUDI_EVENT_HBM3_SPI_1:
8000 gaudi_print_irq_info(hdev, event_type, false);
8001 gaudi_hbm_read_interrupts(hdev,
8002 gaudi_hbm_event_to_dev(event_type),
8003 &eq_entry->hbm_ecc_data);
8004 hl_fw_unmask_irq(hdev, event_type);
8005 break;
8006
8007 case GAUDI_EVENT_TPC0_DEC:
8008 case GAUDI_EVENT_TPC1_DEC:
8009 case GAUDI_EVENT_TPC2_DEC:
8010 case GAUDI_EVENT_TPC3_DEC:
8011 case GAUDI_EVENT_TPC4_DEC:
8012 case GAUDI_EVENT_TPC5_DEC:
8013 case GAUDI_EVENT_TPC6_DEC:
8014 case GAUDI_EVENT_TPC7_DEC:
8015 gaudi_print_irq_info(hdev, event_type, true);
8016 reset_required = gaudi_tpc_read_interrupts(hdev,
8017 tpc_dec_event_to_tpc_id(event_type),
8018 "AXI_SLV_DEC_Error");
8019 if (reset_required) {
8020 dev_err(hdev->dev, "reset required due to %s\n",
8021 gaudi_irq_map_table[event_type].name);
8022
8023 hl_device_reset(hdev, 0);
8024 } else {
8025 hl_fw_unmask_irq(hdev, event_type);
8026 }
8027 break;
8028
8029 case GAUDI_EVENT_TPC0_KRN_ERR:
8030 case GAUDI_EVENT_TPC1_KRN_ERR:
8031 case GAUDI_EVENT_TPC2_KRN_ERR:
8032 case GAUDI_EVENT_TPC3_KRN_ERR:
8033 case GAUDI_EVENT_TPC4_KRN_ERR:
8034 case GAUDI_EVENT_TPC5_KRN_ERR:
8035 case GAUDI_EVENT_TPC6_KRN_ERR:
8036 case GAUDI_EVENT_TPC7_KRN_ERR:
8037 gaudi_print_irq_info(hdev, event_type, true);
8038 reset_required = gaudi_tpc_read_interrupts(hdev,
8039 tpc_krn_event_to_tpc_id(event_type),
8040 "KRN_ERR");
8041 if (reset_required) {
8042 dev_err(hdev->dev, "reset required due to %s\n",
8043 gaudi_irq_map_table[event_type].name);
8044
8045 hl_device_reset(hdev, 0);
8046 } else {
8047 hl_fw_unmask_irq(hdev, event_type);
8048 }
8049 break;
8050
8051 case GAUDI_EVENT_PCIE_CORE_SERR:
8052 case GAUDI_EVENT_PCIE_IF_SERR:
8053 case GAUDI_EVENT_PCIE_PHY_SERR:
8054 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8055 case GAUDI_EVENT_MME0_ACC_SERR:
8056 case GAUDI_EVENT_MME0_SBAB_SERR:
8057 case GAUDI_EVENT_MME1_ACC_SERR:
8058 case GAUDI_EVENT_MME1_SBAB_SERR:
8059 case GAUDI_EVENT_MME2_ACC_SERR:
8060 case GAUDI_EVENT_MME2_SBAB_SERR:
8061 case GAUDI_EVENT_MME3_ACC_SERR:
8062 case GAUDI_EVENT_MME3_SBAB_SERR:
8063 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8064 case GAUDI_EVENT_CPU_IF_ECC_SERR:
8065 case GAUDI_EVENT_PSOC_MEM_SERR:
8066 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8067 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8068 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8069 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8070 fallthrough;
8071 case GAUDI_EVENT_MMU_SERR:
8072 gaudi_print_irq_info(hdev, event_type, true);
8073 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8074 hl_fw_unmask_irq(hdev, event_type);
8075 break;
8076
8077 case GAUDI_EVENT_PCIE_DEC:
8078 case GAUDI_EVENT_MME0_WBC_RSP:
8079 case GAUDI_EVENT_MME0_SBAB0_RSP:
8080 case GAUDI_EVENT_MME1_WBC_RSP:
8081 case GAUDI_EVENT_MME1_SBAB0_RSP:
8082 case GAUDI_EVENT_MME2_WBC_RSP:
8083 case GAUDI_EVENT_MME2_SBAB0_RSP:
8084 case GAUDI_EVENT_MME3_WBC_RSP:
8085 case GAUDI_EVENT_MME3_SBAB0_RSP:
8086 case GAUDI_EVENT_CPU_AXI_SPLITTER:
8087 case GAUDI_EVENT_PSOC_AXI_DEC:
8088 case GAUDI_EVENT_PSOC_PRSTN_FALL:
8089 case GAUDI_EVENT_MMU_PAGE_FAULT:
8090 case GAUDI_EVENT_MMU_WR_PERM:
8091 case GAUDI_EVENT_RAZWI_OR_ADC:
8092 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8093 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8094 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8095 fallthrough;
8096 case GAUDI_EVENT_NIC0_QM0:
8097 case GAUDI_EVENT_NIC0_QM1:
8098 case GAUDI_EVENT_NIC1_QM0:
8099 case GAUDI_EVENT_NIC1_QM1:
8100 case GAUDI_EVENT_NIC2_QM0:
8101 case GAUDI_EVENT_NIC2_QM1:
8102 case GAUDI_EVENT_NIC3_QM0:
8103 case GAUDI_EVENT_NIC3_QM1:
8104 case GAUDI_EVENT_NIC4_QM0:
8105 case GAUDI_EVENT_NIC4_QM1:
8106 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8107 gaudi_print_irq_info(hdev, event_type, true);
8108 gaudi_handle_qman_err(hdev, event_type);
8109 hl_fw_unmask_irq(hdev, event_type);
8110 break;
8111
8112 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8113 gaudi_print_irq_info(hdev, event_type, true);
8114 goto reset_device;
8115
8116 case GAUDI_EVENT_TPC0_BMON_SPMU:
8117 case GAUDI_EVENT_TPC1_BMON_SPMU:
8118 case GAUDI_EVENT_TPC2_BMON_SPMU:
8119 case GAUDI_EVENT_TPC3_BMON_SPMU:
8120 case GAUDI_EVENT_TPC4_BMON_SPMU:
8121 case GAUDI_EVENT_TPC5_BMON_SPMU:
8122 case GAUDI_EVENT_TPC6_BMON_SPMU:
8123 case GAUDI_EVENT_TPC7_BMON_SPMU:
8124 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8125 gaudi_print_irq_info(hdev, event_type, false);
8126 hl_fw_unmask_irq(hdev, event_type);
8127 break;
8128
8129 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8130 gaudi_print_irq_info(hdev, event_type, false);
8131 gaudi_print_sm_sei_info(hdev, event_type,
8132 &eq_entry->sm_sei_data);
8133 rc = hl_state_dump(hdev);
8134 if (rc)
8135 dev_err(hdev->dev,
8136 "Error during system state dump %d\n", rc);
8137 hl_fw_unmask_irq(hdev, event_type);
8138 break;
8139
8140 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8141 gaudi_print_clk_change_info(hdev, event_type);
8142 hl_fw_unmask_irq(hdev, event_type);
8143 break;
8144
8145 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8146 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8147 dev_err(hdev->dev,
8148 "Received high temp H/W interrupt %d (cause %d)\n",
8149 event_type, cause);
8150 break;
8151
8152 case GAUDI_EVENT_DEV_RESET_REQ:
8153 gaudi_print_irq_info(hdev, event_type, false);
8154 goto reset_device;
8155
8156 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8157 gaudi_print_irq_info(hdev, event_type, false);
8158 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8159 goto reset_device;
8160
8161 case GAUDI_EVENT_FW_ALIVE_S:
8162 gaudi_print_irq_info(hdev, event_type, false);
8163 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8164 goto reset_device;
8165
8166 default:
8167 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8168 event_type);
8169 break;
8170 }
8171
8172 return;
8173
8174 reset_device:
8175 if (hdev->asic_prop.fw_security_enabled)
8176 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8177 else if (hdev->hard_reset_on_fw_events)
8178 hl_device_reset(hdev, HL_RESET_HARD);
8179 else
8180 hl_fw_unmask_irq(hdev, event_type);
8181 }
8182
8183 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8184 u32 *size)
8185 {
8186 struct gaudi_device *gaudi = hdev->asic_specific;
8187
8188 if (aggregate) {
8189 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8190 return gaudi->events_stat_aggregate;
8191 }
8192
8193 *size = (u32) sizeof(gaudi->events_stat);
8194 return gaudi->events_stat;
8195 }
8196
8197 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8198 u32 flags)
8199 {
8200 struct gaudi_device *gaudi = hdev->asic_specific;
8201 u32 status, timeout_usec;
8202 int rc;
8203
8204 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8205 hdev->hard_reset_pending)
8206 return 0;
8207
8208 if (hdev->pldm)
8209 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8210 else
8211 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8212
8213 /* L0 & L1 invalidation */
8214 WREG32(mmSTLB_INV_PS, 3);
8215 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8216 WREG32(mmSTLB_INV_PS, 2);
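/*
 * The register sequence above kicks the L0/L1 cache invalidation (note the
 * post-increment of the driver's producer index), and the poll below waits
 * for the STLB to clear mmSTLB_INV_PS; a timeout is escalated to a hard
 * reset.
 */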
8217
8218 rc = hl_poll_timeout(
8219 hdev,
8220 mmSTLB_INV_PS,
8221 status,
8222 !status,
8223 1000,
8224 timeout_usec);
8225
8226 WREG32(mmSTLB_INV_SET, 0);
8227
8228 if (rc) {
8229 dev_err_ratelimited(hdev->dev,
8230 "MMU cache invalidation timeout\n");
8231 hl_device_reset(hdev, HL_RESET_HARD);
8232 }
8233
8234 return rc;
8235 }
8236
8237 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8238 bool is_hard, u32 flags,
8239 u32 asid, u64 va, u64 size)
8240 {
8241 /* Treat as invalidate all because there is no range invalidation
8242 * in Gaudi
8243 */
8244 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8245 }
8246
8247 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8248 u32 asid, u64 phys_addr)
8249 {
8250 u32 status, timeout_usec;
8251 int rc;
8252
8253 if (hdev->pldm)
8254 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8255 else
8256 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8257
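	/*
	 * Program the hop0 page-table physical address for this ASID and
	 * kick the MMU by setting the busy bit; H/W clears it when done.
	 */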
8258 WREG32(MMU_ASID, asid);
8259 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8260 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8261 WREG32(MMU_BUSY, 0x80000000);
8262
8263 rc = hl_poll_timeout(
8264 hdev,
8265 MMU_BUSY,
8266 status,
8267 !(status & 0x80000000),
8268 1000,
8269 timeout_usec);
8270
8271 if (rc) {
8272 dev_err(hdev->dev,
8273 "Timeout during MMU hop0 config of asid %d\n", asid);
8274 return rc;
8275 }
8276
8277 return 0;
8278 }
8279
8280 static int gaudi_send_heartbeat(struct hl_device *hdev)
8281 {
8282 struct gaudi_device *gaudi = hdev->asic_specific;
8283
8284 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8285 return 0;
8286
8287 return hl_fw_send_heartbeat(hdev);
8288 }
8289
8290 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8291 {
8292 struct gaudi_device *gaudi = hdev->asic_specific;
8293 struct asic_fixed_properties *prop = &hdev->asic_prop;
8294 int rc;
8295
8296 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8297 return 0;
8298
8299 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8300 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8301 mmCPU_BOOT_ERR1);
8302 if (rc)
8303 return rc;
8304
8305 if (!strlen(prop->cpucp_info.card_name))
8306 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8307 CARD_NAME_MAX_LEN);
8308
8309 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8310
8311 set_default_power_values(hdev);
8312
8313 hdev->max_power = prop->max_power_default;
8314
8315 return 0;
8316 }
8317
8318 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8319 u8 mask_len, struct seq_file *s)
8320 {
8321 struct gaudi_device *gaudi = hdev->asic_specific;
8322 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8323 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8324 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8325 unsigned long *mask = (unsigned long *)mask_arr;
8326 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8327 bool is_idle = true, is_eng_idle, is_slave;
8328 u64 offset;
8329 int i, dma_id, port;
8330
8331 mutex_lock(&gaudi->clk_gate_mutex);
8332
8333 hdev->asic_funcs->disable_clock_gating(hdev);
8334
8335 if (s)
8336 seq_puts(s,
8337 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8338 "--- ------- ------------ ---------- -------------\n");
8339
8340 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8341 dma_id = gaudi_dma_assignment[i];
8342 offset = dma_id * DMA_QMAN_OFFSET;
8343
8344 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8345 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8346 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8347 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8348 IS_DMA_IDLE(dma_core_sts0);
8349 is_idle &= is_eng_idle;
8350
8351 if (mask && !is_eng_idle)
8352 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8353 if (s)
8354 seq_printf(s, fmt, dma_id,
8355 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8356 qm_cgm_sts, dma_core_sts0);
8357 }
8358
8359 if (s)
8360 seq_puts(s,
8361 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8362 "--- ------- ------------ ---------- ----------\n");
8363
8364 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8365 offset = i * TPC_QMAN_OFFSET;
8366 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8367 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8368 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8369 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8370 IS_TPC_IDLE(tpc_cfg_sts);
8371 is_idle &= is_eng_idle;
8372
8373 if (mask && !is_eng_idle)
8374 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8375 if (s)
8376 seq_printf(s, fmt, i,
8377 is_eng_idle ? "Y" : "N",
8378 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8379 }
8380
8381 if (s)
8382 seq_puts(s,
8383 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8384 "--- ------- ------------ ---------- -----------\n");
8385
8386 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8387 offset = i * MME_QMAN_OFFSET;
8388 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8389 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8390
8391 /* MME 1 & 3 are slaves, no need to check their QMANs */
8392 is_slave = i % 2;
8393 if (!is_slave) {
8394 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8395 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8396 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8397 }
8398
8399 is_idle &= is_eng_idle;
8400
8401 if (mask && !is_eng_idle)
8402 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8403 if (s) {
8404 if (!is_slave)
8405 seq_printf(s, fmt, i,
8406 is_eng_idle ? "Y" : "N",
8407 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8408 else
8409 seq_printf(s, mme_slave_fmt, i,
8410 is_eng_idle ? "Y" : "N", "-",
8411 "-", mme_arch_sts);
8412 }
8413 }
8414
8415 if (s)
8416 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8417 "--- ------- ------------ ----------\n");
8418
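	/*
	 * Each NIC macro hosts two engines (ports); a port's QMAN is checked
	 * only if that port was initialized (its HW_CAP_NIC bit is set).
	 */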
8419 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8420 offset = i * NIC_MACRO_QMAN_OFFSET;
8421 port = 2 * i;
8422 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8423 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8424 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8425 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8426 is_idle &= is_eng_idle;
8427
8428 if (mask && !is_eng_idle)
8429 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8430 if (s)
8431 seq_printf(s, nic_fmt, port,
8432 is_eng_idle ? "Y" : "N",
8433 qm_glbl_sts0, qm_cgm_sts);
8434 }
8435
8436 port = 2 * i + 1;
8437 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8438 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8439 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8440 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8441 is_idle &= is_eng_idle;
8442
8443 if (mask && !is_eng_idle)
8444 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8445 if (s)
8446 seq_printf(s, nic_fmt, port,
8447 is_eng_idle ? "Y" : "N",
8448 qm_glbl_sts0, qm_cgm_sts);
8449 }
8450 }
8451
8452 if (s)
8453 seq_puts(s, "\n");
8454
8455 hdev->asic_funcs->set_clock_gating(hdev);
8456
8457 mutex_unlock(&gaudi->clk_gate_mutex);
8458
8459 return is_idle;
8460 }
8461
8462 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8463 __acquires(&gaudi->hw_queues_lock)
8464 {
8465 struct gaudi_device *gaudi = hdev->asic_specific;
8466
8467 spin_lock(&gaudi->hw_queues_lock);
8468 }
8469
8470 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8471 __releases(&gaudi->hw_queues_lock)
8472 {
8473 struct gaudi_device *gaudi = hdev->asic_specific;
8474
8475 spin_unlock(&gaudi->hw_queues_lock);
8476 }
8477
8478 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8479 {
8480 return hdev->pdev->device;
8481 }
8482
8483 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8484 size_t max_size)
8485 {
8486 struct gaudi_device *gaudi = hdev->asic_specific;
8487
8488 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8489 return 0;
8490
8491 return hl_fw_get_eeprom_data(hdev, data, max_size);
8492 }
8493
8494 /*
8495 * this function should be used only during initialization and/or after reset,
8496 * when there are no active users.
8497 */
8498 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8499 u32 tpc_id)
8500 {
8501 struct gaudi_device *gaudi = hdev->asic_specific;
8502 u64 kernel_timeout;
8503 u32 status, offset;
8504 int rc;
8505
8506 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8507
8508 if (hdev->pldm)
8509 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8510 else
8511 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8512
8513 mutex_lock(&gaudi->clk_gate_mutex);
8514
8515 hdev->asic_funcs->disable_clock_gating(hdev);
8516
8517 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8518 lower_32_bits(tpc_kernel));
8519 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8520 upper_32_bits(tpc_kernel));
8521
8522 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8523 lower_32_bits(tpc_kernel));
8524 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8525 upper_32_bits(tpc_kernel));
8526 /* set a valid LUT pointer, content is of no significance */
8527 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8528 lower_32_bits(tpc_kernel));
8529 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8530 upper_32_bits(tpc_kernel));
8531
8532 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8533 lower_32_bits(CFG_BASE +
8534 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8535
8536 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8537 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8538 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8539 /* wait a bit for the engine to start executing */
8540 usleep_range(1000, 1500);
8541
8542 	/* wait until the icache prefetch command has finished */
8543 rc = hl_poll_timeout(
8544 hdev,
8545 mmTPC0_CFG_STATUS + offset,
8546 status,
8547 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8548 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8549 1000,
8550 kernel_timeout);
8551
8552 if (rc) {
8553 dev_err(hdev->dev,
8554 "Timeout while waiting for TPC%d icache prefetch\n",
8555 tpc_id);
8556 hdev->asic_funcs->set_clock_gating(hdev);
8557 mutex_unlock(&gaudi->clk_gate_mutex);
8558 return -EIO;
8559 }
8560
8561 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8562 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8563
8564 /* wait a bit for the engine to start executing */
8565 usleep_range(1000, 1500);
8566
8567 /* wait until engine has finished executing */
8568 rc = hl_poll_timeout(
8569 hdev,
8570 mmTPC0_CFG_STATUS + offset,
8571 status,
8572 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8573 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8574 1000,
8575 kernel_timeout);
8576
8577 if (rc) {
8578 dev_err(hdev->dev,
8579 "Timeout while waiting for TPC%d vector pipe\n",
8580 tpc_id);
8581 hdev->asic_funcs->set_clock_gating(hdev);
8582 mutex_unlock(&gaudi->clk_gate_mutex);
8583 return -EIO;
8584 }
8585
8586 rc = hl_poll_timeout(
8587 hdev,
8588 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8589 status,
8590 (status == 0),
8591 1000,
8592 kernel_timeout);
8593
8594 hdev->asic_funcs->set_clock_gating(hdev);
8595 mutex_unlock(&gaudi->clk_gate_mutex);
8596
8597 if (rc) {
8598 dev_err(hdev->dev,
8599 "Timeout while waiting for TPC%d kernel to execute\n",
8600 tpc_id);
8601 return -EIO;
8602 }
8603
8604 return 0;
8605 }
8606
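/*
 * Allocate a host-resident buffer for internal (collective) CBs, manage it
 * with a gen_pool, reserve a device VA block in the host range and MMU-map
 * the buffer to it.
 */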
8607 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8608 struct hl_ctx *ctx)
8609 {
8610 struct gaudi_device *gaudi = hdev->asic_specific;
8611 int min_alloc_order, rc, collective_cb_size;
8612
8613 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8614 return 0;
8615
8616 hdev->internal_cb_pool_virt_addr =
8617 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8618 HOST_SPACE_INTERNAL_CB_SZ,
8619 &hdev->internal_cb_pool_dma_addr,
8620 GFP_KERNEL | __GFP_ZERO);
8621
8622 if (!hdev->internal_cb_pool_virt_addr)
8623 return -ENOMEM;
8624
8625 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8626 sizeof(struct packet_fence);
8627 min_alloc_order = ilog2(collective_cb_size);
8628
8629 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8630 if (!hdev->internal_cb_pool) {
8631 dev_err(hdev->dev,
8632 "Failed to create internal CB pool\n");
8633 rc = -ENOMEM;
8634 goto free_internal_cb_pool;
8635 }
8636
8637 rc = gen_pool_add(hdev->internal_cb_pool,
8638 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8639 HOST_SPACE_INTERNAL_CB_SZ, -1);
8640 if (rc) {
8641 dev_err(hdev->dev,
8642 "Failed to add memory to internal CB pool\n");
8643 rc = -EFAULT;
8644 goto destroy_internal_cb_pool;
8645 }
8646
8647 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8648 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8649 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8650
8651 if (!hdev->internal_cb_va_base) {
8652 rc = -ENOMEM;
8653 goto destroy_internal_cb_pool;
8654 }
8655
8656 mutex_lock(&ctx->mmu_lock);
8657 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8658 hdev->internal_cb_pool_dma_addr,
8659 HOST_SPACE_INTERNAL_CB_SZ);
8660
8661 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8662 mutex_unlock(&ctx->mmu_lock);
8663
8664 if (rc)
8665 goto unreserve_internal_cb_pool;
8666
8667 return 0;
8668
8669 unreserve_internal_cb_pool:
8670 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8671 HOST_SPACE_INTERNAL_CB_SZ);
8672 destroy_internal_cb_pool:
8673 gen_pool_destroy(hdev->internal_cb_pool);
8674 free_internal_cb_pool:
8675 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8676 HOST_SPACE_INTERNAL_CB_SZ,
8677 hdev->internal_cb_pool_virt_addr,
8678 hdev->internal_cb_pool_dma_addr);
8679
8680 return rc;
8681 }
8682
8683 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8684 struct hl_ctx *ctx)
8685 {
8686 struct gaudi_device *gaudi = hdev->asic_specific;
8687
8688 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8689 return;
8690
8691 mutex_lock(&ctx->mmu_lock);
8692 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8693 HOST_SPACE_INTERNAL_CB_SZ);
8694 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8695 HOST_SPACE_INTERNAL_CB_SZ);
8696 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8697 mutex_unlock(&ctx->mmu_lock);
8698
8699 gen_pool_destroy(hdev->internal_cb_pool);
8700
8701 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8702 HOST_SPACE_INTERNAL_CB_SZ,
8703 hdev->internal_cb_pool_virt_addr,
8704 hdev->internal_cb_pool_dma_addr);
8705 }
8706
8707 static int gaudi_ctx_init(struct hl_ctx *ctx)
8708 {
8709 int rc;
8710
8711 if (ctx->asid == HL_KERNEL_ASID_ID)
8712 return 0;
8713
8714 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8715 if (rc)
8716 return rc;
8717
8718 rc = gaudi_restore_user_registers(ctx->hdev);
8719 if (rc)
8720 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8721
8722 return rc;
8723 }
8724
8725 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8726 {
8727 if (ctx->asid == HL_KERNEL_ASID_ID)
8728 return;
8729
8730 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8731 }
8732
8733 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8734 {
8735 return gaudi_cq_assignment[cq_idx];
8736 }
8737
8738 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8739 {
8740 return sizeof(struct packet_msg_short) +
8741 sizeof(struct packet_msg_prot) * 2;
8742 }
8743
8744 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8745 {
8746 return sizeof(struct packet_msg_short) * 4 +
8747 sizeof(struct packet_fence) +
8748 sizeof(struct packet_msg_prot) * 2;
8749 }
8750
8751 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8752 {
8753 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8754 }
8755
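/*
 * Append a single MSG_SHORT packet to the CB that increments the given
 * sync object by 1 (ADD mode) in the W_S sync manager, and return the
 * updated CB size.
 */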
8756 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8757 u32 size, bool eb)
8758 {
8759 struct hl_cb *cb = (struct hl_cb *) data;
8760 struct packet_msg_short *pkt;
8761 u32 value, ctl, pkt_size = sizeof(*pkt);
8762
8763 pkt = cb->kernel_address + size;
8764 memset(pkt, 0, pkt_size);
8765
8766 /* Inc by 1, Mode ADD */
8767 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8768 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8769
8770 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8771 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8772 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8773 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8774 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8775 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8776 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8777
8778 pkt->value = cpu_to_le32(value);
8779 pkt->ctl = cpu_to_le32(ctl);
8780
8781 return size + pkt_size;
8782 }
8783
8784 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8785 u16 addr)
8786 {
8787 u32 ctl, pkt_size = sizeof(*pkt);
8788
8789 memset(pkt, 0, pkt_size);
8790
8791 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8792 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8793 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8794 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8795 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8796 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8797
8798 pkt->value = cpu_to_le32(value);
8799 pkt->ctl = cpu_to_le32(ctl);
8800
8801 return pkt_size;
8802 }
8803
8804 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8805 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8806 u16 sob_val, u16 mon_id)
8807 {
8808 u64 monitor_base;
8809 u32 ctl, value, pkt_size = sizeof(*pkt);
8810 u16 msg_addr_offset;
8811 u8 mask;
8812
8813 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8814 dev_err(hdev->dev,
8815 "sob_base %u (mask %#x) is not valid\n",
8816 sob_base, sob_mask);
8817 return 0;
8818 }
8819
8820 /*
8821 * monitor_base should be the content of the base0 address registers,
8822 * so it will be added to the msg short offsets
8823 */
8824 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8825
8826 msg_addr_offset =
8827 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8828 monitor_base;
8829
8830 memset(pkt, 0, pkt_size);
8831
8832 /* Monitor config packet: bind the monitor to a sync object */
8833 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8834 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8835 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8836 0); /* GREATER OR EQUAL*/
8837 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8838
8839 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8840 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8841 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8842 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8843 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8844 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8845 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8846
8847 pkt->value = cpu_to_le32(value);
8848 pkt->ctl = cpu_to_le32(ctl);
8849
8850 return pkt_size;
8851 }
8852
8853 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8854 {
8855 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8856
8857 memset(pkt, 0, pkt_size);
8858
8859 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8860 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8861 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8862
8863 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8864 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8865 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8866 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8867
8868 pkt->cfg = cpu_to_le32(cfg);
8869 pkt->ctl = cpu_to_le32(ctl);
8870
8871 return pkt_size;
8872 }
8873
8874 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8875 {
8876 u32 offset, nic_index;
8877
8878 switch (queue_id) {
8879 case GAUDI_QUEUE_ID_DMA_0_0:
8880 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8881 break;
8882 case GAUDI_QUEUE_ID_DMA_0_1:
8883 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8884 break;
8885 case GAUDI_QUEUE_ID_DMA_0_2:
8886 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8887 break;
8888 case GAUDI_QUEUE_ID_DMA_0_3:
8889 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8890 break;
8891 case GAUDI_QUEUE_ID_DMA_1_0:
8892 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8893 break;
8894 case GAUDI_QUEUE_ID_DMA_1_1:
8895 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8896 break;
8897 case GAUDI_QUEUE_ID_DMA_1_2:
8898 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8899 break;
8900 case GAUDI_QUEUE_ID_DMA_1_3:
8901 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8902 break;
8903 case GAUDI_QUEUE_ID_DMA_5_0:
8904 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8905 break;
8906 case GAUDI_QUEUE_ID_DMA_5_1:
8907 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8908 break;
8909 case GAUDI_QUEUE_ID_DMA_5_2:
8910 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8911 break;
8912 case GAUDI_QUEUE_ID_DMA_5_3:
8913 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8914 break;
8915 case GAUDI_QUEUE_ID_TPC_7_0:
8916 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8917 break;
8918 case GAUDI_QUEUE_ID_TPC_7_1:
8919 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8920 break;
8921 case GAUDI_QUEUE_ID_TPC_7_2:
8922 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8923 break;
8924 case GAUDI_QUEUE_ID_TPC_7_3:
8925 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8926 break;
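	/*
	 * NIC queues: 4 streams per engine, two engines per NIC macro.
	 * Derive the engine index from the queue id and split it into the
	 * macro offset and the in-macro engine offset.
	 */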
8927 case GAUDI_QUEUE_ID_NIC_0_0:
8928 case GAUDI_QUEUE_ID_NIC_1_0:
8929 case GAUDI_QUEUE_ID_NIC_2_0:
8930 case GAUDI_QUEUE_ID_NIC_3_0:
8931 case GAUDI_QUEUE_ID_NIC_4_0:
8932 case GAUDI_QUEUE_ID_NIC_5_0:
8933 case GAUDI_QUEUE_ID_NIC_6_0:
8934 case GAUDI_QUEUE_ID_NIC_7_0:
8935 case GAUDI_QUEUE_ID_NIC_8_0:
8936 case GAUDI_QUEUE_ID_NIC_9_0:
8937 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8938 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8939 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8940 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8941 break;
8942 case GAUDI_QUEUE_ID_NIC_0_1:
8943 case GAUDI_QUEUE_ID_NIC_1_1:
8944 case GAUDI_QUEUE_ID_NIC_2_1:
8945 case GAUDI_QUEUE_ID_NIC_3_1:
8946 case GAUDI_QUEUE_ID_NIC_4_1:
8947 case GAUDI_QUEUE_ID_NIC_5_1:
8948 case GAUDI_QUEUE_ID_NIC_6_1:
8949 case GAUDI_QUEUE_ID_NIC_7_1:
8950 case GAUDI_QUEUE_ID_NIC_8_1:
8951 case GAUDI_QUEUE_ID_NIC_9_1:
8952 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8953 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8954 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8955 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8956 break;
8957 case GAUDI_QUEUE_ID_NIC_0_2:
8958 case GAUDI_QUEUE_ID_NIC_1_2:
8959 case GAUDI_QUEUE_ID_NIC_2_2:
8960 case GAUDI_QUEUE_ID_NIC_3_2:
8961 case GAUDI_QUEUE_ID_NIC_4_2:
8962 case GAUDI_QUEUE_ID_NIC_5_2:
8963 case GAUDI_QUEUE_ID_NIC_6_2:
8964 case GAUDI_QUEUE_ID_NIC_7_2:
8965 case GAUDI_QUEUE_ID_NIC_8_2:
8966 case GAUDI_QUEUE_ID_NIC_9_2:
8967 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8968 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8969 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8970 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8971 break;
8972 case GAUDI_QUEUE_ID_NIC_0_3:
8973 case GAUDI_QUEUE_ID_NIC_1_3:
8974 case GAUDI_QUEUE_ID_NIC_2_3:
8975 case GAUDI_QUEUE_ID_NIC_3_3:
8976 case GAUDI_QUEUE_ID_NIC_4_3:
8977 case GAUDI_QUEUE_ID_NIC_5_3:
8978 case GAUDI_QUEUE_ID_NIC_6_3:
8979 case GAUDI_QUEUE_ID_NIC_7_3:
8980 case GAUDI_QUEUE_ID_NIC_8_3:
8981 case GAUDI_QUEUE_ID_NIC_9_3:
8982 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8983 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8984 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8985 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8986 break;
8987 default:
8988 return -EINVAL;
8989 }
8990
8991 *addr = CFG_BASE + offset;
8992
8993 return 0;
8994 }
8995
8996 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8997 {
8998 u64 monitor_base;
8999 u32 size = 0;
9000 u16 msg_addr_offset;
9001
9002 /*
9003 * monitor_base should be the content of the base0 address registers,
9004 * so it will be added to the msg short offsets
9005 */
9006 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9007
9008 /* First monitor config packet: low address of the sync */
9009 msg_addr_offset =
9010 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9011 monitor_base;
9012
9013 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9014 msg_addr_offset);
9015
9016 /* Second monitor config packet: high address of the sync */
9017 msg_addr_offset =
9018 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9019 monitor_base;
9020
9021 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9022 msg_addr_offset);
9023
9024 /*
9025 * Third monitor config packet: the payload, i.e. what to write when the
9026 * sync triggers
9027 */
9028 msg_addr_offset =
9029 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9030 monitor_base;
9031
9032 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9033
9034 return size;
9035 }
9036
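/*
 * Build a wait CB: three MSG_SHORT packets that set the monitor payload
 * address (low/high) and payload data, an ARM packet that binds the monitor
 * to the SOB group/mask/value, and a FENCE packet the QMAN will wait on.
 */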
9037 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9038 struct hl_gen_wait_properties *prop)
9039 {
9040 struct hl_cb *cb = (struct hl_cb *) prop->data;
9041 void *buf = cb->kernel_address;
9042 u64 fence_addr = 0;
9043 u32 size = prop->size;
9044
9045 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9046 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9047 prop->q_idx);
9048 return 0;
9049 }
9050
9051 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9052 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9053 prop->sob_mask, prop->sob_val, prop->mon_id);
9054 size += gaudi_add_fence_pkt(buf + size);
9055
9056 return size;
9057 }
9058
9059 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9060 {
9061 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9062
9063 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9064 hw_sob->sob_id);
9065
9066 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9067 hw_sob->sob_id * 4, 0);
9068
9069 kref_init(&hw_sob->kref);
9070 }
9071
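/*
 * If the preserved scratch register holds the POWER9 host magic, the F/W
 * indicates the host supports full 64-bit DMA addressing; otherwise limit
 * the DMA mask to 48 bits.
 */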
9072 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9073 {
9074 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9075 HL_POWER9_HOST_MAGIC) {
9076 hdev->power9_64bit_dma_enable = 1;
9077 hdev->dma_mask = 64;
9078 } else {
9079 hdev->power9_64bit_dma_enable = 0;
9080 hdev->dma_mask = 48;
9081 }
9082 }
9083
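/* Compose the 64-bit device time from the PSOC timestamp counter registers */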
9084 static u64 gaudi_get_device_time(struct hl_device *hdev)
9085 {
9086 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9087
9088 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9089 }
9090
9091 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9092 u32 *block_size, u32 *block_id)
9093 {
9094 return -EPERM;
9095 }
9096
9097 static int gaudi_block_mmap(struct hl_device *hdev,
9098 struct vm_area_struct *vma,
9099 u32 block_id, u32 block_size)
9100 {
9101 return -EPERM;
9102 }
9103
9104 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9105 {
9106 struct cpu_dyn_regs *dyn_regs =
9107 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9108 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9109 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9110 le32_to_cpu(dyn_regs->gic_host_ints_irq);
9111
9112 WREG32(irq_handler_offset,
9113 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9114 }
9115
9116 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9117 {
9118 switch (pll_idx) {
9119 case HL_GAUDI_CPU_PLL: return CPU_PLL;
9120 case HL_GAUDI_PCI_PLL: return PCI_PLL;
9121 case HL_GAUDI_NIC_PLL: return NIC_PLL;
9122 case HL_GAUDI_DMA_PLL: return DMA_PLL;
9123 case HL_GAUDI_MESH_PLL: return MESH_PLL;
9124 case HL_GAUDI_MME_PLL: return MME_PLL;
9125 case HL_GAUDI_TPC_PLL: return TPC_PLL;
9126 case HL_GAUDI_IF_PLL: return IF_PLL;
9127 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9128 case HL_GAUDI_HBM_PLL: return HBM_PLL;
9129 default: return -EINVAL;
9130 }
9131 }
9132
9133 static int gaudi_add_sync_to_engine_map_entry(
9134 struct hl_sync_to_engine_map *map, u32 reg_value,
9135 enum hl_sync_engine_type engine_type, u32 engine_id)
9136 {
9137 struct hl_sync_to_engine_map_entry *entry;
9138
9139 	/* Reg value represents a partial address of a sync object,
9140 	 * and is used as a unique identifier. For this we need to
9141 	 * clear the cutoff cfg base bits from the value.
9142 	 */
9143 if (reg_value == 0 || reg_value == 0xffffffff)
9144 return 0;
9145 reg_value -= (u32)CFG_BASE;
9146
9147 /* create a new hash entry */
9148 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9149 if (!entry)
9150 return -ENOMEM;
9151 entry->engine_type = engine_type;
9152 entry->engine_id = engine_id;
9153 entry->sync_id = reg_value;
9154 hash_add(map->tb, &entry->node, reg_value);
9155
9156 return 0;
9157 }
9158
9159 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9160 struct hl_sync_to_engine_map *map)
9161 {
9162 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9163 struct gaudi_device *gaudi = hdev->asic_specific;
9164 int i, j, rc;
9165 u32 reg_value;
9166
9167 /* Iterate over TPC engines */
9168 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9169 		/* TPC registers must be accessed with clock gating disabled */
9170 mutex_lock(&gaudi->clk_gate_mutex);
9171 hdev->asic_funcs->disable_clock_gating(hdev);
9172
9173 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9174 sds->props[SP_NEXT_TPC] * i);
9175
9176 /* We can reenable clock_gating */
9177 hdev->asic_funcs->set_clock_gating(hdev);
9178 mutex_unlock(&gaudi->clk_gate_mutex);
9179
9180 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9181 ENGINE_TPC, i);
9182 if (rc)
9183 goto free_sync_to_engine_map;
9184 }
9185
9186 /* Iterate over MME engines */
9187 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9188 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9189 			/* MME registers must be accessed with clock gating
9190 * disabled
9191 */
9192 mutex_lock(&gaudi->clk_gate_mutex);
9193 hdev->asic_funcs->disable_clock_gating(hdev);
9194
9195 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9196 sds->props[SP_NEXT_MME] * i +
9197 j * sizeof(u32));
9198
9199 /* We can reenable clock_gating */
9200 hdev->asic_funcs->set_clock_gating(hdev);
9201 mutex_unlock(&gaudi->clk_gate_mutex);
9202
9203 rc = gaudi_add_sync_to_engine_map_entry(
9204 map, reg_value, ENGINE_MME,
9205 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9206 if (rc)
9207 goto free_sync_to_engine_map;
9208 }
9209 }
9210
9211 /* Iterate over DMA engines */
9212 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9213 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9214 sds->props[SP_DMA_QUEUES_OFFSET] * i);
9215 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9216 ENGINE_DMA, i);
9217 if (rc)
9218 goto free_sync_to_engine_map;
9219 }
9220
9221 return 0;
9222
9223 free_sync_to_engine_map:
9224 hl_state_dump_free_sync_to_engine_map(map);
9225
9226 return rc;
9227 }
9228
9229 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9230 {
9231 return FIELD_GET(
9232 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9233 mon->status);
9234 }
9235
9236 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9237 {
9238 const size_t max_write = 10;
9239 u32 gid, mask, sob;
9240 int i, offset;
9241
9242 /* Sync object ID is calculated as follows:
9243 * (8 * group_id + cleared bits in mask)
9244 */
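	/*
	 * Example (assuming MONITOR_MAX_SOBS is 8, matching the formula
	 * above): gid 2 with mask 0b11110000 monitors SOBs 16-19.
	 */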
9245 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9246 mon->arm_data);
9247 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9248 mon->arm_data);
9249
9250 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9251 max_write; mask >>= 1, i++) {
9252 if (!(mask & 1)) {
9253 sob = gid * MONITOR_MAX_SOBS + i;
9254
9255 if (offset > 0)
9256 offset += snprintf(sobs + offset, max_write,
9257 ", ");
9258
9259 offset += snprintf(sobs + offset, max_write, "%u", sob);
9260 }
9261 }
9262 }
9263
9264 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9265 struct hl_device *hdev,
9266 struct hl_mon_state_dump *mon)
9267 {
9268 const char *name;
9269 char scratch_buf1[BIN_REG_STRING_SIZE],
9270 scratch_buf2[BIN_REG_STRING_SIZE];
9271 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9272
9273 name = hl_state_dump_get_monitor_name(hdev, mon);
9274 if (!name)
9275 name = "";
9276
9277 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9278
9279 return hl_snprintf_resize(
9280 buf, size, offset,
9281 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9282 mon->id, name,
9283 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9284 mon->arm_data),
9285 hl_format_as_binary(
9286 scratch_buf1, sizeof(scratch_buf1),
9287 FIELD_GET(
9288 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9289 mon->arm_data)),
9290 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9291 mon->arm_data),
9292 mon->wr_data,
9293 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9294 hl_format_as_binary(
9295 scratch_buf2, sizeof(scratch_buf2),
9296 FIELD_GET(
9297 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9298 mon->status)),
9299 monitored_sobs);
9300 }
9301
9302
9303 static int gaudi_print_fences_single_engine(
9304 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9305 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9306 size_t *size, size_t *offset)
9307 {
9308 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9309 int rc = -ENOMEM, i;
9310 u32 *statuses, *fences;
9311
9312 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9313 sizeof(*statuses), GFP_KERNEL);
9314 if (!statuses)
9315 goto out;
9316
9317 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9318 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9319 sizeof(*fences), GFP_KERNEL);
9320 if (!fences)
9321 goto free_status;
9322
9323 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9324 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9325
9326 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9327 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9328 fences[i] = RREG32(base_offset + i * sizeof(u32));
9329
9330 /* The actual print */
9331 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9332 u32 fence_id;
9333 u64 fence_cnt, fence_rdata;
9334 const char *engine_name;
9335
9336 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9337 statuses[i]))
9338 continue;
9339
9340 fence_id =
9341 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9342 fence_cnt = base_offset + CFG_BASE +
9343 sizeof(u32) *
9344 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9345 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9346 sds->props[SP_FENCE0_RDATA_OFFSET];
9347 engine_name = hl_sync_engine_to_string(engine_type);
9348
9349 rc = hl_snprintf_resize(
9350 buf, size, offset,
9351 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9352 engine_name, engine_id,
9353 i, fence_id,
9354 fence_cnt, engine_name, engine_id, fence_id, i,
9355 fence_rdata, engine_name, engine_id, fence_id, i,
9356 fences[fence_id],
9357 statuses[i]);
9358 if (rc)
9359 goto free_fences;
9360 }
9361
9362 rc = 0;
9363
9364 free_fences:
9365 kfree(fences);
9366 free_status:
9367 kfree(statuses);
9368 out:
9369 return rc;
9370 }
9371
9372
9373 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9374 .monitor_valid = gaudi_monitor_valid,
9375 .print_single_monitor = gaudi_print_single_monitor,
9376 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9377 .print_fences_single_engine = gaudi_print_fences_single_engine,
9378 };
9379
9380 static void gaudi_state_dump_init(struct hl_device *hdev)
9381 {
9382 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9383 int i;
9384
9385 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9386 hash_add(sds->so_id_to_str_tb,
9387 &gaudi_so_id_to_str[i].node,
9388 gaudi_so_id_to_str[i].id);
9389
9390 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9391 hash_add(sds->monitor_id_to_str_tb,
9392 &gaudi_monitor_id_to_str[i].node,
9393 gaudi_monitor_id_to_str[i].id);
9394
9395 sds->props = gaudi_state_dump_specs_props;
9396
9397 sds->sync_namager_names = gaudi_sync_manager_names;
9398
9399 sds->funcs = gaudi_state_dump_funcs;
9400 }
9401
9402 static u32 *gaudi_get_stream_master_qid_arr(void)
9403 {
9404 return gaudi_stream_master;
9405 }
9406
9407 static const struct hl_asic_funcs gaudi_funcs = {
9408 .early_init = gaudi_early_init,
9409 .early_fini = gaudi_early_fini,
9410 .late_init = gaudi_late_init,
9411 .late_fini = gaudi_late_fini,
9412 .sw_init = gaudi_sw_init,
9413 .sw_fini = gaudi_sw_fini,
9414 .hw_init = gaudi_hw_init,
9415 .hw_fini = gaudi_hw_fini,
9416 .halt_engines = gaudi_halt_engines,
9417 .suspend = gaudi_suspend,
9418 .resume = gaudi_resume,
9419 .mmap = gaudi_mmap,
9420 .ring_doorbell = gaudi_ring_doorbell,
9421 .pqe_write = gaudi_pqe_write,
9422 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9423 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9424 .scrub_device_mem = gaudi_scrub_device_mem,
9425 .get_int_queue_base = gaudi_get_int_queue_base,
9426 .test_queues = gaudi_test_queues,
9427 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9428 .asic_dma_pool_free = gaudi_dma_pool_free,
9429 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9430 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9431 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9432 .cs_parser = gaudi_cs_parser,
9433 .asic_dma_map_sg = gaudi_dma_map_sg,
9434 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9435 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9436 .update_eq_ci = gaudi_update_eq_ci,
9437 .context_switch = gaudi_context_switch,
9438 .restore_phase_topology = gaudi_restore_phase_topology,
9439 .debugfs_read32 = gaudi_debugfs_read32,
9440 .debugfs_write32 = gaudi_debugfs_write32,
9441 .debugfs_read64 = gaudi_debugfs_read64,
9442 .debugfs_write64 = gaudi_debugfs_write64,
9443 .debugfs_read_dma = gaudi_debugfs_read_dma,
9444 .add_device_attr = gaudi_add_device_attr,
9445 .handle_eqe = gaudi_handle_eqe,
9446 .set_pll_profile = gaudi_set_pll_profile,
9447 .get_events_stat = gaudi_get_events_stat,
9448 .read_pte = gaudi_read_pte,
9449 .write_pte = gaudi_write_pte,
9450 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9451 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9452 .send_heartbeat = gaudi_send_heartbeat,
9453 .set_clock_gating = gaudi_set_clock_gating,
9454 .disable_clock_gating = gaudi_disable_clock_gating,
9455 .debug_coresight = gaudi_debug_coresight,
9456 .is_device_idle = gaudi_is_device_idle,
9457 .soft_reset_late_init = gaudi_soft_reset_late_init,
9458 .hw_queues_lock = gaudi_hw_queues_lock,
9459 .hw_queues_unlock = gaudi_hw_queues_unlock,
9460 .get_pci_id = gaudi_get_pci_id,
9461 .get_eeprom_data = gaudi_get_eeprom_data,
9462 .send_cpu_message = gaudi_send_cpu_message,
9463 .pci_bars_map = gaudi_pci_bars_map,
9464 .init_iatu = gaudi_init_iatu,
9465 .rreg = hl_rreg,
9466 .wreg = hl_wreg,
9467 .halt_coresight = gaudi_halt_coresight,
9468 .ctx_init = gaudi_ctx_init,
9469 .ctx_fini = gaudi_ctx_fini,
9470 .get_clk_rate = gaudi_get_clk_rate,
9471 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9472 .load_firmware_to_device = gaudi_load_firmware_to_device,
9473 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9474 .get_signal_cb_size = gaudi_get_signal_cb_size,
9475 .get_wait_cb_size = gaudi_get_wait_cb_size,
9476 .gen_signal_cb = gaudi_gen_signal_cb,
9477 .gen_wait_cb = gaudi_gen_wait_cb,
9478 .reset_sob = gaudi_reset_sob,
9479 .reset_sob_group = gaudi_reset_sob_group,
9480 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9481 .get_device_time = gaudi_get_device_time,
9482 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9483 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9484 .scramble_addr = hl_mmu_scramble_addr,
9485 .descramble_addr = hl_mmu_descramble_addr,
9486 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9487 .get_hw_block_id = gaudi_get_hw_block_id,
9488 .hw_block_mmap = gaudi_block_mmap,
9489 .enable_events_from_fw = gaudi_enable_events_from_fw,
9490 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9491 .init_firmware_loader = gaudi_init_firmware_loader,
9492 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9493 .state_dump_init = gaudi_state_dump_init,
9494 .get_sob_addr = gaudi_get_sob_addr,
9495 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9496 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9497 };
9498
9499 /**
9500 * gaudi_set_asic_funcs - set GAUDI function pointers
9501 *
9502 * @hdev: pointer to hl_device structure
9503 *
9504 */
9505 void gaudi_set_asic_funcs(struct hl_device *hdev)
9506 {
9507 hdev->asic_funcs = &gaudi_funcs;
9508 }
9509