// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2016-2017 Hisilicon Limited. */

#include "hclge_err.h"

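/*
 * Editor's note, summarizing the pattern used throughout this file: each
 * table below is a sentinel-terminated array of { interrupt mask, message,
 * reset level } entries.  hclge_log_error() walks a table against a hardware
 * status word, logs every error bit that is set and, when an entry requests
 * more than HNAE3_NONE_RESET, records that reset level in a bitmap so the
 * caller can schedule the appropriate recovery.
 */
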
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_int[] = {
	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
	{ .int_msk = BIT(0), .msg = "rx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tx_buf_underrun",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ncsi_err_int[] = {
	{ .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
	{ .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tm_sch_rint[] = {
	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
	{ .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
	{ .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "qcn_gp3_barrel_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
	{ .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
	{ .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "rd_bus_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "wr_bus_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "reg_search_miss",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "rx_q_search_miss",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
	{ .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
	{ .int_msk = BIT(0), .msg = "over_8bd_no_fe",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(5), .msg = "buf_wait_timeout",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
	{ .int_msk = BIT(0), .msg = "buf_sum_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "ppp_mb_num_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(2), .msg = "ppp_mbid_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "cks_edit_position_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "cks_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "vlan_num_ot_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "vlan_num_in_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

#define HCLGE_SSU_MEM_ECC_ERR(x) \
	{ .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
	  .reset_level = HNAE3_GLOBAL_RESET }
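
/*
 * For reference, HCLGE_SSU_MEM_ECC_ERR(3) expands to
 *
 *	{ .int_msk = BIT(3), .msg = "ssu_mem3_ecc_mbit_err",
 *	  .reset_level = HNAE3_GLOBAL_RESET }
 *
 * since "#x" stringizes the bit index into the message text.
 */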

static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
	HCLGE_SSU_MEM_ECC_ERR(0),
	HCLGE_SSU_MEM_ECC_ERR(1),
	HCLGE_SSU_MEM_ECC_ERR(2),
	HCLGE_SSU_MEM_ECC_ERR(3),
	HCLGE_SSU_MEM_ECC_ERR(4),
	HCLGE_SSU_MEM_ECC_ERR(5),
	HCLGE_SSU_MEM_ECC_ERR(6),
	HCLGE_SSU_MEM_ECC_ERR(7),
	HCLGE_SSU_MEM_ECC_ERR(8),
	HCLGE_SSU_MEM_ECC_ERR(9),
	HCLGE_SSU_MEM_ECC_ERR(10),
	HCLGE_SSU_MEM_ECC_ERR(11),
	HCLGE_SSU_MEM_ECC_ERR(12),
	HCLGE_SSU_MEM_ECC_ERR(13),
	HCLGE_SSU_MEM_ECC_ERR(14),
	HCLGE_SSU_MEM_ECC_ERR(15),
	HCLGE_SSU_MEM_ECC_ERR(16),
	HCLGE_SSU_MEM_ECC_ERR(17),
	HCLGE_SSU_MEM_ECC_ERR(18),
	HCLGE_SSU_MEM_ECC_ERR(19),
	HCLGE_SSU_MEM_ECC_ERR(20),
	HCLGE_SSU_MEM_ECC_ERR(21),
	HCLGE_SSU_MEM_ECC_ERR(22),
	HCLGE_SSU_MEM_ECC_ERR(23),
	HCLGE_SSU_MEM_ECC_ERR(24),
	HCLGE_SSU_MEM_ECC_ERR(25),
	HCLGE_SSU_MEM_ECC_ERR(26),
	HCLGE_SSU_MEM_ECC_ERR(27),
	HCLGE_SSU_MEM_ECC_ERR(28),
	HCLGE_SSU_MEM_ECC_ERR(29),
	HCLGE_SSU_MEM_ECC_ERR(30),
	HCLGE_SSU_MEM_ECC_ERR(31),
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
	{ .int_msk = BIT(0), .msg = "ig_mac_inf_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "ig_host_inf_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "ig_roc_buf_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
	{ .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(9), .msg = "low_water_line_err_port",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(10), .msg = "hi_water_line_err_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

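/*
 * Unlike the BIT()-mask tables above, the int_msk values below are error
 * type codes: hclge_log_rocee_ovf_error() compares them against the masked
 * status with "==" rather than "&".
 */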
static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
	{ .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" },
	{ .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" },
	{ .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" },
	{ .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" },
	{ .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" },
	{ .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" },
	{ .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" },
	{ .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" },
	{ .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" },
	{ .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" },
	{ .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" },
	{ .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" },
	{ .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" },
	{ .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" },
	{ .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" },
	{ .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" },
	{ .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" },
	{ .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" },
	{ .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" },
	{ .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" },
	{ /* sentinel */ }
};

static void hclge_log_error(struct device *dev, char *reg,
			    const struct hclge_hw_error *err,
			    u32 err_sts, unsigned long *reset_requests)
{
	while (err->msg) {
		if (err->int_msk & err_sts) {
			dev_err(dev, "%s %s found [error status=0x%x]\n",
				reg, err->msg, err_sts);
			if (err->reset_level &&
			    err->reset_level != HNAE3_NONE_RESET)
				set_bit(err->reset_level, reset_requests);
		}
		err++;
	}
}
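
/*
 * A minimal usage sketch, mirroring the real callers further down (e.g.
 * hclge_handle_mpf_ras_error()): read a status word out of a command
 * descriptor and hand it to the matching table.
 *
 *	status = le32_to_cpu(desc[0].data[0]);
 *	if (status)
 *		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
 *				&hclge_imp_tcm_ecc_int[0], status,
 *				&ae_dev->hw_err_reset_req);
 */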

/* hclge_cmd_query_error: read the error information
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command; must point to an array of
 *	  two descriptors when @flag is set
 * @cmd: command opcode
 * @flag: flag for extended command structure
 *
 * This function queries the error info from the hw register(s) using command.
 */
static int hclge_cmd_query_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc, u32 cmd, u16 flag)
{
	struct device *dev = &hdev->pdev->dev;
	int desc_num = 1;
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
	if (flag) {
		desc[0].flag |= cpu_to_le16(flag);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
		desc_num = 2;
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
	if (ret)
		dev_err(dev, "query error cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
	desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}

static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);

	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
					      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
	}

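	/*
	 * desc[0] carries the enable bits (only when en is true); desc[1]
	 * always carries the matching mask bits, which appear to tell the
	 * firmware which enable bits this command is allowed to update.
	 */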
	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
				      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure common err interrupts\n", ret);

	return ret;
}

static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

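	/* NCSI error interrupt configuration is not supported before V2 */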
	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
		return 0;

	/* configure NCSI error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure NCSI error interrupts\n", ret);

	return ret;
}

static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure IGU, EGU error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
	desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
	if (en)
		desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU common interrupts\n", ret);
		return ret;
	}

	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
		return ret;
	}

	ret = hclge_config_ncsi_hw_err_int(hdev, en);

	return ret;
}

static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
					    bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure PPP error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], cmd, false);

	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
			desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[1].data[2] =
				cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);

	return ret;
}

static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
{
	int ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
					       en);
	if (ret)
		return ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
					       en);

	return ret;
}

static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure TM SCH hw errors */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
		return ret;
	}

	/* configure TM QCN hw errors */
	ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0);
	if (ret) {
		dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret);
		return ret;
	}

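	/*
	 * Reuse the descriptor read above as a write: effectively a
	 * read-modify-write of the QCN memory interrupt configuration.
	 */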
	hclge_cmd_reuse_desc(&desc, false);
	if (en)
		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure TM QCN mem errors\n", ret);

	return ret;
}

static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure MAC common error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure MAC COMMON error intr\n", ret);

	return ret;
}

int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
	else
		desc.data[0] = 0;

	desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}

static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
					     bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int desc_num = 1;
	int ret;

	/* configure PPU error interrupts */
	if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
			desc[1].data[3] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
			desc[1].data[4] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
		desc[1].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
		desc[1].data[3] |=
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
		desc_num = 2;
	} else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
	} else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
	} else {
		dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
		return -EINVAL;
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);

	return ret;
}

static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	int ret;

	ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
			ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_MPF_OTHER_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_PF_OTHER_INT_CMD, en);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
			ret);
	return ret;
}

static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure SSU ecc error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
		desc[0].data[1] =
			cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure SSU ECC error interrupt\n", ret);
		return ret;
	}

	/* configure SSU common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);

	if (en) {
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
		else
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
		desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
		desc[0].data[2] =
			cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
				      HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure SSU COMMON error intr\n", ret);

	return ret;
}

/* hclge_query_bd_num: query number of buffer descriptors
 * @hdev: pointer to struct hclge_dev
 * @is_ras: true for ras, false for msix
 * @mpf_bd_num: number of main PF interrupt buffer descriptors
 * @pf_bd_num: number of non-main PF interrupt buffer descriptors
 *
 * This function queries the number of mpf and pf buffer descriptors.
 */
static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
			      int *mpf_bd_num, int *pf_bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_min_bd_num, pf_min_bd_num;
	enum hclge_opcode_type opcode;
	struct hclge_desc desc_bd;
	int ret;

	if (is_ras) {
		opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
	} else {
		opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
	}

	hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to query msix int status bd num\n",
			ret);
		return ret;
	}

	*mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
	*pf_bd_num = le32_to_cpu(desc_bd.data[1]);
	if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
		dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n",
			*mpf_bd_num, *pf_bd_num);
		return -EINVAL;
	}

	return 0;
}

/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the main PF RAS errors in the
 * hw register(s) using command.
 */
static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log HNS common errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
				&hclge_imp_tcm_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
				&hclge_cmdq_nic_mem_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	status = le32_to_cpu(desc[0].data[3]);
	if (status)
		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
				&hclge_tqp_int_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[4]);
	if (status)
		hclge_log_error(dev, "MSIX_ECC_INT_STS",
				&hclge_msix_sram_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

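	/*
	 * Status words for the remaining blocks sit at fixed offsets in the
	 * returned descriptor chain: SSU in desc[2], IGU in desc[3], PPP in
	 * desc[4], PPU in desc[5], TM in desc[6], QCN in desc[7] and NCSI
	 * in desc[9].
	 */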
	/* log SSU(Storage Switch Unit) errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
				&hclge_ssu_mem_ecc_err_int[0], status,
				&ae_dev->hw_err_reset_req);

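	/*
	 * ssu_mem32 does not fit in the 32-entry table above; its multi-bit
	 * ECC error is reported in bit 0 of the next status word and is
	 * handled separately here.
	 */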
	status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
	if (status) {
		dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
			status);
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
	if (status)
		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
				&hclge_ssu_com_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_INT_STS",
				&hclge_igu_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPP(Programmable Packet Process) errors */
	desc_data = (__le32 *)&desc[4];
	status = le32_to_cpu(*(desc_data + 1));
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
				&hclge_ppp_mpf_abnormal_int_st1[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
				&hclge_ppp_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 1));
	if (status) {
		dev_err(dev,
			"PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
				&hclge_ppu_mpf_abnormal_int_st2[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
				&hclge_ppu_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log TM(Traffic Manager) errors */
	desc_data = (__le32 *)&desc[6];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "TM_SCH_RINT",
				&hclge_tm_sch_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log QCN(Quantized Congestion Control) errors */
	desc_data = (__le32 *)&desc[7];
	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_FIFO_RINT",
				&hclge_qcn_fifo_rint[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_ECC_RINT",
				&hclge_qcn_ecc_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log NCSI errors */
	desc_data = (__le32 *)&desc[9];
	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
				&hclge_ncsi_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* clear all main PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);

	return ret;
}

/* hclge_handle_pf_ras_error: handle all PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the PF RAS errors in the
 * hw register(s) using command.
 */
static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc,
				     int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU(Storage Switch Unit) errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
				&hclge_ssu_fifo_overflow_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[2]);
	if (status)
		hclge_log_error(dev, "SSU_ETS_TCG_INT",
				&hclge_ssu_ets_tcg_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
				&hclge_igu_egu_tnl_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
	if (status) {
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
				&hclge_ppu_pf_abnormal_int[0], status,
				&ae_dev->hw_err_reset_req);
		hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
	}

	/* clear all PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
{
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	int ret;

	/* query the number of registers in the RAS int status */
	ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return ret;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	/* handle all main PF RAS errors */
	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
	if (ret) {
		kfree(desc);
		return ret;
	}
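	/* the same buffer is reused for the PF query; clear the MPF data */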
1367 memset(desc, 0, bd_num * sizeof(struct hclge_desc));
1368
1369 /* handle all PF RAS errors */
1370 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
1371 kfree(desc);
1372
1373 return ret;
1374 }
1375
hclge_log_rocee_axi_error(struct hclge_dev * hdev)1376 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
1377 {
1378 struct device *dev = &hdev->pdev->dev;
1379 struct hclge_desc desc[3];
1380 int ret;
1381
1382 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
1383 true);
1384 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
1385 true);
1386 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
1387 true);
1388 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
1389 desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
1390
1391 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
1392 if (ret) {
1393 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
1394 return ret;
1395 }
1396
1397 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
1398 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
1399 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
1400 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
1401 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
1402 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
1403 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
1404 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
1405 dev_err(dev, "AXI3: %08X %08X %08X %08X\n",
1406 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
1407 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
1408
1409 return 0;
1410 }
1411
hclge_log_rocee_ecc_error(struct hclge_dev * hdev)1412 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
1413 {
1414 struct device *dev = &hdev->pdev->dev;
1415 struct hclge_desc desc[2];
1416 int ret;
1417
1418 ret = hclge_cmd_query_error(hdev, &desc[0],
1419 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
1420 HCLGE_CMD_FLAG_NEXT);
1421 if (ret) {
1422 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
1423 return ret;
1424 }
1425
1426 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
1427 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
1428 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
1429 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
1430 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
1431 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
1432
1433 return 0;
1434 }
1435
hclge_log_rocee_ovf_error(struct hclge_dev * hdev)1436 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
1437 {
1438 struct device *dev = &hdev->pdev->dev;
1439 struct hclge_desc desc[2];
1440 int ret;
1441
1442 /* read overflow error status */
1443 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD,
1444 0);
1445 if (ret) {
1446 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
1447 return ret;
1448 }
1449
1450 /* log overflow error */
1451 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
1452 const struct hclge_hw_error *err;
1453 u32 err_sts;
1454
1455 err = &hclge_rocee_qmm_ovf_err_int[0];
1456 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
1457 le32_to_cpu(desc[0].data[0]);
		while (err->msg) {
			if (err->int_msk == err_sts) {
				dev_err(dev, "%s [error status=0x%x] found\n",
					err->msg,
					le32_to_cpu(desc[0].data[0]));
				break;
			}
			err++;
		}
	}

	if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
		dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
			le32_to_cpu(desc[0].data[1]));
	}

	if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
		dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
			le32_to_cpu(desc[0].data[2]));
	}

	return 0;
}

static enum hnae3_reset_type
hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
{
	enum hnae3_reset_type reset_type = HNAE3_NONE_RESET;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	unsigned int status;
	int ret;

	/* read RAS error interrupt status */
	ret = hclge_cmd_query_error(hdev, &desc[0],
				    HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0);
	if (ret) {
		dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
		/* reset everything for now */
		return HNAE3_GLOBAL_RESET;
	}

	status = le32_to_cpu(desc[0].data[0]);

	if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
		if (status & HCLGE_ROCEE_RERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI rresp error\n");

		if (status & HCLGE_ROCEE_BERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI bresp error\n");

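		/* an AXI response error is recoverable with a function
		 * level reset
		 */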
		reset_type = HNAE3_FUNC_RESET;

		hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR);

		ret = hclge_log_rocee_axi_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_ECC_INT_MASK) {
		dev_err(dev, "ROCEE RAS 2bit ECC error\n");
		reset_type = HNAE3_GLOBAL_RESET;

		ret = hclge_log_rocee_ecc_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_OVF_INT_MASK) {
		ret = hclge_log_rocee_ovf_error(hdev);
		if (ret) {
			dev_err(dev, "failed(%d) to process ovf error\n", ret);
			/* reset everything for now */
			return HNAE3_GLOBAL_RESET;
		}
	}

	/* clear error status */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
		/* reset everything for now */
		return HNAE3_GLOBAL_RESET;
	}

	return reset_type;
}

int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 ||
	    !hnae3_dev_roce_supported(hdev))
		return 0;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
	if (en) {
		/* enable ROCEE hw error interrupts */
		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);

		hclge_log_and_clear_rocee_ras_error(hdev);
	}
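	/* the enable masks are written in both the enable and the disable
	 * case; only the enable bits themselves differ
	 */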
	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);

	return ret;
}

static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	enum hnae3_reset_type reset_type;

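	/* skip RAS handling while a reset is already in progress */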
	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
		return;

	reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
	if (reset_type != HNAE3_NONE_RESET)
		set_bit(reset_type, &ae_dev->hw_err_reset_req);
}

static const struct hclge_hw_blk hw_blk[] = {
	{
		.msk = BIT(0), .name = "IGU_EGU",
		.config_err_int = hclge_config_igu_egu_hw_err_int,
	},
	{
		.msk = BIT(1), .name = "PPP",
		.config_err_int = hclge_config_ppp_hw_err_int,
	},
	{
		.msk = BIT(2), .name = "SSU",
		.config_err_int = hclge_config_ssu_hw_err_int,
	},
	{
		.msk = BIT(3), .name = "PPU",
		.config_err_int = hclge_config_ppu_hw_err_int,
	},
	{
		.msk = BIT(4), .name = "TM",
		.config_err_int = hclge_config_tm_hw_err_int,
	},
	{
		.msk = BIT(5), .name = "COMMON",
		.config_err_int = hclge_config_common_hw_err_int,
	},
	{
		.msk = BIT(8), .name = "MAC",
		.config_err_int = hclge_config_mac_err_int,
	},
	{ /* sentinel */ }
};

int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
{
	const struct hclge_hw_blk *module = hw_blk;
	int ret = 0;

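	/* walk the block table and configure the error interrupts of
	 * every block that provides a handler
	 */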
	while (module->name) {
		if (module->config_err_int) {
			ret = module->config_err_int(hdev, state);
			if (ret)
				return ret;
		}
		module++;
	}

	return ret;
}

pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 status;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"Can't recover - RAS error reported during dev init\n");
		return PCI_ERS_RESULT_NONE;
	}

	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

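	/* reinitialise the reset request only when a NIC or a RoCE RAS
	 * error is actually reported
	 */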
	if (status & HCLGE_RAS_REG_NFE_MASK ||
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
		ae_dev->hw_err_reset_req = 0;
	else
		goto out;

	/* Handling Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev,
			"HNS Non-Fatal RAS error(status=0x%x) identified\n",
			status);
		hclge_handle_all_ras_errors(hdev);
	}

	/* Handling Non-fatal Rocee RAS errors */
	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
		dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
		hclge_handle_rocee_ras_error(ae_dev);
	}

	if (ae_dev->hw_err_reset_req)
		return PCI_ERS_RESULT_NEED_RESET;

out:
	return PCI_ERS_RESULT_RECOVERED;
}

static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc, bool is_mpf,
				     u32 bd_num)
{
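	/* reuse the caller's descriptors: rewrite the opcode and flags and
	 * send them back as a write command to clear the reported sources
	 */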
	if (is_mpf)
		desc[0].opcode =
			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
	else
		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);

	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);

	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
}

/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @vf_id: Index of the virtual function with error
 * @q_id: Physical index of the queue with error
 *
 * This function gets the specific index of the queue and function which
 * cause over_8bd_nfe_err by using a firmware command. If vf_id is 0, it
 * means the error is caused by the PF instead of a VF.
 */
static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
					 u16 *q_id)
{
	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
	struct hclge_desc desc;
	int ret;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		return ret;

	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);

	return 0;
}

/* hclge_handle_over_8bd_err: handle the MSI-X error named over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @reset_requests: reset level that we need to trigger later
 *
 * over_8bd_nfe_err is a special MSI-X error because it may be caused by a
 * VF; in that case, we need to trigger a VF reset. Otherwise, a PF reset
 * is needed.
 */
static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	u16 vf_id;
	u16 q_id;
	int ret;

	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
	if (ret) {
		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", ret);
		return;
	}

	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n",
		vf_id, q_id);

	if (vf_id) {
		if (vf_id >= hdev->num_alloc_vport) {
			dev_err(dev, "invalid vf id(%u)\n", vf_id);
			return;
		}

		/* If another reset whose level is higher than
		 * HNAE3_VF_FUNC_RESET is already requested, there is no
		 * need to trigger a VF reset here.
		 */
		if (*reset_requests != 0)
			return;

		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
		if (ret)
			dev_err(dev, "inform reset to vf(%u) failed %d!\n",
				vf_id, ret);
	} else {
		set_bit(HNAE3_FUNC_RESET, reset_requests);
	}
}

/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @mpf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the main PF MSI-X errors in the hw register/s
 * using a firmware command.
 */
static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
				       struct hclge_desc *desc,
				       int mpf_bd_num,
				       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
	if (ret) {
		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log MAC errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
				&hclge_mac_afifo_tnl_int[0], status,
				reset_requests);

	/* log PPU(RCB) MPF errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 2)) &
		 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
	if (status)
		dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x]\n",
			status);

	/* clear all main PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret)
		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);

	return ret;
}

/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @pf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the PF MSI-X errors in the hw register/s using
 * a firmware command.
 */
static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int pf_bd_num,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
	if (ret) {
		dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU PF errors */
	status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_pf_int[0],
				status, reset_requests);

	/* read and log PPP PF errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
				&hclge_ppp_pf_abnormal_int[0],
				status, reset_requests);

	/* log PPU(RCB) PF errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
				&hclge_ppu_pf_abnormal_int[0],
				status, reset_requests);

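	/* the over_8bd error shares the PPU status word but needs its own
	 * handling because a VF may have to be reset
	 */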
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
	if (status)
		hclge_handle_over_8bd_err(hdev, reset_requests);

	/* clear all PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret)
		dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
					  unsigned long *reset_requests)
{
	struct hclge_mac_tnl_stats mac_tnl_stats;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		goto out;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

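	/* the buffer is sized for the larger of the two commands and is
	 * reused for the MPF and the PF query
	 */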
	ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
					  reset_requests);
	if (ret)
		goto msi_error;

	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
	ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
	if (ret)
		goto msi_error;

	/* query and clear mac tnl interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
		goto msi_error;
	}

	status = le32_to_cpu(desc->data[0]);
	if (status) {
		/* When a mac tnl interrupt occurs, record the current time
		 * and the register status in a fifo, then clear the status,
		 * so that if the link status changes suddenly at some point
		 * we can query them through debugfs.
		 */
		mac_tnl_stats.time = local_clock();
		mac_tnl_stats.status = status;
		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
		ret = hclge_clear_mac_tnl_int(hdev);
		if (ret)
			dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
	}

msi_error:
	kfree(desc);
out:
	return ret;
}

int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
			       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"Can't handle - MSIx error reported during dev init\n");
		return 0;
	}

	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}

void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
{
#define HCLGE_DESC_NO_DATA_LEN 8

	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	ae_dev->hw_err_reset_req = 0;
	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return;

	/* Clear HNS hw errors reported through msix */
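	/* write 1s to the data area so the clear command acknowledges
	 * every possible error source
	 */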
	memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
			ret);
		goto msi_error;
	}

	memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear pf msix int during init\n",
			ret);
		goto msi_error;
	}

	/* Handle Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev, "HNS hw error(RAS) identified during init\n");
		hclge_handle_all_ras_errors(hdev);
	}

msi_error:
	kfree(desc);
}