1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50 
51 /*
52  * This file contains all of the code that is specific to the HFI chip
53  */
54 
55 #include <linux/pci.h>
56 #include <linux/delay.h>
57 #include <linux/interrupt.h>
58 #include <linux/module.h>
59 
60 #include "hfi.h"
61 #include "trace.h"
62 #include "mad.h"
63 #include "pio.h"
64 #include "sdma.h"
65 #include "eprom.h"
66 
67 #define NUM_IB_PORTS 1
68 
69 uint kdeth_qp;
70 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
72 
73 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74 module_param(num_vls, uint, S_IRUGO);
75 MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
76 
77 /*
78  * Default time to aggregate two 10K packets from the idle state
79  * (timer not running). The timer starts at the end of the first packet,
80  * so only the time for one 10K packet and header plus a bit extra is needed.
81  * 10 * 1024 + 64 header bytes = 10304 bytes
82  * 10304 bytes / 12.5 GB/s = 824.32 ns
83  */
84 uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85 module_param(rcv_intr_timeout, uint, S_IRUGO);
86 MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
87 
88 uint rcv_intr_count = 16; /* same as qib */
89 module_param(rcv_intr_count, uint, S_IRUGO);
90 MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
91 
92 ushort link_crc_mask = SUPPORTED_CRCS;
93 module_param(link_crc_mask, ushort, S_IRUGO);
94 MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
95 
96 uint loopback;
97 module_param_named(loopback, loopback, uint, S_IRUGO);
98 MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
99 
100 /* Other driver tunables */
101 uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
102 static ushort crc_14b_sideband = 1;
103 static uint use_flr = 1;
104 uint quick_linkup; /* skip LNI */
105 
106 struct flag_table {
107 	u64 flag;	/* the flag */
108 	char *str;	/* description string */
109 	u16 extra;	/* extra information */
110 	u16 unused0;
111 	u32 unused1;
112 };
113 
114 /* str must be a string constant */
115 #define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116 #define FLAG_ENTRY0(str, flag) {flag, str, 0}
117 
118 /* Send Error Consequences */
119 #define SEC_WRITE_DROPPED	0x1
120 #define SEC_PACKET_DROPPED	0x2
121 #define SEC_SC_HALTED		0x4	/* per-context only */
122 #define SEC_SPC_FREEZE		0x8	/* per-HFI only */
123 
124 #define VL15CTXT                  1
125 #define MIN_KERNEL_KCTXTS         2
126 #define NUM_MAP_REGS             32
127 
128 /* Bit offset into the GUID which carries HFI id information */
129 #define GUID_HFI_INDEX_SHIFT     39
130 
131 /* extract the emulation revision */
132 #define emulator_rev(dd) ((dd)->irev >> 8)
133 /* parallel and serial emulation versions are 3 and 4 respectively */
134 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
136 
137 /* RSM fields */
138 
139 /* packet type */
140 #define IB_PACKET_TYPE         2ull
141 #define QW_SHIFT               6ull
142 /* QPN[7..1] */
143 #define QPN_WIDTH              7ull
144 
145 /* LRH.BTH: QW 0, OFFSET 48 - for match */
146 #define LRH_BTH_QW             0ull
147 #define LRH_BTH_BIT_OFFSET     48ull
148 #define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
149 #define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150 #define LRH_BTH_SELECT
151 #define LRH_BTH_MASK           3ull
152 #define LRH_BTH_VALUE          2ull
153 
154 /* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155 #define LRH_SC_QW              0ull
156 #define LRH_SC_BIT_OFFSET      56ull
157 #define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
158 #define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159 #define LRH_SC_MASK            128ull
160 #define LRH_SC_VALUE           0ull
161 
162 /* SC[n..0] QW 0, OFFSET 60 - for select */
163 #define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))
164 
165 /* QPN[m+n:1] QW 1, OFFSET 1 */
166 #define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
167 
168 /* defines to build power on SC2VL table */
169 #define SC2VL_VAL( \
170 	num, \
171 	sc0, sc0val, \
172 	sc1, sc1val, \
173 	sc2, sc2val, \
174 	sc3, sc3val, \
175 	sc4, sc4val, \
176 	sc5, sc5val, \
177 	sc6, sc6val, \
178 	sc7, sc7val) \
179 ( \
180 	((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181 	((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182 	((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183 	((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184 	((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185 	((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186 	((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187 	((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
188 )
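/*
 * Illustrative usage sketch (an assumption, not copied from this file): the
 * first argument selects which SEND_SC2VLT<num> register the value is built
 * for, followed by eight SC/VL pairs, e.g. something along the lines of
 *
 *	write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(0,
 *					      0, 0, 1, 1, 2, 2, 3, 3,
 *					      4, 4, 5, 5, 6, 6, 7, 7));
 *
 * which ORs each VL value into the shift position of its SC within the
 * 64-bit CSR.
 */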
189 
190 #define DC_SC_VL_VAL( \
191 	range, \
192 	e0, e0val, \
193 	e1, e1val, \
194 	e2, e2val, \
195 	e3, e3val, \
196 	e4, e4val, \
197 	e5, e5val, \
198 	e6, e6val, \
199 	e7, e7val, \
200 	e8, e8val, \
201 	e9, e9val, \
202 	e10, e10val, \
203 	e11, e11val, \
204 	e12, e12val, \
205 	e13, e13val, \
206 	e14, e14val, \
207 	e15, e15val) \
208 ( \
209 	((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210 	((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211 	((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212 	((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213 	((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214 	((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215 	((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216 	((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217 	((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218 	((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219 	((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220 	((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221 	((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222 	((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223 	((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224 	((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
225 )
226 
227 /* all CceStatus sub-block freeze bits */
228 #define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229 			| CCE_STATUS_RXE_FROZE_SMASK \
230 			| CCE_STATUS_TXE_FROZE_SMASK \
231 			| CCE_STATUS_TXE_PIO_FROZE_SMASK)
232 /* all CceStatus sub-block TXE pause bits */
233 #define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234 			| CCE_STATUS_TXE_PAUSED_SMASK \
235 			| CCE_STATUS_SDMA_PAUSED_SMASK)
236 /* all CceStatus sub-block RXE pause bits */
237 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
238 
239 /*
240  * CCE Error flags.
241  */
242 static struct flag_table cce_err_status_flags[] = {
243 /* 0*/	FLAG_ENTRY0("CceCsrParityErr",
244 		CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245 /* 1*/	FLAG_ENTRY0("CceCsrReadBadAddrErr",
246 		CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247 /* 2*/	FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248 		CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249 /* 3*/	FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250 		CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251 /* 4*/	FLAG_ENTRY0("CceTrgtAccessErr",
252 		CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253 /* 5*/	FLAG_ENTRY0("CceRspdDataParityErr",
254 		CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255 /* 6*/	FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256 		CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257 /* 7*/	FLAG_ENTRY0("CceCsrCfgBusParityErr",
258 		CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259 /* 8*/	FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260 		CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261 /* 9*/	FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
263 /*10*/	FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
264 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265 /*11*/	FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267 /*12*/	FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268 		CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269 /*13*/	FLAG_ENTRY0("PcicRetryMemCorErr",
270 		CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
271 /*14*/	FLAG_ENTRY0("PcicRetrySotMemCorErr",
272 		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
273 /*15*/	FLAG_ENTRY0("PcicPostHdQCorErr",
274 		CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
275 /*16*/	FLAG_ENTRY0("PcicPostDatQCorErr",
276 		CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
277 /*17*/	FLAG_ENTRY0("PcicCplHdQCorErr",
278 		CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279 /*18*/	FLAG_ENTRY0("PcicCplDatQCorErr",
280 		CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281 /*19*/	FLAG_ENTRY0("PcicNPostHQParityErr",
282 		CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283 /*20*/	FLAG_ENTRY0("PcicNPostDatQParityErr",
284 		CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285 /*21*/	FLAG_ENTRY0("PcicRetryMemUncErr",
286 		CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287 /*22*/	FLAG_ENTRY0("PcicRetrySotMemUncErr",
288 		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289 /*23*/	FLAG_ENTRY0("PcicPostHdQUncErr",
290 		CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291 /*24*/	FLAG_ENTRY0("PcicPostDatQUncErr",
292 		CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293 /*25*/	FLAG_ENTRY0("PcicCplHdQUncErr",
294 		CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295 /*26*/	FLAG_ENTRY0("PcicCplDatQUncErr",
296 		CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297 /*27*/	FLAG_ENTRY0("PcicTransmitFrontParityErr",
298 		CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299 /*28*/	FLAG_ENTRY0("PcicTransmitBackParityErr",
300 		CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301 /*29*/	FLAG_ENTRY0("PcicReceiveParityErr",
302 		CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303 /*30*/	FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304 		CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305 /*31*/	FLAG_ENTRY0("LATriggered",
306 		CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307 /*32*/	FLAG_ENTRY0("CceSegReadBadAddrErr",
308 		CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309 /*33*/	FLAG_ENTRY0("CceSegWriteBadAddrErr",
310 		CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311 /*34*/	FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312 		CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313 /*35*/	FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314 		CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315 /*36*/	FLAG_ENTRY0("CceMsixTableCorErr",
316 		CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317 /*37*/	FLAG_ENTRY0("CceMsixTableUncErr",
318 		CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319 /*38*/	FLAG_ENTRY0("CceIntMapCorErr",
320 		CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321 /*39*/	FLAG_ENTRY0("CceIntMapUncErr",
322 		CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323 /*40*/	FLAG_ENTRY0("CceMsixCsrParityErr",
324 		CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
325 /*41-63 reserved*/
326 };
327 
328 /*
329  * Misc Error flags
330  */
331 #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332 static struct flag_table misc_err_status_flags[] = {
333 /* 0*/	FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334 /* 1*/	FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335 /* 2*/	FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336 /* 3*/	FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337 /* 4*/	FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338 /* 5*/	FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339 /* 6*/	FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340 /* 7*/	FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341 /* 8*/	FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342 /* 9*/	FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343 /*10*/	FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344 /*11*/	FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345 /*12*/	FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
346 };
347 
348 /*
349  * TXE PIO Error flags and consequences
350  */
351 static struct flag_table pio_err_status_flags[] = {
352 /* 0*/	FLAG_ENTRY("PioWriteBadCtxt",
353 	SEC_WRITE_DROPPED,
354 	SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355 /* 1*/	FLAG_ENTRY("PioWriteAddrParity",
356 	SEC_SPC_FREEZE,
357 	SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358 /* 2*/	FLAG_ENTRY("PioCsrParity",
359 	SEC_SPC_FREEZE,
360 	SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361 /* 3*/	FLAG_ENTRY("PioSbMemFifo0",
362 	SEC_SPC_FREEZE,
363 	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364 /* 4*/	FLAG_ENTRY("PioSbMemFifo1",
365 	SEC_SPC_FREEZE,
366 	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367 /* 5*/	FLAG_ENTRY("PioPccFifoParity",
368 	SEC_SPC_FREEZE,
369 	SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370 /* 6*/	FLAG_ENTRY("PioPecFifoParity",
371 	SEC_SPC_FREEZE,
372 	SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373 /* 7*/	FLAG_ENTRY("PioSbrdctlCrrelParity",
374 	SEC_SPC_FREEZE,
375 	SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376 /* 8*/	FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
377 	SEC_SPC_FREEZE,
378 	SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379 /* 9*/	FLAG_ENTRY("PioPktEvictFifoParityErr",
380 	SEC_SPC_FREEZE,
381 	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382 /*10*/	FLAG_ENTRY("PioSmPktResetParity",
383 	SEC_SPC_FREEZE,
384 	SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385 /*11*/	FLAG_ENTRY("PioVlLenMemBank0Unc",
386 	SEC_SPC_FREEZE,
387 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388 /*12*/	FLAG_ENTRY("PioVlLenMemBank1Unc",
389 	SEC_SPC_FREEZE,
390 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391 /*13*/	FLAG_ENTRY("PioVlLenMemBank0Cor",
392 	0,
393 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394 /*14*/	FLAG_ENTRY("PioVlLenMemBank1Cor",
395 	0,
396 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397 /*15*/	FLAG_ENTRY("PioCreditRetFifoParity",
398 	SEC_SPC_FREEZE,
399 	SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400 /*16*/	FLAG_ENTRY("PioPpmcPblFifo",
401 	SEC_SPC_FREEZE,
402 	SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403 /*17*/	FLAG_ENTRY("PioInitSmIn",
404 	0,
405 	SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406 /*18*/	FLAG_ENTRY("PioPktEvictSmOrArbSm",
407 	SEC_SPC_FREEZE,
408 	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409 /*19*/	FLAG_ENTRY("PioHostAddrMemUnc",
410 	SEC_SPC_FREEZE,
411 	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412 /*20*/	FLAG_ENTRY("PioHostAddrMemCor",
413 	0,
414 	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415 /*21*/	FLAG_ENTRY("PioWriteDataParity",
416 	SEC_SPC_FREEZE,
417 	SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418 /*22*/	FLAG_ENTRY("PioStateMachine",
419 	SEC_SPC_FREEZE,
420 	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421 /*23*/	FLAG_ENTRY("PioWriteQwValidParity",
422 	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423 	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424 /*24*/	FLAG_ENTRY("PioBlockQwCountParity",
425 	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426 	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427 /*25*/	FLAG_ENTRY("PioVlfVlLenParity",
428 	SEC_SPC_FREEZE,
429 	SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430 /*26*/	FLAG_ENTRY("PioVlfSopParity",
431 	SEC_SPC_FREEZE,
432 	SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433 /*27*/	FLAG_ENTRY("PioVlFifoParity",
434 	SEC_SPC_FREEZE,
435 	SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436 /*28*/	FLAG_ENTRY("PioPpmcBqcMemParity",
437 	SEC_SPC_FREEZE,
438 	SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439 /*29*/	FLAG_ENTRY("PioPpmcSopLen",
440 	SEC_SPC_FREEZE,
441 	SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
442 /*30-31 reserved*/
443 /*32*/	FLAG_ENTRY("PioCurrentFreeCntParity",
444 	SEC_SPC_FREEZE,
445 	SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446 /*33*/	FLAG_ENTRY("PioLastReturnedCntParity",
447 	SEC_SPC_FREEZE,
448 	SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449 /*34*/	FLAG_ENTRY("PioPccSopHeadParity",
450 	SEC_SPC_FREEZE,
451 	SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452 /*35*/	FLAG_ENTRY("PioPecSopHeadParityErr",
453 	SEC_SPC_FREEZE,
454 	SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
455 /*36-63 reserved*/
456 };
457 
458 /* TXE PIO errors that cause an SPC freeze */
459 #define ALL_PIO_FREEZE_ERR \
460 	(SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461 	| SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462 	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463 	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464 	| SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465 	| SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466 	| SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467 	| SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468 	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469 	| SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470 	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471 	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472 	| SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473 	| SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474 	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475 	| SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476 	| SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477 	| SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478 	| SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479 	| SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480 	| SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481 	| SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482 	| SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483 	| SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484 	| SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485 	| SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486 	| SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487 	| SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488 	| SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
489 
490 /*
491  * TXE SDMA Error flags
492  */
493 static struct flag_table sdma_err_status_flags[] = {
494 /* 0*/	FLAG_ENTRY0("SDmaRpyTagErr",
495 		SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496 /* 1*/	FLAG_ENTRY0("SDmaCsrParityErr",
497 		SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498 /* 2*/	FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499 		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500 /* 3*/	FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501 		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
502 /*04-63 reserved*/
503 };
504 
505 /* TXE SDMA errors that cause an SPC freeze */
506 #define ALL_SDMA_FREEZE_ERR  \
507 		(SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508 		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509 		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
510 
511 /*
512  * TXE Egress Error flags
513  */
514 #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515 static struct flag_table egress_err_status_flags[] = {
516 /* 0*/	FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517 /* 1*/	FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
518 /* 2 reserved */
519 /* 3*/	FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520 		SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521 /* 4*/	FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522 /* 5*/	FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
523 /* 6 reserved */
524 /* 7*/	FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525 		SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526 /* 8*/	FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527 		SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
528 /* 9-10 reserved */
529 /*11*/	FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530 		SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531 /*12*/	FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532 /*13*/	FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533 /*14*/	FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534 /*15*/	FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535 /*16*/	FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536 		SEES(TX_SDMA0_DISALLOWED_PACKET)),
537 /*17*/	FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538 		SEES(TX_SDMA1_DISALLOWED_PACKET)),
539 /*18*/	FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540 		SEES(TX_SDMA2_DISALLOWED_PACKET)),
541 /*19*/	FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542 		SEES(TX_SDMA3_DISALLOWED_PACKET)),
543 /*20*/	FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544 		SEES(TX_SDMA4_DISALLOWED_PACKET)),
545 /*21*/	FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546 		SEES(TX_SDMA5_DISALLOWED_PACKET)),
547 /*22*/	FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548 		SEES(TX_SDMA6_DISALLOWED_PACKET)),
549 /*23*/	FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550 		SEES(TX_SDMA7_DISALLOWED_PACKET)),
551 /*24*/	FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552 		SEES(TX_SDMA8_DISALLOWED_PACKET)),
553 /*25*/	FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554 		SEES(TX_SDMA9_DISALLOWED_PACKET)),
555 /*26*/	FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556 		SEES(TX_SDMA10_DISALLOWED_PACKET)),
557 /*27*/	FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558 		SEES(TX_SDMA11_DISALLOWED_PACKET)),
559 /*28*/	FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560 		SEES(TX_SDMA12_DISALLOWED_PACKET)),
561 /*29*/	FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562 		SEES(TX_SDMA13_DISALLOWED_PACKET)),
563 /*30*/	FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564 		SEES(TX_SDMA14_DISALLOWED_PACKET)),
565 /*31*/	FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566 		SEES(TX_SDMA15_DISALLOWED_PACKET)),
567 /*32*/	FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568 		SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569 /*33*/	FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570 		SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571 /*34*/	FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572 		SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573 /*35*/	FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574 		SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575 /*36*/	FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576 		SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577 /*37*/	FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578 		SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579 /*38*/	FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580 		SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581 /*39*/	FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582 		SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583 /*40*/	FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584 		SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585 /*41*/	FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586 /*42*/	FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587 /*43*/	FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588 /*44*/	FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589 /*45*/	FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590 /*46*/	FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591 /*47*/	FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592 /*48*/	FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593 /*49*/	FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594 /*50*/	FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595 /*51*/	FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596 /*52*/	FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597 /*53*/	FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598 /*54*/	FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599 /*55*/	FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600 /*56*/	FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601 /*57*/	FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602 /*58*/	FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603 /*59*/	FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604 /*60*/	FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605 /*61*/	FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606 /*62*/	FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607 		SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608 /*63*/	FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609 		SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
610 };
611 
612 /*
613  * TXE Egress Error Info flags
614  */
615 #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616 static struct flag_table egress_err_info_flags[] = {
617 /* 0*/	FLAG_ENTRY0("Reserved", 0ull),
618 /* 1*/	FLAG_ENTRY0("VLErr", SEEI(VL)),
619 /* 2*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620 /* 3*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621 /* 4*/	FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622 /* 5*/	FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623 /* 6*/	FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624 /* 7*/	FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625 /* 8*/	FLAG_ENTRY0("RawErr", SEEI(RAW)),
626 /* 9*/	FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627 /*10*/	FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628 /*11*/	FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629 /*12*/	FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630 /*13*/	FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631 /*14*/	FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632 /*15*/	FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633 /*16*/	FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634 /*17*/	FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635 /*18*/	FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636 /*19*/	FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637 /*20*/	FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638 /*21*/	FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
639 };
640 
641 /* TXE Egress errors that cause an SPC freeze */
642 #define ALL_TXE_EGRESS_FREEZE_ERR \
643 	(SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644 	| SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645 	| SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646 	| SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647 	| SEES(TX_LAUNCH_CSR_PARITY) \
648 	| SEES(TX_SBRD_CTL_CSR_PARITY) \
649 	| SEES(TX_CONFIG_PARITY) \
650 	| SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651 	| SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652 	| SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653 	| SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654 	| SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655 	| SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656 	| SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657 	| SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658 	| SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659 	| SEES(TX_CREDIT_RETURN_PARITY))
660 
661 /*
662  * TXE Send error flags
663  */
664 #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665 static struct flag_table send_err_status_flags[] = {
666 /* 0*/	FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
667 /* 1*/	FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668 /* 2*/	FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
669 };
670 
671 /*
672  * TXE Send Context Error flags and consequences
673  */
674 static struct flag_table sc_err_status_flags[] = {
675 /* 0*/	FLAG_ENTRY("InconsistentSop",
676 		SEC_PACKET_DROPPED | SEC_SC_HALTED,
677 		SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678 /* 1*/	FLAG_ENTRY("DisallowedPacket",
679 		SEC_PACKET_DROPPED | SEC_SC_HALTED,
680 		SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681 /* 2*/	FLAG_ENTRY("WriteCrossesBoundary",
682 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
683 		SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684 /* 3*/	FLAG_ENTRY("WriteOverflow",
685 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
686 		SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687 /* 4*/	FLAG_ENTRY("WriteOutOfBounds",
688 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
689 		SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
690 /* 5-63 reserved*/
691 };
692 
693 /*
694  * RXE Receive Error flags
695  */
696 #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697 static struct flag_table rxe_err_status_flags[] = {
698 /* 0*/	FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699 /* 1*/	FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700 /* 2*/	FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701 /* 3*/	FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702 /* 4*/	FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703 /* 5*/	FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704 /* 6*/	FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705 /* 7*/	FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706 /* 8*/	FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707 /* 9*/	FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708 /*10*/	FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709 /*11*/	FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710 /*12*/	FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711 /*13*/	FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712 /*14*/	FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713 /*15*/	FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714 /*16*/	FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715 		RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716 /*17*/	FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717 /*18*/	FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718 /*19*/	FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719 		RXES(RBUF_BLOCK_LIST_READ_UNC)),
720 /*20*/	FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721 		RXES(RBUF_BLOCK_LIST_READ_COR)),
722 /*21*/	FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723 		RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724 /*22*/	FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725 		RXES(RBUF_CSR_QENT_CNT_PARITY)),
726 /*23*/	FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727 		RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728 /*24*/	FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729 		RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730 /*25*/	FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731 /*26*/	FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732 /*27*/	FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733 		RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734 /*28*/	FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735 /*29*/	FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736 /*30*/	FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737 /*31*/	FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738 /*32*/	FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739 /*33*/	FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740 /*34*/	FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741 /*35*/	FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742 		RXES(RBUF_FL_INITDONE_PARITY)),
743 /*36*/	FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744 		RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745 /*37*/	FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746 /*38*/	FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747 /*39*/	FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748 /*40*/	FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749 		RXES(LOOKUP_DES_PART1_UNC_COR)),
750 /*41*/	FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751 		RXES(LOOKUP_DES_PART2_PARITY)),
752 /*42*/	FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753 /*43*/	FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754 /*44*/	FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755 /*45*/	FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756 /*46*/	FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757 /*47*/	FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758 /*48*/	FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759 /*49*/	FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760 /*50*/	FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761 /*51*/	FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762 /*52*/	FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763 /*53*/	FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764 /*54*/	FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765 /*55*/	FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766 /*56*/	FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767 /*57*/	FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768 /*58*/	FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769 /*59*/	FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770 /*60*/	FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771 /*61*/	FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772 /*62*/	FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773 /*63*/	FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
774 };
775 
776 /* RXE errors that will trigger an SPC freeze */
777 #define ALL_RXE_FREEZE_ERR  \
778 	(RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
779 	| RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
780 	| RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
781 	| RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
782 	| RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
783 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
784 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
785 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
786 	| RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
787 	| RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
788 	| RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
789 	| RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
790 	| RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
791 	| RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
792 	| RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
793 	| RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
794 	| RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
795 	| RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
796 	| RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
797 	| RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
798 	| RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
799 	| RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
800 	| RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
801 	| RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
802 	| RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
803 	| RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
804 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
805 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
806 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
807 	| RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
808 	| RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
809 	| RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
810 	| RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
811 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
812 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
813 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
814 	| RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
815 	| RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
816 	| RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
817 	| RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
818 	| RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
819 	| RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
820 	| RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
821 	| RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
822 
823 #define RXE_FREEZE_ABORT_MASK \
824 	(RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
825 	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
826 	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
827 
828 /*
829  * DCC Error Flags
830  */
831 #define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832 static struct flag_table dcc_err_flags[] = {
833 	FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834 	FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835 	FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836 	FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837 	FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838 	FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839 	FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840 	FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841 	FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842 	FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843 	FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844 	FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845 	FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846 	FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847 	FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848 	FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849 	FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850 	FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851 	FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852 	FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853 	FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854 	FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855 	FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856 	FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857 	FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858 	FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859 	FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860 	FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861 	FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862 	FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863 	FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864 	FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865 	FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866 	FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867 	FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868 	FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869 	FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870 	FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871 	FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872 	FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873 	FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874 	FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875 	FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876 	FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877 	FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878 	FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
879 };
880 
881 /*
882  * LCB error flags
883  */
884 #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885 static struct flag_table lcb_err_flags[] = {
886 /* 0*/	FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887 /* 1*/	FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888 /* 2*/	FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889 /* 3*/	FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890 		LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891 /* 4*/	FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892 /* 5*/	FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893 /* 6*/	FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894 /* 7*/	FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895 /* 8*/	FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896 /* 9*/	FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897 /*10*/	FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898 /*11*/	FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899 /*12*/	FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900 /*13*/	FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901 		LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902 /*14*/	FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903 /*15*/	FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904 /*16*/	FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905 /*17*/	FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906 /*18*/	FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907 /*19*/	FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908 		LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909 /*20*/	FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910 /*21*/	FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911 /*22*/	FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912 /*23*/	FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913 /*24*/	FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914 /*25*/	FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915 /*26*/	FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916 		LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917 /*27*/	FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918 /*28*/	FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919 		LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920 /*29*/	FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921 		LCBE(REDUNDANT_FLIT_PARITY_ERR))
922 };
923 
924 /*
925  * DC8051 Error Flags
926  */
927 #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928 static struct flag_table dc8051_err_flags[] = {
929 	FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930 	FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931 	FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932 	FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933 	FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934 	FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935 	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936 	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937 	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938 		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939 	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
940 };
941 
942 /*
943  * DC8051 Information Error flags
944  *
945  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
946  */
947 static struct flag_table dc8051_info_err_flags[] = {
948 	FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
949 	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
950 	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
951 	FLAG_ENTRY0("Serdes internal loopback failure",
952 					FAILED_SERDES_INTERNAL_LOOPBACK),
953 	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
954 	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
955 	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
956 	FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
957 	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
958 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960 	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
961 };
962 
963 /*
964  * DC8051 Information Host Message flags
965  *
966  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
967  */
968 static struct flag_table dc8051_info_host_msg_flags[] = {
969 	FLAG_ENTRY0("Host request done", 0x0001),
970 	FLAG_ENTRY0("BC SMA message", 0x0002),
971 	FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972 	FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973 	FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974 	FLAG_ENTRY0("External device config request", 0x0020),
975 	FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976 	FLAG_ENTRY0("LinkUp achieved", 0x0080),
977 	FLAG_ENTRY0("Link going down", 0x0100),
978 };
979 
980 
981 static u32 encoded_size(u32 size);
982 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
985 			       u8 *continuous);
986 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987 				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989 				      u8 *remote_tx_rate, u16 *link_widths);
990 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991 				     u8 *flag_bits, u16 *link_widths);
992 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
993 				  u8 *device_rev);
994 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996 static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997 			    u8 *tx_polarity_inversion,
998 			    u8 *rx_polarity_inversion, u8 *max_rate);
999 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000 				unsigned int context, u64 err_status);
1001 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002 static void handle_dcc_err(struct hfi1_devdata *dd,
1003 			   unsigned int context, u64 err_status);
1004 static void handle_lcb_err(struct hfi1_devdata *dd,
1005 			   unsigned int context, u64 err_status);
1006 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014 static void set_partition_keys(struct hfi1_pportdata *);
1015 static const char *link_state_name(u32 state);
1016 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1017 					  u32 state);
1018 static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1019 			   u64 *out_data);
1020 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021 static int thermal_init(struct hfi1_devdata *dd);
1022 
1023 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1024 				  int msecs);
1025 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026 static void handle_temp_err(struct hfi1_devdata *);
1027 static void dc_shutdown(struct hfi1_devdata *);
1028 static void dc_start(struct hfi1_devdata *);
1029 
1030 /*
1031  * Error interrupt table entry.  This is used as input to the interrupt
1032  * "clear down" routine used for all second tier error interrupt register.
1033  * Second tier interrupt registers have a single bit representing them
1034  * in the top-level CceIntStatus.
1035  */
1036 struct err_reg_info {
1037 	u32 status;		/* status CSR offset */
1038 	u32 clear;		/* clear CSR offset */
1039 	u32 mask;		/* mask CSR offset */
1040 	void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1041 	const char *desc;
1042 };
1043 
1044 #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1045 #define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1046 #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1047 
1048 /*
1049  * Helpers for building HFI and DC error interrupt table entries.  Different
1050  * helpers are needed because of inconsistent register names.
1051  */
1052 #define EE(reg, handler, desc) \
1053 	{ reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1054 		handler, desc }
1055 #define DC_EE1(reg, handler, desc) \
1056 	{ reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057 #define DC_EE2(reg, handler, desc) \
1058 	{ reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
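/*
 * Illustrative expansion: EE(CCE_ERR, handle_cce_err, "CceErr") becomes
 *	{ CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK, handle_cce_err, "CceErr" }
 * while DC_EE1 uses the DC _FLG/_FLG_CLR/_FLG_EN register names and DC_EE2
 * the _FLG/_CLR/_EN names, all filling the err_reg_info layout above.
 */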
1059 
1060 /*
1061  * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062  * another register containing more information.
1063  */
1064 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065 /* 0*/	EE(CCE_ERR,		handle_cce_err,    "CceErr"),
1066 /* 1*/	EE(RCV_ERR,		handle_rxe_err,    "RxeErr"),
1067 /* 2*/	EE(MISC_ERR,	handle_misc_err,   "MiscErr"),
1068 /* 3*/	{ 0, 0, 0, NULL }, /* reserved */
1069 /* 4*/	EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070 /* 5*/	EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071 /* 6*/	EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072 /* 7*/	EE(SEND_ERR,	handle_txe_err,    "TxeErr")
1073 	/* the rest are reserved */
1074 };
1075 
1076 /*
1077  * Index into the Various section of the interrupt sources
1078  * corresponding to the Critical Temperature interrupt.
1079  */
1080 #define TCRIT_INT_SOURCE 4
1081 
1082 /*
1083  * SDMA error interrupt entry - refers to another register containing more
1084  * information.
1085  */
1086 static const struct err_reg_info sdma_eng_err =
1087 	EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088 
1089 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090 /* 0*/	{ 0, 0, 0, NULL }, /* PbcInt */
1091 /* 1*/	{ 0, 0, 0, NULL }, /* GpioAssertInt */
1092 /* 2*/	EE(ASIC_QSFP1,	handle_qsfp_int,	"QSFP1"),
1093 /* 3*/	EE(ASIC_QSFP2,	handle_qsfp_int,	"QSFP2"),
1094 /* 4*/	{ 0, 0, 0, NULL }, /* TCritInt */
1095 	/* rest are reserved */
1096 };
1097 
1098 /*
1099  * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100  * register can not be derived from the MTU value because 10K is not
1101  * a power of 2. Therefore, we need a constant. Everything else can
1102  * be calculated.
1103  */
1104 #define DCC_CFG_PORT_MTU_CAP_10240 7
1105 
1106 /*
1107  * Table of the DC grouping of error interrupts.  Each entry refers to
1108  * another register containing more information.
1109  */
1110 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111 /* 0*/	DC_EE1(DCC_ERR,		handle_dcc_err,	       "DCC Err"),
1112 /* 1*/	DC_EE2(DC_LCB_ERR,	handle_lcb_err,	       "LCB Err"),
1113 /* 2*/	DC_EE2(DC_DC8051_ERR,	handle_8051_interrupt, "DC8051 Interrupt"),
1114 /* 3*/	/* dc_lbm_int - special, see is_dc_int() */
1115 	/* the rest are reserved */
1116 };
1117 
1118 struct cntr_entry {
1119 	/*
1120 	 * counter name
1121 	 */
1122 	char *name;
1123 
1124 	/*
1125 	 * csr to read for name (if applicable)
1126 	 */
1127 	u64 csr;
1128 
1129 	/*
1130 	 * offset into dd or ppd to store the counter's value
1131 	 */
1132 	int offset;
1133 
1134 	/*
1135 	 * flags
1136 	 */
1137 	u8 flags;
1138 
1139 	/*
1140 	 * accessor for stat element, context either dd or ppd
1141 	 */
1142 	u64 (*rw_cntr)(const struct cntr_entry *,
1143 			       void *context,
1144 			       int vl,
1145 			       int mode,
1146 			       u64 data);
1147 };
1148 
1149 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151 
1152 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153 { \
1154 	name, \
1155 	csr, \
1156 	offset, \
1157 	flags, \
1158 	accessor \
1159 }
1160 
1161 /* 32bit RXE */
1162 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163 CNTR_ELEM(#name, \
1164 	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1165 	  0, flags | CNTR_32BIT, \
1166 	  port_access_u32_csr)
1167 
1168 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169 CNTR_ELEM(#name, \
1170 	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1171 	  0, flags | CNTR_32BIT, \
1172 	  dev_access_u32_csr)
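/*
 * Illustrative expansion (hypothetical counter, not from this driver):
 *	RXE32_DEV_CNTR_ELEM(RxExample, 3, CNTR_NORMAL)
 * becomes
 *	{ "RxExample", 3 * 8 + RCV_COUNTER_ARRAY32, 0,
 *	  CNTR_NORMAL | CNTR_32BIT, dev_access_u32_csr }
 * i.e. the counter index is an 8-byte stride into the 32-bit RXE array.
 */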
1173 
1174 /* 64bit RXE */
1175 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176 CNTR_ELEM(#name, \
1177 	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1178 	  0, flags, \
1179 	  port_access_u64_csr)
1180 
1181 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182 CNTR_ELEM(#name, \
1183 	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1184 	  0, flags, \
1185 	  dev_access_u64_csr)
1186 
1187 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188 #define OVR_ELM(ctx) \
1189 CNTR_ELEM("RcvHdrOvr" #ctx, \
1190 	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
1191 	  0, CNTR_NORMAL, port_access_u64_csr)
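/*
 * Illustrative note: OVR_ELM(ctx) names the counter "RcvHdrOvr<ctx>" and,
 * given the 0x100-byte per-context stride encoded above, OVR_ELM(2) would
 * read RCV_HDR_OVFL_CNT + 0x200.
 */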
1192 
1193 /* 32bit TXE */
1194 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195 CNTR_ELEM(#name, \
1196 	  (counter * 8 + SEND_COUNTER_ARRAY32), \
1197 	  0, flags | CNTR_32BIT, \
1198 	  port_access_u32_csr)
1199 
1200 /* 64bit TXE */
1201 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202 CNTR_ELEM(#name, \
1203 	  (counter * 8 + SEND_COUNTER_ARRAY64), \
1204 	  0, flags, \
1205 	  port_access_u64_csr)
1206 
1207 #define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208 CNTR_ELEM(#name,\
1209 	  counter * 8 + SEND_COUNTER_ARRAY64, \
1210 	  0, \
1211 	  flags, \
1212 	  dev_access_u64_csr)
1213 
1214 /* CCE */
1215 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216 CNTR_ELEM(#name, \
1217 	  (counter * 8 + CCE_COUNTER_ARRAY32), \
1218 	  0, flags | CNTR_32BIT, \
1219 	  dev_access_u32_csr)
1220 
1221 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222 CNTR_ELEM(#name, \
1223 	  (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224 	  0, flags | CNTR_32BIT, \
1225 	  dev_access_u32_csr)
1226 
1227 /* DC */
1228 #define DC_PERF_CNTR(name, counter, flags) \
1229 CNTR_ELEM(#name, \
1230 	  counter, \
1231 	  0, \
1232 	  flags, \
1233 	  dev_access_u64_csr)
1234 
1235 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1236 CNTR_ELEM(#name, \
1237 	  counter, \
1238 	  0, \
1239 	  flags, \
1240 	  dc_access_lcb_cntr)
1241 
1242 /* ibp counters */
1243 #define SW_IBP_CNTR(name, cntr) \
1244 CNTR_ELEM(#name, \
1245 	  0, \
1246 	  0, \
1247 	  CNTR_SYNTH, \
1248 	  access_ibp_##cntr)
1249 
1250 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251 {
1252 	u64 val;
1253 
1254 	if (dd->flags & HFI1_PRESENT) {
1255 		val = readq((void __iomem *)dd->kregbase + offset);
1256 		return val;
1257 	}
1258 	return -1;
1259 }
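/*
 * Added note (assumption about intent): when HFI1_PRESENT is clear the read
 * is skipped and -1 is returned, i.e. all ones in u64 form, much like a read
 * from absent PCIe hardware would return.
 */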
1260 
1261 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262 {
1263 	if (dd->flags & HFI1_PRESENT)
1264 		writeq(value, (void __iomem *)dd->kregbase + offset);
1265 }
1266 
1267 void __iomem *get_csr_addr(
1268 	struct hfi1_devdata *dd,
1269 	u32 offset)
1270 {
1271 	return (void __iomem *)dd->kregbase + offset;
1272 }
1273 
1274 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275 				 int mode, u64 value)
1276 {
1277 	u64 ret;
1278 
1279 
1280 	if (mode == CNTR_MODE_R) {
1281 		ret = read_csr(dd, csr);
1282 	} else if (mode == CNTR_MODE_W) {
1283 		write_csr(dd, csr, value);
1284 		ret = value;
1285 	} else {
1286 		dd_dev_err(dd, "Invalid cntr register access mode");
1287 		return 0;
1288 	}
1289 
1290 	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291 	return ret;
1292 }
1293 
1294 /* Dev Access */
1295 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296 			    void *context, int vl, int mode, u64 data)
1297 {
1298 	struct hfi1_devdata *dd = context;
1299 
1300 	if (vl != CNTR_INVALID_VL)
1301 		return 0;
1302 	return read_write_csr(dd, entry->csr, mode, data);
1303 }
1304 
1305 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306 			    int vl, int mode, u64 data)
1307 {
1308 	struct hfi1_devdata *dd = context;
1309 
1310 	u64 val = 0;
1311 	u64 csr = entry->csr;
1312 
1313 	if (entry->flags & CNTR_VL) {
1314 		if (vl == CNTR_INVALID_VL)
1315 			return 0;
1316 		csr += 8 * vl;
1317 	} else {
1318 		if (vl != CNTR_INVALID_VL)
1319 			return 0;
1320 	}
1321 
1322 	val = read_write_csr(dd, csr, mode, data);
1323 	return val;
1324 }
1325 
1326 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327 			    int vl, int mode, u64 data)
1328 {
1329 	struct hfi1_devdata *dd = context;
1330 	u32 csr = entry->csr;
1331 	int ret = 0;
1332 
1333 	if (vl != CNTR_INVALID_VL)
1334 		return 0;
1335 	if (mode == CNTR_MODE_R)
1336 		ret = read_lcb_csr(dd, csr, &data);
1337 	else if (mode == CNTR_MODE_W)
1338 		ret = write_lcb_csr(dd, csr, data);
1339 
1340 	if (ret) {
1341 		dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342 		return 0;
1343 	}
1344 
1345 	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346 	return data;
1347 }
1348 
1349 /* Port Access */
1350 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351 			     int vl, int mode, u64 data)
1352 {
1353 	struct hfi1_pportdata *ppd = context;
1354 
1355 	if (vl != CNTR_INVALID_VL)
1356 		return 0;
1357 	return read_write_csr(ppd->dd, entry->csr, mode, data);
1358 }
1359 
1360 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361 			     void *context, int vl, int mode, u64 data)
1362 {
1363 	struct hfi1_pportdata *ppd = context;
1364 	u64 val;
1365 	u64 csr = entry->csr;
1366 
1367 	if (entry->flags & CNTR_VL) {
1368 		if (vl == CNTR_INVALID_VL)
1369 			return 0;
1370 		csr += 8 * vl;
1371 	} else {
1372 		if (vl != CNTR_INVALID_VL)
1373 			return 0;
1374 	}
1375 	val = read_write_csr(ppd->dd, csr, mode, data);
1376 	return val;
1377 }
1378 
1379 /* Software defined */
1380 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381 				u64 data)
1382 {
1383 	u64 ret;
1384 
1385 	if (mode == CNTR_MODE_R) {
1386 		ret = *cntr;
1387 	} else if (mode == CNTR_MODE_W) {
1388 		*cntr = data;
1389 		ret = data;
1390 	} else {
1391 		dd_dev_err(dd, "Invalid cntr sw access mode");
1392 		return 0;
1393 	}
1394 
1395 	hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396 
1397 	return ret;
1398 }
1399 
1400 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401 			       int vl, int mode, u64 data)
1402 {
1403 	struct hfi1_pportdata *ppd = context;
1404 
1405 	if (vl != CNTR_INVALID_VL)
1406 		return 0;
1407 	return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408 }
1409 
1410 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411 			       int vl, int mode, u64 data)
1412 {
1413 	struct hfi1_pportdata *ppd = context;
1414 
1415 	if (vl != CNTR_INVALID_VL)
1416 		return 0;
1417 	return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418 }
1419 
1420 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421 				    void *context, int vl, int mode, u64 data)
1422 {
1423 	struct hfi1_pportdata *ppd = context;
1424 
1425 	if (vl != CNTR_INVALID_VL)
1426 		return 0;
1427 
1428 	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429 }
1430 
1431 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432 				     void *context, int vl, int mode, u64 data)
1433 {
1434 	struct hfi1_pportdata *ppd = context;
1435 
1436 	if (vl != CNTR_INVALID_VL)
1437 		return 0;
1438 
1439 	return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440 			     mode, data);
1441 }
1442 
1443 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444 				     void *context, int vl, int mode, u64 data)
1445 {
1446 	struct hfi1_pportdata *ppd = context;
1447 
1448 	if (vl != CNTR_INVALID_VL)
1449 		return 0;
1450 
1451 	return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452 			     mode, data);
1453 }
1454 
1455 u64 get_all_cpu_total(u64 __percpu *cntr)
1456 {
1457 	int cpu;
1458 	u64 counter = 0;
1459 
1460 	for_each_possible_cpu(cpu)
1461 		counter += *per_cpu_ptr(cntr, cpu);
1462 	return counter;
1463 }
1464 
1465 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466 			  u64 __percpu *cntr,
1467 			  int vl, int mode, u64 data)
1468 {
1469 
1470 	u64 ret = 0;
1471 
1472 	if (vl != CNTR_INVALID_VL)
1473 		return 0;
1474 
1475 	if (mode == CNTR_MODE_R) {
1476 		ret = get_all_cpu_total(cntr) - *z_val;
1477 	} else if (mode == CNTR_MODE_W) {
1478 		/* A write can only zero the counter */
1479 		if (data == 0)
1480 			*z_val = get_all_cpu_total(cntr);
1481 		else
1482 			dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483 	} else {
1484 		dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485 		return 0;
1486 	}
1487 
1488 	return ret;
1489 }
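
/*
 * A small worked example of the per-CPU counter convention above (the
 * numbers are hypothetical): if the per-CPU values currently sum to 1000
 * and *z_val holds 400 from the last zeroing, a CNTR_MODE_R call reports
 * 600.  Writing 0 re-snapshots the running total into *z_val; non-zero
 * writes are rejected because the underlying per-CPU data is never
 * modified here.
 */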
1490 
1491 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492 			      void *context, int vl, int mode, u64 data)
1493 {
1494 	struct hfi1_devdata *dd = context;
1495 
1496 	return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497 			      mode, data);
1498 }
1499 
1500 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501 			      void *context, int vl, int mode, u64 data)
1502 {
1503 	struct hfi1_devdata *dd = context;
1504 
1505 	return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506 			      mode, data);
1507 }
1508 
1509 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510 			      void *context, int vl, int mode, u64 data)
1511 {
1512 	struct hfi1_devdata *dd = context;
1513 
1514 	return dd->verbs_dev.n_piowait;
1515 }
1516 
1517 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518 			      void *context, int vl, int mode, u64 data)
1519 {
1520 	struct hfi1_devdata *dd = context;
1521 
1522 	return dd->verbs_dev.n_txwait;
1523 }
1524 
1525 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526 			       void *context, int vl, int mode, u64 data)
1527 {
1528 	struct hfi1_devdata *dd = context;
1529 
1530 	return dd->verbs_dev.n_kmem_wait;
1531 }
1532 
1533 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534 			       void *context, int vl, int mode, u64 data)
1535 {
1536 	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1537 
1538 	return dd->verbs_dev.n_send_schedule;
1539 }
1540 
1541 #define def_access_sw_cpu(cntr) \
1542 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,		      \
1543 			      void *context, int vl, int mode, u64 data)      \
1544 {									      \
1545 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1546 	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
1547 			      ppd->ibport_data.cntr, vl,		      \
1548 			      mode, data);				      \
1549 }
1550 
1551 def_access_sw_cpu(rc_acks);
1552 def_access_sw_cpu(rc_qacks);
1553 def_access_sw_cpu(rc_delayed_comp);
1554 
1555 #define def_access_ibp_counter(cntr) \
1556 static u64 access_ibp_##cntr(const struct cntr_entry *entry,		      \
1557 				void *context, int vl, int mode, u64 data)    \
1558 {									      \
1559 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1560 									      \
1561 	if (vl != CNTR_INVALID_VL)					      \
1562 		return 0;						      \
1563 									      \
1564 	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
1565 			     mode, data);				      \
1566 }
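
/*
 * Illustrative expansion, assuming the macro above:
 * def_access_ibp_counter(loop_pkts) emits access_ibp_loop_pkts(), which
 * forwards to read_write_sw() on ppd->ibport_data.n_loop_pkts; that is the
 * accessor the SW_IBP_CNTR(LoopPkts, loop_pkts) table entry below refers to.
 */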
1567 
1568 def_access_ibp_counter(loop_pkts);
1569 def_access_ibp_counter(rc_resends);
1570 def_access_ibp_counter(rnr_naks);
1571 def_access_ibp_counter(other_naks);
1572 def_access_ibp_counter(rc_timeouts);
1573 def_access_ibp_counter(pkt_drops);
1574 def_access_ibp_counter(dmawait);
1575 def_access_ibp_counter(rc_seqnak);
1576 def_access_ibp_counter(rc_dupreq);
1577 def_access_ibp_counter(rdma_seq);
1578 def_access_ibp_counter(unaligned);
1579 def_access_ibp_counter(seq_naks);
1580 
1581 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1584 			CNTR_NORMAL),
1585 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1586 			CNTR_NORMAL),
1587 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588 			RCV_TID_FLOW_GEN_MISMATCH_CNT,
1589 			CNTR_NORMAL),
1590 [C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1591 			CNTR_NORMAL),
1592 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1593 			CNTR_NORMAL),
1594 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595 			RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597 			CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1599 			CNTR_NORMAL),
1600 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1601 			CNTR_NORMAL),
1602 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1603 			CNTR_NORMAL),
1604 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1605 			CNTR_NORMAL),
1606 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1607 			CNTR_NORMAL),
1608 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1609 			CNTR_NORMAL),
1610 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611 			CCE_RCV_URGENT_INT_CNT,	CNTR_NORMAL),
1612 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613 			CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1615 			      CNTR_SYNTH),
1616 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1618 				 CNTR_SYNTH),
1619 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1620 				  CNTR_SYNTH),
1621 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1622 				  CNTR_SYNTH),
1623 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624 				   DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626 				  DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1627 				  CNTR_SYNTH),
1628 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629 				DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1631 			       CNTR_SYNTH),
1632 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1633 			      CNTR_SYNTH),
1634 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1635 			       CNTR_SYNTH),
1636 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1637 				 CNTR_SYNTH),
1638 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1639 				CNTR_SYNTH),
1640 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1641 				CNTR_SYNTH),
1642 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1643 			       CNTR_SYNTH),
1644 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645 				 CNTR_SYNTH | CNTR_VL),
1646 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647 				CNTR_SYNTH | CNTR_VL),
1648 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650 				 CNTR_SYNTH | CNTR_VL),
1651 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653 				 CNTR_SYNTH | CNTR_VL),
1654 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1655 			      CNTR_SYNTH),
1656 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657 				 CNTR_SYNTH | CNTR_VL),
1658 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1659 				CNTR_SYNTH),
1660 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661 				   CNTR_SYNTH | CNTR_VL),
1662 [C_DC_TOTAL_CRC] =
1663 	DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1664 			 CNTR_SYNTH),
1665 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1666 				  CNTR_SYNTH),
1667 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1668 				  CNTR_SYNTH),
1669 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1670 				  CNTR_SYNTH),
1671 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1672 				  CNTR_SYNTH),
1673 [C_DC_CRC_MULT_LN] =
1674 	DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1675 			 CNTR_SYNTH),
1676 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1677 				    CNTR_SYNTH),
1678 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1679 				    CNTR_SYNTH),
1680 [C_DC_SEQ_CRC_CNT] =
1681 	DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1682 			 CNTR_SYNTH),
1683 [C_DC_ESC0_ONLY_CNT] =
1684 	DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1685 			 CNTR_SYNTH),
1686 [C_DC_ESC0_PLUS1_CNT] =
1687 	DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1688 			 CNTR_SYNTH),
1689 [C_DC_ESC0_PLUS2_CNT] =
1690 	DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1691 			 CNTR_SYNTH),
1692 [C_DC_REINIT_FROM_PEER_CNT] =
1693 	DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1694 			 CNTR_SYNTH),
1695 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1696 				  CNTR_SYNTH),
1697 [C_DC_MISC_FLG_CNT] =
1698 	DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1699 			 CNTR_SYNTH),
1700 [C_DC_PRF_GOOD_LTP_CNT] =
1701 	DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1703 	DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1704 			 CNTR_SYNTH),
1705 [C_DC_PRF_RX_FLIT_CNT] =
1706 	DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707 [C_DC_PRF_TX_FLIT_CNT] =
1708 	DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709 [C_DC_PRF_CLK_CNTR] =
1710 	DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712 	DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714 	DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1715 			 CNTR_SYNTH),
1716 [C_DC_PG_STS_TX_SBE_CNT] =
1717 	DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718 [C_DC_PG_STS_TX_MBE_CNT] =
1719 	DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1720 			 CNTR_SYNTH),
1721 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722 			    access_sw_cpu_intr),
1723 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724 			    access_sw_cpu_rcv_limit),
1725 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726 			    access_sw_vtx_wait),
1727 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728 			    access_sw_pio_wait),
1729 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730 			    access_sw_kmem_wait),
1731 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732 			    access_sw_send_schedule),
1733 };
1734 
1735 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1737 			CNTR_NORMAL),
1738 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1739 			CNTR_NORMAL),
1740 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1741 			CNTR_NORMAL),
1742 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1743 			CNTR_NORMAL),
1744 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1745 			CNTR_NORMAL),
1746 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1747 			CNTR_NORMAL),
1748 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1749 			CNTR_NORMAL),
1750 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754 			CNTR_SYNTH | CNTR_VL),
1755 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756 			CNTR_SYNTH | CNTR_VL),
1757 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758 			CNTR_SYNTH | CNTR_VL),
1759 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762 			access_sw_link_dn_cnt),
1763 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764 			access_sw_link_up_cnt),
1765 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766 			access_sw_xmit_discards),
1767 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768 			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769 			access_sw_xmit_discards),
1770 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771 			access_xmit_constraint_errs),
1772 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773 			access_rcv_constraint_errs),
1774 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787 			       access_sw_cpu_rc_acks),
1788 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789 			       access_sw_cpu_rc_qacks),
1790 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791 			       access_sw_cpu_rc_delayed_comp),
1792 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1872 };
1873 
1874 /* ======================================================================== */
1875 
1876 /* return true if this is chip revision A0 */
1877 int is_a0(struct hfi1_devdata *dd)
1878 {
1879 	return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880 			& CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1881 }
1882 
1883 /* return true if this is chip revision A */
1884 int is_ax(struct hfi1_devdata *dd)
1885 {
1886 	u8 chip_rev_minor =
1887 		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1888 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1889 	return (chip_rev_minor & 0xf0) == 0;
1890 }
1891 
1892 /* return true if this is chip revision B */
1893 int is_bx(struct hfi1_devdata *dd)
1894 {
1895 	u8 chip_rev_minor =
1896 		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1897 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1898 	return !!(chip_rev_minor & 0x10);
1899 }
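
/*
 * Implied encoding, inferred from the three helpers above rather than
 * stated by them: the CHIP_REV_MINOR field carries the step in its upper
 * nibble and the sub-step in the lower nibble, so 0x00 is A0 (is_a0() and
 * is_ax() both true), 0x01 is A1 (only is_ax() true), and 0x10 is B0
 * (is_bx() true).
 */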
1900 
1901 /*
1902  * Append string s to buffer buf.  Arguments curp and len are the current
1903  * position and remaining length, respectively.
1904  *
1905  * return 0 on success, 1 on out of room
1906  */
1907 static int append_str(char *buf, char **curp, int *lenp, const char *s)
1908 {
1909 	char *p = *curp;
1910 	int len = *lenp;
1911 	int result = 0; /* success */
1912 	char c;
1913 
1914 	/* add a comma, if not first in the buffer */
1915 	if (p != buf) {
1916 		if (len == 0) {
1917 			result = 1; /* out of room */
1918 			goto done;
1919 		}
1920 		*p++ = ',';
1921 		len--;
1922 	}
1923 
1924 	/* copy the string */
1925 	while ((c = *s++) != 0) {
1926 		if (len == 0) {
1927 			result = 1; /* out of room */
1928 			goto done;
1929 		}
1930 		*p++ = c;
1931 		len--;
1932 	}
1933 
1934 done:
1935 	/* write return values */
1936 	*curp = p;
1937 	*lenp = len;
1938 
1939 	return result;
1940 }
1941 
1942 /*
1943  * Using the given flag table, print a comma separated string into
1944  * the buffer.  End in '*' if the buffer is too short.
1945  */
1946 static char *flag_string(char *buf, int buf_len, u64 flags,
1947 				struct flag_table *table, int table_size)
1948 {
1949 	char extra[32];
1950 	char *p = buf;
1951 	int len = buf_len;
1952 	int no_room = 0;
1953 	int i;
1954 
1955 	/* make sure there are at least 2 bytes so we can form "*" */
1956 	if (len < 2)
1957 		return "";
1958 
1959 	len--;	/* leave room for a nul */
1960 	for (i = 0; i < table_size; i++) {
1961 		if (flags & table[i].flag) {
1962 			no_room = append_str(buf, &p, &len, table[i].str);
1963 			if (no_room)
1964 				break;
1965 			flags &= ~table[i].flag;
1966 		}
1967 	}
1968 
1969 	/* any undocumented bits left? */
1970 	if (!no_room && flags) {
1971 		snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972 		no_room = append_str(buf, &p, &len, extra);
1973 	}
1974 
1975 	/* add * if ran out of room */
1976 	if (no_room) {
1977 		/* may need to back up to add space for a '*' */
1978 		if (len == 0)
1979 			--p;
1980 		*p++ = '*';
1981 	}
1982 
1983 	/* add final nul - space already allocated above */
1984 	*p = 0;
1985 	return buf;
1986 }
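
/*
 * Hypothetical example of the output format: with a table containing
 * { 0x1, "LinkErr" } and { 0x4, "CrcErr" }, flags of 0x7 print as
 * "LinkErr,CrcErr,bits 0x2"; a buffer too small to hold everything ends
 * in '*' instead.
 */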
1987 
1988 /* first 8 CCE error interrupt source names */
1989 static const char * const cce_misc_names[] = {
1990 	"CceErrInt",		/* 0 */
1991 	"RxeErrInt",		/* 1 */
1992 	"MiscErrInt",		/* 2 */
1993 	"Reserved3",		/* 3 */
1994 	"PioErrInt",		/* 4 */
1995 	"SDmaErrInt",		/* 5 */
1996 	"EgressErrInt",		/* 6 */
1997 	"TxeErrInt"		/* 7 */
1998 };
1999 
2000 /*
2001  * Return the miscellaneous error interrupt name.
2002  */
2003 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2004 {
2005 	if (source < ARRAY_SIZE(cce_misc_names))
2006 		strncpy(buf, cce_misc_names[source], bsize);
2007 	else
2008 		snprintf(buf,
2009 			bsize,
2010 			"Reserved%u",
2011 			source + IS_GENERAL_ERR_START);
2012 
2013 	return buf;
2014 }
2015 
2016 /*
2017  * Return the SDMA engine error interrupt name.
2018  */
2019 static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2020 {
2021 	snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2022 	return buf;
2023 }
2024 
2025 /*
2026  * Return the send context error interrupt name.
2027  */
2028 static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2029 {
2030 	snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2031 	return buf;
2032 }
2033 
2034 static const char * const various_names[] = {
2035 	"PbcInt",
2036 	"GpioAssertInt",
2037 	"Qsfp1Int",
2038 	"Qsfp2Int",
2039 	"TCritInt"
2040 };
2041 
2042 /*
2043  * Return the various interrupt name.
2044  */
2045 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2046 {
2047 	if (source < ARRAY_SIZE(various_names))
2048 		strncpy(buf, various_names[source], bsize);
2049 	else
2050 		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2051 	return buf;
2052 }
2053 
2054 /*
2055  * Return the DC interrupt name.
2056  */
2057 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2058 {
2059 	static const char * const dc_int_names[] = {
2060 		"common",
2061 		"lcb",
2062 		"8051",
2063 		"lbm"	/* local block merge */
2064 	};
2065 
2066 	if (source < ARRAY_SIZE(dc_int_names))
2067 		snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2068 	else
2069 		snprintf(buf, bsize, "DCInt%u", source);
2070 	return buf;
2071 }
2072 
2073 static const char * const sdma_int_names[] = {
2074 	"SDmaInt",
2075 	"SdmaIdleInt",
2076 	"SdmaProgressInt",
2077 };
2078 
2079 /*
2080  * Return the SDMA engine interrupt name.
2081  */
2082 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2083 {
2084 	/* what interrupt */
2085 	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2086 	/* which engine */
2087 	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2088 
2089 	if (likely(what < 3))
2090 		snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2091 	else
2092 		snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2093 	return buf;
2094 }
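
/*
 * Worked example of the decode above, assuming TXE_NUM_SDMA_ENGINES is 16:
 * source 17 gives what = 1 and which = 1, so the name is "SdmaIdleInt1";
 * any source at or beyond 3 * TXE_NUM_SDMA_ENGINES lands in the
 * "Invalid SDMA interrupt" branch.
 */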
2095 
2096 /*
2097  * Return the receive available interrupt name.
2098  */
2099 static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2100 {
2101 	snprintf(buf, bsize, "RcvAvailInt%u", source);
2102 	return buf;
2103 }
2104 
2105 /*
2106  * Return the receive urgent interrupt name.
2107  */
2108 static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2109 {
2110 	snprintf(buf, bsize, "RcvUrgentInt%u", source);
2111 	return buf;
2112 }
2113 
2114 /*
2115  * Return the send credit interrupt name.
2116  */
2117 static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2118 {
2119 	snprintf(buf, bsize, "SendCreditInt%u", source);
2120 	return buf;
2121 }
2122 
2123 /*
2124  * Return the reserved interrupt name.
2125  */
2126 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2127 {
2128 	snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2129 	return buf;
2130 }
2131 
2132 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2133 {
2134 	return flag_string(buf, buf_len, flags,
2135 			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2136 }
2137 
2138 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2139 {
2140 	return flag_string(buf, buf_len, flags,
2141 			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2142 }
2143 
2144 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2145 {
2146 	return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147 			ARRAY_SIZE(misc_err_status_flags));
2148 }
2149 
2150 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2151 {
2152 	return flag_string(buf, buf_len, flags,
2153 			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2154 }
2155 
2156 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2157 {
2158 	return flag_string(buf, buf_len, flags,
2159 			sdma_err_status_flags,
2160 			ARRAY_SIZE(sdma_err_status_flags));
2161 }
2162 
2163 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2164 {
2165 	return flag_string(buf, buf_len, flags,
2166 		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2167 }
2168 
2169 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2170 {
2171 	return flag_string(buf, buf_len, flags,
2172 		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2173 }
2174 
2175 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2176 {
2177 	return flag_string(buf, buf_len, flags,
2178 			send_err_status_flags,
2179 			ARRAY_SIZE(send_err_status_flags));
2180 }
2181 
2182 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2183 {
2184 	char buf[96];
2185 
2186 	/*
2187 	 * For most of these errors, there is nothing that can be done except
2188 	 * report or record it.
2189 	 */
2190 	dd_dev_info(dd, "CCE Error: %s\n",
2191 		cce_err_status_string(buf, sizeof(buf), reg));
2192 
2193 	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2194 			&& is_a0(dd)
2195 			&& (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196 		/* this error requires a manual drop into SPC freeze mode, */
2197 		/* followed by a fix up */
2198 		start_freeze_handling(dd->pport, FREEZE_SELF);
2199 	}
2200 }
2201 
2202 /*
2203  * Check counters for receive errors that do not have an interrupt
2204  * associated with them.
2205  */
2206 #define RCVERR_CHECK_TIME 10
2207 static void update_rcverr_timer(unsigned long opaque)
2208 {
2209 	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210 	struct hfi1_pportdata *ppd = dd->pport;
2211 	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2212 
2213 	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214 		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216 		set_link_down_reason(ppd,
2217 		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218 			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2220 	}
2221 	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2222 
2223 	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224 }
2225 
2226 static int init_rcverr(struct hfi1_devdata *dd)
2227 {
2228 	setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229 	/* Assume the hardware counter has been reset */
2230 	dd->rcv_ovfl_cnt = 0;
2231 	return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2232 }
2233 
2234 static void free_rcverr(struct hfi1_devdata *dd)
2235 {
2236 	if (dd->rcverr_timer.data)
2237 		del_timer_sync(&dd->rcverr_timer);
2238 	dd->rcverr_timer.data = 0;
2239 }
2240 
2241 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2242 {
2243 	char buf[96];
2244 
2245 	dd_dev_info(dd, "Receive Error: %s\n",
2246 		rxe_err_status_string(buf, sizeof(buf), reg));
2247 
2248 	if (reg & ALL_RXE_FREEZE_ERR) {
2249 		int flags = 0;
2250 
2251 		/*
2252 		 * Freeze mode recovery is disabled for the errors
2253 		 * in RXE_FREEZE_ABORT_MASK
2254 		 */
2255 		if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256 			flags = FREEZE_ABORT;
2257 
2258 		start_freeze_handling(dd->pport, flags);
2259 	}
2260 }
2261 
2262 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263 {
2264 	char buf[96];
2265 
2266 	dd_dev_info(dd, "Misc Error: %s",
2267 		misc_err_status_string(buf, sizeof(buf), reg));
2268 }
2269 
2270 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2271 {
2272 	char buf[96];
2273 
2274 	dd_dev_info(dd, "PIO Error: %s\n",
2275 		pio_err_status_string(buf, sizeof(buf), reg));
2276 
2277 	if (reg & ALL_PIO_FREEZE_ERR)
2278 		start_freeze_handling(dd->pport, 0);
2279 }
2280 
2281 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2282 {
2283 	char buf[96];
2284 
2285 	dd_dev_info(dd, "SDMA Error: %s\n",
2286 		sdma_err_status_string(buf, sizeof(buf), reg));
2287 
2288 	if (reg & ALL_SDMA_FREEZE_ERR)
2289 		start_freeze_handling(dd->pport, 0);
2290 }
2291 
2292 static void count_port_inactive(struct hfi1_devdata *dd)
2293 {
2294 	struct hfi1_pportdata *ppd = dd->pport;
2295 
2296 	if (ppd->port_xmit_discards < ~(u64)0)
2297 		ppd->port_xmit_discards++;
2298 }
2299 
2300 /*
2301  * We have had a "disallowed packet" error during egress. Determine the
2302  * integrity check which failed, and update the relevant error counter, etc.
2303  *
2304  * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305  * bit of state per integrity check, and so we can miss the reason for an
2306  * egress error if more than one packet fails the same integrity check
2307  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2308  */
2309 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2310 {
2311 	struct hfi1_pportdata *ppd = dd->pport;
2312 	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313 	u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2314 	char buf[96];
2315 
2316 	/* clear down all observed info as quickly as possible after read */
2317 	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2318 
2319 	dd_dev_info(dd,
2320 		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321 		info, egress_err_info_string(buf, sizeof(buf), info), src);
2322 
2323 	/* Eventually add other counters for each bit */
2324 
2325 	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326 		if (ppd->port_xmit_discards < ~(u64)0)
2327 			ppd->port_xmit_discards++;
2328 	}
2329 }
2330 
2331 /*
2332  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333  * register. Does it represent a 'port inactive' error?
2334  */
2335 static inline int port_inactive_err(u64 posn)
2336 {
2337 	return (posn >= SEES(TX_LINKDOWN) &&
2338 		posn <= SEES(TX_INCORRECT_LINK_STATE));
2339 }
2340 
2341 /*
2342  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343  * register. Does it represent a 'disallowed packet' error?
2344  */
2345 static inline int disallowed_pkt_err(u64 posn)
2346 {
2347 	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348 		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2349 }
2350 
2351 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2352 {
2353 	u64 reg_copy = reg, handled = 0;
2354 	char buf[96];
2355 
2356 	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357 		start_freeze_handling(dd->pport, 0);
2358 	if (is_a0(dd) && (reg &
2359 		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2360 		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361 		start_freeze_handling(dd->pport, 0);
2362 
2363 	while (reg_copy) {
2364 		int posn = fls64(reg_copy);
2365 		/*
2366 		 * fls64() returns a 1-based offset, but we generally
2367 		 * want 0-based offsets.
2368 		 */
2369 		int shift = posn - 1;
2370 
2371 		if (port_inactive_err(shift)) {
2372 			count_port_inactive(dd);
2373 			handled |= (1ULL << shift);
2374 		} else if (disallowed_pkt_err(shift)) {
2375 			handle_send_egress_err_info(dd);
2376 			handled |= (1ULL << shift);
2377 		}
2378 		clear_bit(shift, (unsigned long *)&reg_copy);
2379 	}
2380 
2381 	reg &= ~handled;
2382 
2383 	if (reg)
2384 		dd_dev_info(dd, "Egress Error: %s\n",
2385 			egress_err_status_string(buf, sizeof(buf), reg));
2386 }
2387 
2388 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2389 {
2390 	char buf[96];
2391 
2392 	dd_dev_info(dd, "Send Error: %s\n",
2393 		send_err_status_string(buf, sizeof(buf), reg));
2394 
2395 }
2396 
2397 /*
2398  * The maximum number of times the error clear down will loop before
2399  * blocking a repeating error.  This value is arbitrary.
2400  */
2401 #define MAX_CLEAR_COUNT 20
2402 
2403 /*
2404  * Clear and handle an error register.  All error interrupts are funneled
2405  * through here to have a central location to correctly handle single-
2406  * or multi-shot errors.
2407  *
2408  * For non per-context registers, call this routine with a context value
2409  * of 0 so the per-context offset is zero.
2410  *
2411  * If the handler loops too many times, assume that something is wrong
2412  * and can't be fixed, so mask the error bits.
2413  */
2414 static void interrupt_clear_down(struct hfi1_devdata *dd,
2415 				 u32 context,
2416 				 const struct err_reg_info *eri)
2417 {
2418 	u64 reg;
2419 	u32 count;
2420 
2421 	/* read in a loop until no more errors are seen */
2422 	count = 0;
2423 	while (1) {
2424 		reg = read_kctxt_csr(dd, context, eri->status);
2425 		if (reg == 0)
2426 			break;
2427 		write_kctxt_csr(dd, context, eri->clear, reg);
2428 		if (likely(eri->handler))
2429 			eri->handler(dd, context, reg);
2430 		count++;
2431 		if (count > MAX_CLEAR_COUNT) {
2432 			u64 mask;
2433 
2434 			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2435 				eri->desc, reg);
2436 			/*
2437 			 * Read-modify-write so any other masked bits
2438 			 * remain masked.
2439 			 */
2440 			mask = read_kctxt_csr(dd, context, eri->mask);
2441 			mask &= ~reg;
2442 			write_kctxt_csr(dd, context, eri->mask, mask);
2443 			break;
2444 		}
2445 	}
2446 }
2447 
2448 /*
2449  * CCE block "misc" interrupt.  Source is < 16.
2450  */
2451 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2452 {
2453 	const struct err_reg_info *eri = &misc_errs[source];
2454 
2455 	if (eri->handler) {
2456 		interrupt_clear_down(dd, 0, eri);
2457 	} else {
2458 		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2459 			source);
2460 	}
2461 }
2462 
2463 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2464 {
2465 	return flag_string(buf, buf_len, flags,
2466 			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2467 }
2468 
2469 /*
2470  * Send context error interrupt.  Source (hw_context) is < 160.
2471  *
2472  * All send context errors cause the send context to halt.  The normal
2473  * clear-down mechanism cannot be used because we cannot clear the
2474  * error bits until several other long-running items are done first.
2475  * This is OK because with the context halted, nothing else is going
2476  * to happen on it anyway.
2477  */
2478 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479 				unsigned int hw_context)
2480 {
2481 	struct send_context_info *sci;
2482 	struct send_context *sc;
2483 	char flags[96];
2484 	u64 status;
2485 	u32 sw_index;
2486 
2487 	sw_index = dd->hw_to_sw[hw_context];
2488 	if (sw_index >= dd->num_send_contexts) {
2489 		dd_dev_err(dd,
2490 			"out of range sw index %u for send context %u\n",
2491 			sw_index, hw_context);
2492 		return;
2493 	}
2494 	sci = &dd->send_contexts[sw_index];
2495 	sc = sci->sc;
2496 	if (!sc) {
2497 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498 			sw_index, hw_context);
2499 		return;
2500 	}
2501 
2502 	/* tell the software that a halt has begun */
2503 	sc_stop(sc, SCF_HALTED);
2504 
2505 	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2506 
2507 	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508 		send_context_err_status_string(flags, sizeof(flags), status));
2509 
2510 	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511 		handle_send_egress_err_info(dd);
2512 
2513 	/*
2514 	 * Automatically restart halted kernel contexts out of interrupt
2515 	 * context.  User contexts must ask the driver to restart the context.
2516 	 */
2517 	if (sc->type != SC_USER)
2518 		queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2519 }
2520 
2521 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522 				unsigned int source, u64 status)
2523 {
2524 	struct sdma_engine *sde;
2525 
2526 	sde = &dd->per_sdma[source];
2527 #ifdef CONFIG_SDMA_VERBOSITY
2528 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529 		   slashstrip(__FILE__), __LINE__, __func__);
2530 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531 		   sde->this_idx, source, (unsigned long long)status);
2532 #endif
2533 	sdma_engine_error(sde, status);
2534 }
2535 
2536 /*
2537  * CCE block SDMA error interrupt.  Source is < 16.
2538  */
2539 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2540 {
2541 #ifdef CONFIG_SDMA_VERBOSITY
2542 	struct sdma_engine *sde = &dd->per_sdma[source];
2543 
2544 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545 		   slashstrip(__FILE__), __LINE__, __func__);
2546 	dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2547 		   source);
2548 	sdma_dumpstate(sde);
2549 #endif
2550 	interrupt_clear_down(dd, source, &sdma_eng_err);
2551 }
2552 
2553 /*
2554  * CCE block "various" interrupt.  Source is < 8.
2555  */
2556 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2557 {
2558 	const struct err_reg_info *eri = &various_err[source];
2559 
2560 	/*
2561 	 * TCritInt cannot go through interrupt_clear_down()
2562 	 * because it is not a second tier interrupt. The handler
2563 	 * should be called directly.
2564 	 */
2565 	if (source == TCRIT_INT_SOURCE)
2566 		handle_temp_err(dd);
2567 	else if (eri->handler)
2568 		interrupt_clear_down(dd, 0, eri);
2569 	else
2570 		dd_dev_info(dd,
2571 			"%s: Unimplemented/reserved interrupt %d\n",
2572 			__func__, source);
2573 }
2574 
2575 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2576 {
2577 	/* source is always zero */
2578 	struct hfi1_pportdata *ppd = dd->pport;
2579 	unsigned long flags;
2580 	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2581 
2582 	if (reg & QSFP_HFI0_MODPRST_N) {
2583 
2584 		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2585 				__func__);
2586 
2587 		if (!qsfp_mod_present(ppd)) {
2588 			ppd->driver_link_ready = 0;
2589 			/*
2590 			 * Cable removed, reset all our information about the
2591 			 * cache and cable capabilities
2592 			 */
2593 
2594 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2595 			/*
2596 			 * We don't set cache_refresh_required here as we expect
2597 			 * an interrupt when a cable is inserted
2598 			 */
2599 			ppd->qsfp_info.cache_valid = 0;
2600 			ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2602 						flags);
2603 			write_csr(dd,
2604 					dd->hfi1_id ?
2605 						ASIC_QSFP2_INVERT :
2606 						ASIC_QSFP1_INVERT,
2607 				qsfp_int_mgmt);
2608 			if (ppd->host_link_state == HLS_DN_POLL) {
2609 				/*
2610 				 * The link is still in POLL. This means
2611 				 * that the normal link down processing
2612 				 * will not happen. We have to do it here
2613 				 * before turning the DC off.
2614 				 */
2615 				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2616 			}
2617 		} else {
2618 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619 			ppd->qsfp_info.cache_valid = 0;
2620 			ppd->qsfp_info.cache_refresh_required = 1;
2621 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2622 						flags);
2623 
2624 			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2625 			write_csr(dd,
2626 					dd->hfi1_id ?
2627 						ASIC_QSFP2_INVERT :
2628 						ASIC_QSFP1_INVERT,
2629 				qsfp_int_mgmt);
2630 		}
2631 	}
2632 
2633 	if (reg & QSFP_HFI0_INT_N) {
2634 
2635 		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2636 				__func__);
2637 		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638 		ppd->qsfp_info.check_interrupt_flags = 1;
2639 		ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640 		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2641 	}
2642 
2643 	/* Schedule the QSFP work only if there is a cable attached. */
2644 	if (qsfp_mod_present(ppd))
2645 		queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2646 }
2647 
2648 static int request_host_lcb_access(struct hfi1_devdata *dd)
2649 {
2650 	int ret;
2651 
2652 	ret = do_8051_command(dd, HCMD_MISC,
2653 		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2654 		NULL);
2655 	if (ret != HCMD_SUCCESS) {
2656 		dd_dev_err(dd, "%s: command failed with error %d\n",
2657 			__func__, ret);
2658 	}
2659 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2660 }
2661 
2662 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2663 {
2664 	int ret;
2665 
2666 	ret = do_8051_command(dd, HCMD_MISC,
2667 		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2668 		NULL);
2669 	if (ret != HCMD_SUCCESS) {
2670 		dd_dev_err(dd, "%s: command failed with error %d\n",
2671 			__func__, ret);
2672 	}
2673 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2674 }
2675 
2676 /*
2677  * Set the LCB selector - allow host access.  The DCC selector always
2678  * points to the host.
2679  */
2680 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2681 {
2682 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683 				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684 				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2685 }
2686 
2687 /*
2688  * Clear the LCB selector - allow 8051 access.  The DCC selector always
2689  * points to the host.
2690  */
2691 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2692 {
2693 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694 				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2695 }
2696 
2697 /*
2698  * Acquire LCB access from the 8051.  If the host already has access,
2699  * just increment a counter.  Otherwise, inform the 8051 that the
2700  * host is taking access.
2701  *
2702  * Returns:
2703  *	0 on success
2704  *	-EBUSY if the 8051 has control and cannot be disturbed
2705  *	-errno if unable to acquire access from the 8051
2706  */
2707 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2708 {
2709 	struct hfi1_pportdata *ppd = dd->pport;
2710 	int ret = 0;
2711 
2712 	/*
2713 	 * Use the host link state lock so the operation of this routine
2714 	 * { link state check, selector change, count increment } can occur
2715 	 * as a unit against a link state change.  Otherwise there is a
2716 	 * race between the state change and the count increment.
2717 	 */
2718 	if (sleep_ok) {
2719 		mutex_lock(&ppd->hls_lock);
2720 	} else {
2721 		while (!mutex_trylock(&ppd->hls_lock))
2722 			udelay(1);
2723 	}
2724 
2725 	/* this access is valid only when the link is up */
2726 	if ((ppd->host_link_state & HLS_UP) == 0) {
2727 		dd_dev_info(dd, "%s: link state %s not up\n",
2728 			__func__, link_state_name(ppd->host_link_state));
2729 		ret = -EBUSY;
2730 		goto done;
2731 	}
2732 
2733 	if (dd->lcb_access_count == 0) {
2734 		ret = request_host_lcb_access(dd);
2735 		if (ret) {
2736 			dd_dev_err(dd,
2737 				"%s: unable to acquire LCB access, err %d\n",
2738 				__func__, ret);
2739 			goto done;
2740 		}
2741 		set_host_lcb_access(dd);
2742 	}
2743 	dd->lcb_access_count++;
2744 done:
2745 	mutex_unlock(&ppd->hls_lock);
2746 	return ret;
2747 }
2748 
2749 /*
2750  * Release LCB access by decrementing the use count.  If the count is moving
2751  * from 1 to 0, inform 8051 that it has control back.
2752  *
2753  * Returns:
2754  *	0 on success
2755  *	-errno if unable to release access to the 8051
2756  */
2757 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2758 {
2759 	int ret = 0;
2760 
2761 	/*
2762 	 * Use the host link state lock because the acquire needed it.
2763 	 * Here, we only need to keep { selector change, count decrement }
2764 	 * as a unit.
2765 	 */
2766 	if (sleep_ok) {
2767 		mutex_lock(&dd->pport->hls_lock);
2768 	} else {
2769 		while (!mutex_trylock(&dd->pport->hls_lock))
2770 			udelay(1);
2771 	}
2772 
2773 	if (dd->lcb_access_count == 0) {
2774 		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2775 			__func__);
2776 		goto done;
2777 	}
2778 
2779 	if (dd->lcb_access_count == 1) {
2780 		set_8051_lcb_access(dd);
2781 		ret = request_8051_lcb_access(dd);
2782 		if (ret) {
2783 			dd_dev_err(dd,
2784 				"%s: unable to release LCB access, err %d\n",
2785 				__func__, ret);
2786 			/* restore host access if the grant didn't work */
2787 			set_host_lcb_access(dd);
2788 			goto done;
2789 		}
2790 	}
2791 	dd->lcb_access_count--;
2792 done:
2793 	mutex_unlock(&dd->pport->hls_lock);
2794 	return ret;
2795 }
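
/*
 * Illustrative use of the LCB access helpers above (not a call site in
 * this file): the host should only touch DC_LCB_* CSRs while it holds
 * LCB access.
 *
 *	if (acquire_lcb_access(dd, 1) == 0) {
 *		reg = read_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT);
 *		release_lcb_access(dd, 1);
 *	}
 */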
2796 
2797 /*
2798  * Initialize LCB access variables and state.  Called during driver load,
2799  * after most of the initialization is finished.
2800  *
2801  * The DC default is LCB access on for the host.  The driver defaults to
2802  * leaving access to the 8051.  Assign access now - this constrains the call
2803  * to this routine to be after all LCB set-up is done.  In particular, after
2804  * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2805  */
2806 static void init_lcb_access(struct hfi1_devdata *dd)
2807 {
2808 	dd->lcb_access_count = 0;
2809 }
2810 
2811 /*
2812  * Write a response back to a 8051 request.
2813  */
2814 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2815 {
2816 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817 		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818 		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819 		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2820 }
2821 
2822 /*
2823  * Handle requests from the 8051.
2824  */
2825 static void handle_8051_request(struct hfi1_devdata *dd)
2826 {
2827 	u64 reg;
2828 	u16 data;
2829 	u8 type;
2830 
2831 	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832 	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833 		return;	/* no request */
2834 
2835 	/* zero out COMPLETED so the response is seen */
2836 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2837 
2838 	/* extract request details */
2839 	type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840 			& DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841 	data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842 			& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2843 
2844 	switch (type) {
2845 	case HREQ_LOAD_CONFIG:
2846 	case HREQ_SAVE_CONFIG:
2847 	case HREQ_READ_CONFIG:
2848 	case HREQ_SET_TX_EQ_ABS:
2849 	case HREQ_SET_TX_EQ_REL:
2850 	case HREQ_ENABLE:
2851 		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2852 			type);
2853 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2854 		break;
2855 
2856 	case HREQ_CONFIG_DONE:
2857 		hreq_response(dd, HREQ_SUCCESS, 0);
2858 		break;
2859 
2860 	case HREQ_INTERFACE_TEST:
2861 		hreq_response(dd, HREQ_SUCCESS, data);
2862 		break;
2863 
2864 	default:
2865 		dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2867 		break;
2868 	}
2869 }
2870 
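/*
 * Write the SEND_CM_GLOBAL_CREDIT CSR: the allocation unit (vAU), the
 * total credit limit, and the shared credit limit, each shifted into
 * its field.
 */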
2871 static void write_global_credit(struct hfi1_devdata *dd,
2872 				u8 vau, u16 total, u16 shared)
2873 {
2874 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2875 		((u64)total
2876 			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2877 		| ((u64)shared
2878 			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879 		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2880 }
2881 
2882 /*
2883  * Set up initial VL15 credits of the remote.  Assumes the rest of
2884  * the CM credit registers are zero from a previous global or credit reset.
2885  */
2886 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2887 {
2888 	/* leave shared count at zero for both global and VL15 */
2889 	write_global_credit(dd, vau, vl15buf, 0);
2890 
2891 	/* We may need some credits for another VL when sending packets
2892 	 * with the snoop interface. Dividing it down the middle for VL15
2893 	 * and VL0 should suffice.
2894 	 */
2895 	if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896 		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897 		    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898 		write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899 		    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2900 	} else {
2901 		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902 			<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2903 	}
2904 }
2905 
2906 /*
2907  * Zero all credit details from the previous connection and
2908  * reset the CM manager's internal counters.
2909  */
2910 void reset_link_credits(struct hfi1_devdata *dd)
2911 {
2912 	int i;
2913 
2914 	/* remove all previous VL credit limits */
2915 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916 		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918 	write_global_credit(dd, 0, 0, 0);
2919 	/* reset the CM block */
2920 	pio_send_control(dd, PSC_CM_RESET);
2921 }
2922 
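/*
 * The 8051 exchanges allocation and credit units in log2-encoded form
 * (vAU, vCU).  The helpers below convert between the encoded values and
 * the actual unit sizes, e.g. vcu = 3 gives CU = 8 and vau = 2 gives
 * AU = 32 bytes.
 */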
2923 /* convert a vCU to a CU */
2924 static u32 vcu_to_cu(u8 vcu)
2925 {
2926 	return 1 << vcu;
2927 }
2928 
2929 /* convert a CU to a vCU */
2930 static u8 cu_to_vcu(u32 cu)
2931 {
2932 	return ilog2(cu);
2933 }
2934 
2935 /* convert a vAU to an AU */
2936 static u32 vau_to_au(u8 vau)
2937 {
2938 	return 8 * (1 << vau);
2939 }
2940 
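/* Reset the SM trap QP and SA QP to their defaults at link up. */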
2941 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2942 {
2943 	ppd->sm_trap_qp = 0x0;
2944 	ppd->sa_qp = 0x1;
2945 }
2946 
2947 /*
2948  * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2949  */
2950 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2951 {
2952 	u64 reg;
2953 
2954 	/* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955 	write_csr(dd, DC_LCB_CFG_RUN, 0);
2956 	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958 		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959 	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960 	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961 	reg = read_csr(dd, DCC_CFG_RESET);
2962 	write_csr(dd, DCC_CFG_RESET,
2963 		reg
2964 		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965 		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966 	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2967 	if (!abort) {
2968 		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2969 		write_csr(dd, DCC_CFG_RESET, reg);
2970 		write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2971 	}
2972 }
2973 
2974 /*
2975  * This routine should be called after the link has been transitioned to
2976  * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2977  * reset).
2978  *
2979  * The expectation is that the caller of this routine would have taken
2980  * care of properly transitioning the link into the correct state.
2981  */
2982 static void dc_shutdown(struct hfi1_devdata *dd)
2983 {
2984 	unsigned long flags;
2985 
2986 	spin_lock_irqsave(&dd->dc8051_lock, flags);
2987 	if (dd->dc_shutdown) {
2988 		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2989 		return;
2990 	}
2991 	dd->dc_shutdown = 1;
2992 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993 	/* Shutdown the LCB */
2994 	lcb_shutdown(dd, 1);
2995 	/* Going to OFFLINE would have caused the 8051 to put the
2996 	 * SerDes into reset already. Just need to shut down the 8051
2997 	 * itself. */
2998 	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2999 }
3000 
3001 /* Calling this after the DC has been brought out of reset should not
3002  * do any damage. */
3003 static void dc_start(struct hfi1_devdata *dd)
3004 {
3005 	unsigned long flags;
3006 	int ret;
3007 
3008 	spin_lock_irqsave(&dd->dc8051_lock, flags);
3009 	if (!dd->dc_shutdown)
3010 		goto done;
3011 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012 	/* Take the 8051 out of reset */
3013 	write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014 	/* Wait until 8051 is ready */
3015 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3016 	if (ret) {
3017 		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3018 			__func__);
3019 	}
3020 	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021 	write_csr(dd, DCC_CFG_RESET, 0x10);
3022 	/* lcb_shutdown() with abort=1 does not restore these */
3023 	write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024 	spin_lock_irqsave(&dd->dc8051_lock, flags);
3025 	dd->dc_shutdown = 0;
3026 done:
3027 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3028 }
3029 
3030 /*
3031  * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3032  */
3033 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3034 {
3035 	u64 rx_radr, tx_radr;
3036 	u32 version;
3037 
3038 	if (dd->icode != ICODE_FPGA_EMULATION)
3039 		return;
3040 
3041 	/*
3042 	 * These LCB defaults on the _s emulator are good; nothing to do here:
3043 	 *	LCB_CFG_TX_FIFOS_RADR
3044 	 *	LCB_CFG_RX_FIFOS_RADR
3045 	 *	LCB_CFG_LN_DCLK
3046 	 *	LCB_CFG_IGNORE_LOST_RCLK
3047 	 */
3048 	if (is_emulator_s(dd))
3049 		return;
3050 	/* else this is _p */
3051 
3052 	version = emulator_rev(dd);
3053 	if (!is_a0(dd))
3054 		version = 0x2d;	/* all B0 use 0x2d or higher settings */
3055 
3056 	if (version <= 0x12) {
3057 		/* release 0x12 and below */
3058 
3059 		/*
3060 		 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061 		 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062 		 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3063 		 */
3064 		rx_radr =
3065 		      0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3068 		/*
3069 		 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070 		 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3071 		 */
3072 		tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073 	} else if (version <= 0x18) {
3074 		/* release 0x13 up to 0x18 */
3075 		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3076 		rx_radr =
3077 		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080 		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081 	} else if (version == 0x19) {
3082 		/* release 0x19 */
3083 		/* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3084 		rx_radr =
3085 		      0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088 		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089 	} else if (version == 0x1a) {
3090 		/* release 0x1a */
3091 		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3092 		rx_radr =
3093 		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096 		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097 		write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3098 	} else {
3099 		/* release 0x1b and higher */
3100 		/* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3101 		rx_radr =
3102 		      0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103 		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104 		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105 		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3106 	}
3107 
3108 	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109 	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110 	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111 		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3113 }
3114 
3115 /*
3116  * Handle a SMA idle message
3117  *
3118  * This is a work-queue function outside of the interrupt.
3119  */
3120 void handle_sma_message(struct work_struct *work)
3121 {
3122 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3123 							sma_message_work);
3124 	struct hfi1_devdata *dd = ppd->dd;
3125 	u64 msg;
3126 	int ret;
3127 
3128 	/* msg is bytes 1-4 of the 40-bit idle message - the command code
3129 	   is stripped off */
3130 	ret = read_idle_sma(dd, &msg);
3131 	if (ret)
3132 		return;
3133 	dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3134 	/*
3135 	 * React to the SMA message.  Byte[1] (0 for us) is the command.
3136 	 */
3137 	switch (msg & 0xff) {
3138 	case SMA_IDLE_ARM:
3139 		/*
3140 		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3141 		 * State Transitions
3142 		 *
3143 		 * Only expected in INIT or ARMED, discard otherwise.
3144 		 */
3145 		if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146 			ppd->neighbor_normal = 1;
3147 		break;
3148 	case SMA_IDLE_ACTIVE:
3149 		/*
3150 		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3151 		 * State Transitions
3152 		 *
3153 		 * Can activate the node.  Discard otherwise.
3154 		 */
3155 		if (ppd->host_link_state == HLS_UP_ARMED
3156 					&& ppd->is_active_optimize_enabled) {
3157 			ppd->neighbor_normal = 1;
3158 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
3159 			if (ret)
3160 				dd_dev_err(
3161 					dd,
3162 					"%s: received Active SMA idle message, couldn't set link to Active\n",
3163 					__func__);
3164 		}
3165 		break;
3166 	default:
3167 		dd_dev_err(dd,
3168 			"%s: received unexpected SMA idle message 0x%llx\n",
3169 			__func__, msg);
3170 		break;
3171 	}
3172 }
3173 
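/*
 * Read-modify-write RCV_CTRL under the rcvctrl lock: set the bits in
 * "add" and clear the bits in "clear".
 */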
3174 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3175 {
3176 	u64 rcvctrl;
3177 	unsigned long flags;
3178 
3179 	spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180 	rcvctrl = read_csr(dd, RCV_CTRL);
3181 	rcvctrl |= add;
3182 	rcvctrl &= ~clear;
3183 	write_csr(dd, RCV_CTRL, rcvctrl);
3184 	spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3185 }
3186 
3187 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3188 {
3189 	adjust_rcvctrl(dd, add, 0);
3190 }
3191 
3192 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3193 {
3194 	adjust_rcvctrl(dd, 0, clear);
3195 }
3196 
3197 /*
3198  * Called from all interrupt handlers to start handling an SPC freeze.
3199  */
3200 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3201 {
3202 	struct hfi1_devdata *dd = ppd->dd;
3203 	struct send_context *sc;
3204 	int i;
3205 
3206 	if (flags & FREEZE_SELF)
3207 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3208 
3209 	/* enter frozen mode */
3210 	dd->flags |= HFI1_FROZEN;
3211 
3212 	/* notify all SDMA engines that they are going into a freeze */
3213 	sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3214 
3215 	/* do halt pre-handling on all enabled send contexts */
3216 	for (i = 0; i < dd->num_send_contexts; i++) {
3217 		sc = dd->send_contexts[i].sc;
3218 		if (sc && (sc->flags & SCF_ENABLED))
3219 			sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3220 	}
3221 
3222 	/* Send contexts are frozen. Notify user space. */
3223 	hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3224 
3225 	if (flags & FREEZE_ABORT) {
3226 		dd_dev_err(dd,
3227 			   "Aborted freeze recovery. Please REBOOT system\n");
3228 		return;
3229 	}
3230 	/* queue non-interrupt handler */
3231 	queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3232 }
3233 
3234 /*
3235  * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236  * depending on the "freeze" parameter.
3237  *
3238  * No need to return an error if it times out, our only option
3239  * is to proceed anyway.
3240  */
3241 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3242 {
3243 	unsigned long timeout;
3244 	u64 reg;
3245 
3246 	timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3247 	while (1) {
3248 		reg = read_csr(dd, CCE_STATUS);
3249 		if (freeze) {
3250 			/* waiting until all indicators are set */
3251 			if ((reg & ALL_FROZE) == ALL_FROZE)
3252 				return;	/* all done */
3253 		} else {
3254 			/* waiting until all indicators are clear */
3255 			if ((reg & ALL_FROZE) == 0)
3256 				return; /* all done */
3257 		}
3258 
3259 		if (time_after(jiffies, timeout)) {
3260 			dd_dev_err(dd,
3261 				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3262 				freeze ? "" : "un",
3263 				reg & ALL_FROZE,
3264 				freeze ? ALL_FROZE : 0ull);
3265 			return;
3266 		}
3267 		usleep_range(80, 120);
3268 	}
3269 }
3270 
3271 /*
3272  * Do all freeze handling for the RXE block.
3273  */
3274 static void rxe_freeze(struct hfi1_devdata *dd)
3275 {
3276 	int i;
3277 
3278 	/* disable port */
3279 	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3280 
3281 	/* disable all receive contexts */
3282 	for (i = 0; i < dd->num_rcv_contexts; i++)
3283 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3284 }
3285 
3286 /*
3287  * Unfreeze handling for the RXE block - kernel contexts only.
3288  * This will also enable the port.  User contexts will do unfreeze
3289  * handling on a per-context basis as they call into the driver.
3290  *
3291  */
3292 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3293 {
3294 	int i;
3295 
3296 	/* enable all kernel contexts */
3297 	for (i = 0; i < dd->n_krcv_queues; i++)
3298 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3299 
3300 	/* enable port */
3301 	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3302 }
3303 
3304 /*
3305  * Non-interrupt SPC freeze handling.
3306  *
3307  * This is a work-queue function outside of the triggering interrupt.
3308  */
3309 void handle_freeze(struct work_struct *work)
3310 {
3311 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3312 								freeze_work);
3313 	struct hfi1_devdata *dd = ppd->dd;
3314 
3315 	/* wait for freeze indicators on all affected blocks */
3316 	dd_dev_info(dd, "Entering SPC freeze\n");
3317 	wait_for_freeze_status(dd, 1);
3318 
3319 	/* SPC is now frozen */
3320 
3321 	/* do send PIO freeze steps */
3322 	pio_freeze(dd);
3323 
3324 	/* do send DMA freeze steps */
3325 	sdma_freeze(dd);
3326 
3327 	/* do send egress freeze steps - nothing to do */
3328 
3329 	/* do receive freeze steps */
3330 	rxe_freeze(dd);
3331 
3332 	/*
3333 	 * Unfreeze the hardware - clear the freeze, wait for each
3334 	 * block's frozen bit to clear, then clear the frozen flag.
3335 	 */
3336 	write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337 	wait_for_freeze_status(dd, 0);
3338 
3339 	if (is_a0(dd)) {
3340 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341 		wait_for_freeze_status(dd, 1);
3342 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343 		wait_for_freeze_status(dd, 0);
3344 	}
3345 
3346 	/* do send PIO unfreeze steps for kernel contexts */
3347 	pio_kernel_unfreeze(dd);
3348 
3349 	/* do send DMA unfreeze steps */
3350 	sdma_unfreeze(dd);
3351 
3352 	/* do send egress unfreeze steps - nothing to do */
3353 
3354 	/* do receive unfreeze steps for kernel contexts */
3355 	rxe_kernel_unfreeze(dd);
3356 
3357 	/*
3358 	 * The unfreeze procedure touches global device registers when
3359 	 * it disables and re-enables RXE. Mark the device unfrozen
3360 	 * after all that is done so other parts of the driver waiting
3361 	 * for the device to unfreeze don't do things out of order.
3362 	 *
3363 	 * The above implies that the meaning of HFI1_FROZEN flag is
3364 	 * "Device has gone into freeze mode and freeze mode handling
3365 	 * is still in progress."
3366 	 *
3367 	 * The flag will be removed when freeze mode processing has
3368 	 * completed.
3369 	 */
3370 	dd->flags &= ~HFI1_FROZEN;
3371 	wake_up(&dd->event_queue);
3372 
3373 	/* no longer frozen */
3374 	dd_dev_err(dd, "Exiting SPC freeze\n");
3375 }
3376 
3377 /*
3378  * Handle a link up interrupt from the 8051.
3379  *
3380  * This is a work-queue function outside of the interrupt.
3381  */
3382 void handle_link_up(struct work_struct *work)
3383 {
3384 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3385 								link_up_work);
3386 	set_link_state(ppd, HLS_UP_INIT);
3387 
3388 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389 	read_ltp_rtt(ppd->dd);
3390 	/*
3391 	 * OPA specifies that certain counters are cleared on a transition
3392 	 * to link up, so do that.
3393 	 */
3394 	clear_linkup_counters(ppd->dd);
3395 	/*
3396 	 * And (re)set link up default values.
3397 	 */
3398 	set_linkup_defaults(ppd);
3399 
3400 	/* enforce link speed enabled */
3401 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402 		/* oops - current speed is not enabled, bounce */
3403 		dd_dev_err(ppd->dd,
3404 			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405 			ppd->link_speed_active, ppd->link_speed_enabled);
3406 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407 			OPA_LINKDOWN_REASON_SPEED_POLICY);
3408 		set_link_state(ppd, HLS_DN_OFFLINE);
3409 		start_link(ppd);
3410 	}
3411 }
3412 
3413 /* Several pieces of LNI information were cached for SMA in ppd.
3414  * Reset these on link down */
3415 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3416 {
3417 	ppd->neighbor_guid = 0;
3418 	ppd->neighbor_port_number = 0;
3419 	ppd->neighbor_type = 0;
3420 	ppd->neighbor_fm_security = 0;
3421 }
3422 
3423 /*
3424  * Handle a link down interrupt from the 8051.
3425  *
3426  * This is a work-queue function outside of the interrupt.
3427  */
3428 void handle_link_down(struct work_struct *work)
3429 {
3430 	u8 lcl_reason, neigh_reason = 0;
3431 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3432 								link_down_work);
3433 
3434 	/* go offline first, then deal with reasons */
3435 	set_link_state(ppd, HLS_DN_OFFLINE);
3436 
3437 	lcl_reason = 0;
3438 	read_planned_down_reason_code(ppd->dd, &neigh_reason);
3439 
3440 	/*
3441 	 * If no reason, assume peer-initiated but missed
3442 	 * LinkGoingDown idle flits.
3443 	 */
3444 	if (neigh_reason == 0)
3445 		lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3446 
3447 	set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3448 
3449 	reset_neighbor_info(ppd);
3450 
3451 	/* disable the port */
3452 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3453 
3454 	/* If there is no cable attached, turn the DC off. Otherwise,
3455 	 * start the link bring-up. */
3456 	if (!qsfp_mod_present(ppd))
3457 		dc_shutdown(ppd->dd);
3458 	else
3459 		start_link(ppd);
3460 }
3461 
3462 void handle_link_bounce(struct work_struct *work)
3463 {
3464 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3465 							link_bounce_work);
3466 
3467 	/*
3468 	 * Only do something if the link is currently up.
3469 	 */
3470 	if (ppd->host_link_state & HLS_UP) {
3471 		set_link_state(ppd, HLS_DN_OFFLINE);
3472 		start_link(ppd);
3473 	} else {
3474 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475 			__func__, link_state_name(ppd->host_link_state));
3476 	}
3477 }
3478 
3479 /*
3480  * Mask conversion: Capability exchange to Port LTP.  The capability
3481  * exchange has an implicit 16b CRC that is mandatory.
3482  */
3483 static int cap_to_port_ltp(int cap)
3484 {
3485 	int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3486 
3487 	if (cap & CAP_CRC_14B)
3488 		port_ltp |= PORT_LTP_CRC_MODE_14;
3489 	if (cap & CAP_CRC_48B)
3490 		port_ltp |= PORT_LTP_CRC_MODE_48;
3491 	if (cap & CAP_CRC_12B_16B_PER_LANE)
3492 		port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3493 
3494 	return port_ltp;
3495 }
3496 
3497 /*
3498  * Convert an OPA Port LTP mask to capability mask
3499  */
3500 int port_ltp_to_cap(int port_ltp)
3501 {
3502 	int cap_mask = 0;
3503 
3504 	if (port_ltp & PORT_LTP_CRC_MODE_14)
3505 		cap_mask |= CAP_CRC_14B;
3506 	if (port_ltp & PORT_LTP_CRC_MODE_48)
3507 		cap_mask |= CAP_CRC_48B;
3508 	if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509 		cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3510 
3511 	return cap_mask;
3512 }
3513 
3514 /*
3515  * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3516  */
3517 static int lcb_to_port_ltp(int lcb_crc)
3518 {
3519 	int port_ltp = 0;
3520 
3521 	if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522 		port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523 	else if (lcb_crc == LCB_CRC_48B)
3524 		port_ltp = PORT_LTP_CRC_MODE_48;
3525 	else if (lcb_crc == LCB_CRC_14B)
3526 		port_ltp = PORT_LTP_CRC_MODE_14;
3527 	else
3528 		port_ltp = PORT_LTP_CRC_MODE_16;
3529 
3530 	return port_ltp;
3531 }
3532 
3533 /*
3534  * Our neighbor has indicated that we are allowed to act as a fabric
3535  * manager, so place the full management partition key in the second
3536  * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537  * that we should already have the limited management partition key in
3538  * array element 1, and also that the port is not yet up when
3539  * add_full_mgmt_pkey() is invoked.
3540  */
3541 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3542 {
3543 	struct hfi1_devdata *dd = ppd->dd;
3544 
3545 	/* Sanity check - ppd->pkeys[2] should be 0 */
3546 	if (ppd->pkeys[2] != 0)
3547 		dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548 			   __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3551 }
3552 
3553 /*
3554  * Convert the given link width to the OPA link width bitmask.
3555  */
3556 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3557 {
3558 	switch (width) {
3559 	case 0:
3560 		/*
3561 		 * Simulator and quick linkup do not set the width.
3562 		 * Just set it to 4x without complaint.
3563 		 */
3564 		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565 			return OPA_LINK_WIDTH_4X;
3566 		return 0; /* no lanes up */
3567 	case 1: return OPA_LINK_WIDTH_1X;
3568 	case 2: return OPA_LINK_WIDTH_2X;
3569 	case 3: return OPA_LINK_WIDTH_3X;
3570 	default:
3571 		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3572 			__func__, width);
3573 		/* fall through */
3574 	case 4: return OPA_LINK_WIDTH_4X;
3575 	}
3576 }
3577 
3578 /*
3579  * Do a population count on the bottom nibble.
3580  */
3581 static const u8 bit_counts[16] = {
3582 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3583 };
3584 static inline u8 nibble_to_count(u8 nibble)
3585 {
3586 	return bit_counts[nibble & 0xf];
3587 }
3588 
3589 /*
3590  * Read the active lane information from the 8051 registers and return
3591  * their widths.
3592  *
3593  * Active lane information is found in these 8051 registers:
3594  *	enable_lane_tx
3595  *	enable_lane_rx
3596  */
3597 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3598 			    u16 *rx_width)
3599 {
3600 	u16 tx, rx;
3601 	u8 enable_lane_rx;
3602 	u8 enable_lane_tx;
3603 	u8 tx_polarity_inversion;
3604 	u8 rx_polarity_inversion;
3605 	u8 max_rate;
3606 
3607 	/* read the active lanes */
3608 	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609 				&rx_polarity_inversion, &max_rate);
3610 	read_local_lni(dd, &enable_lane_rx);
3611 
3612 	/* convert to counts */
3613 	tx = nibble_to_count(enable_lane_tx);
3614 	rx = nibble_to_count(enable_lane_rx);
3615 
3616 	/*
3617 	 * Set link_speed_active here, overriding what was set in
3618 	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3619 	 * set the max_rate field in handle_verify_cap until v0.19.
3620 	 */
3621 	if ((dd->icode == ICODE_RTL_SILICON)
3622 				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623 		/* max_rate: 0 = 12.5G, 1 = 25G */
3624 		switch (max_rate) {
3625 		case 0:
3626 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3627 			break;
3628 		default:
3629 			dd_dev_err(dd,
3630 				"%s: unexpected max rate %d, using 25Gb\n",
3631 				__func__, (int)max_rate);
3632 			/* fall through */
3633 		case 1:
3634 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3635 			break;
3636 		}
3637 	}
3638 
3639 	dd_dev_info(dd,
3640 		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641 		enable_lane_tx, tx, enable_lane_rx, rx);
3642 	*tx_width = link_width_to_bits(dd, tx);
3643 	*rx_width = link_width_to_bits(dd, rx);
3644 }
3645 
3646 /*
3647  * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648  * Valid after the end of VerifyCap and during LinkUp.  Does not change
3649  * after link up.  I.e. look elsewhere for downgrade information.
3650  *
3651  * Bits are:
3652  *	+ bits [7:4] contain the number of active transmitters
3653  *	+ bits [3:0] contain the number of active receivers
3654  * These are numbers 1 through 4 and can be different values if the
3655  * link is asymmetric.
3656  *
3657  * verify_cap_local_fm_link_width[0] retains its original value.
3658  */
3659 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3660 			      u16 *rx_width)
3661 {
3662 	u16 widths, tx, rx;
3663 	u8 misc_bits, local_flags;
3664 	u16 active_tx, active_rx;
3665 
3666 	read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3667 	tx = widths >> 12;
3668 	rx = (widths >> 8) & 0xf;
3669 
3670 	*tx_width = link_width_to_bits(dd, tx);
3671 	*rx_width = link_width_to_bits(dd, rx);
3672 
3673 	/* print the active widths */
3674 	get_link_widths(dd, &active_tx, &active_rx);
3675 }
3676 
3677 /*
3678  * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679  * hardware information when the link first comes up.
3680  *
3681  * The link width is not available until after VerifyCap.AllFramesReceived
3682  * (the trigger for handle_verify_cap), so this is outside that routine
3683  * and should be called when the 8051 signals linkup.
3684  */
3685 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3686 {
3687 	u16 tx_width, rx_width;
3688 
3689 	/* get end-of-LNI link widths */
3690 	get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3691 
3692 	/* use tx_width as the link is supposed to be symmetric on link up */
3693 	ppd->link_width_active = tx_width;
3694 	/* link width downgrade active (LWD.A) starts out matching LW.A */
3695 	ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696 	ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697 	/* per OPA spec, on link up LWD.E resets to LWD.S */
3698 	ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699 	/* cache the active egress rate (units of 10^6 bits/sec) */
3700 	ppd->current_egress_rate = active_egress_rate(ppd);
3701 }
3702 
3703 /*
3704  * Handle a verify capabilities interrupt from the 8051.
3705  *
3706  * This is a work-queue function outside of the interrupt.
3707  */
3708 void handle_verify_cap(struct work_struct *work)
3709 {
3710 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3711 								link_vc_work);
3712 	struct hfi1_devdata *dd = ppd->dd;
3713 	u64 reg;
3714 	u8 power_management;
3715 	u8 continuous;
3716 	u8 vcu;
3717 	u8 vau;
3718 	u8 z;
3719 	u16 vl15buf;
3720 	u16 link_widths;
3721 	u16 crc_mask;
3722 	u16 crc_val;
3723 	u16 device_id;
3724 	u16 active_tx, active_rx;
3725 	u8 partner_supported_crc;
3726 	u8 remote_tx_rate;
3727 	u8 device_rev;
3728 
3729 	set_link_state(ppd, HLS_VERIFY_CAP);
3730 
3731 	lcb_shutdown(dd, 0);
3732 	adjust_lcb_for_fpga_serdes(dd);
3733 
3734 	/*
3735 	 * These are now valid:
3736 	 *	remote VerifyCap fields in the general LNI config
3737 	 *	CSR DC8051_STS_REMOTE_GUID
3738 	 *	CSR DC8051_STS_REMOTE_NODE_TYPE
3739 	 *	CSR DC8051_STS_REMOTE_FM_SECURITY
3740 	 *	CSR DC8051_STS_REMOTE_PORT_NO
3741 	 */
3742 
3743 	read_vc_remote_phy(dd, &power_management, &continuous);
3744 	read_vc_remote_fabric(
3745 		dd,
3746 		&vau,
3747 		&z,
3748 		&vcu,
3749 		&vl15buf,
3750 		&partner_supported_crc);
3751 	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752 	read_remote_device_id(dd, &device_id, &device_rev);
3753 	/*
3754 	 * And the 'MgmtAllowed' information, which is exchanged during
3755 	 * LNI, is also available at this point.
3756 	 */
3757 	read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758 	/* print the active widths */
3759 	get_link_widths(dd, &active_tx, &active_rx);
3760 	dd_dev_info(dd,
3761 		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762 		(int)power_management, (int)continuous);
3763 	dd_dev_info(dd,
3764 		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3765 		(int)vau,
3766 		(int)z,
3767 		(int)vcu,
3768 		(int)vl15buf,
3769 		(int)partner_supported_crc);
3770 	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771 		(u32)remote_tx_rate, (u32)link_widths);
3772 	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773 		(u32)device_id, (u32)device_rev);
3774 	/*
3775 	 * The peer vAU value just read is the peer receiver value.  HFI does
3776 	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
3777 	 * with Z=1 in the fabric capabilities sent to the peer.  The peer
3778 	 * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779 	 * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3780 	 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781 	 * subject to the Z value exception.
3782 	 */
3783 	if (vau == 0)
3784 		vau = 1;
3785 	set_up_vl15(dd, vau, vl15buf);
3786 
3787 	/* set up the LCB CRC mode */
3788 	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3789 
3790 	/* order is important: use the lowest bit in common */
3791 	if (crc_mask & CAP_CRC_14B)
3792 		crc_val = LCB_CRC_14B;
3793 	else if (crc_mask & CAP_CRC_48B)
3794 		crc_val = LCB_CRC_48B;
3795 	else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796 		crc_val = LCB_CRC_12B_16B_PER_LANE;
3797 	else
3798 		crc_val = LCB_CRC_16B;
3799 
3800 	dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801 	write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802 		  (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3803 
3804 	/* set (14b only) or clear sideband credit */
3805 	reg = read_csr(dd, SEND_CM_CTRL);
3806 	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807 		write_csr(dd, SEND_CM_CTRL,
3808 			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3809 	} else {
3810 		write_csr(dd, SEND_CM_CTRL,
3811 			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3812 	}
3813 
3814 	ppd->link_speed_active = 0;	/* invalid value */
3815 	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816 		/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817 		switch (remote_tx_rate) {
3818 		case 0:
3819 			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3820 			break;
3821 		case 1:
3822 			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823 			break;
3824 		}
3825 	} else {
3826 		/* actual rate is highest bit of the ANDed rates */
3827 		u8 rate = remote_tx_rate & ppd->local_tx_rate;
3828 
3829 		if (rate & 2)
3830 			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3831 		else if (rate & 1)
3832 			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3833 	}
3834 	if (ppd->link_speed_active == 0) {
3835 		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836 			__func__, (int)remote_tx_rate);
3837 		ppd->link_speed_active = OPA_LINK_SPEED_25G;
3838 	}
3839 
3840 	/*
3841 	 * Cache the values of the supported, enabled, and active
3842 	 * LTP CRC modes to return in 'portinfo' queries. But the bit
3843 	 * flags that are returned in the portinfo query differ from
3844 	 * what's in the link_crc_mask, crc_sizes, and crc_val
3845 	 * variables. Convert these here.
3846 	 */
3847 	ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848 		/* supported crc modes */
3849 	ppd->port_ltp_crc_mode |=
3850 		cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851 		/* enabled crc modes */
3852 	ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853 		/* active crc mode */
3854 
3855 	/* set up the remote credit return table */
3856 	assign_remote_cm_au_table(dd, vcu);
3857 
3858 	/*
3859 	 * The LCB is reset on entry to handle_verify_cap(), so this must
3860 	 * be applied on every link up.
3861 	 *
3862 	 * Adjust LCB error kill enable to kill the link if
3863 	 * these RBUF errors are seen:
3864 	 *	REPLAY_BUF_MBE_SMASK
3865 	 *	FLIT_INPUT_BUF_MBE_SMASK
3866 	 */
3867 	if (is_a0(dd)) {			/* fixed in B0 */
3868 		reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869 		reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870 			| DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871 		write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3872 	}
3873 
3874 	/* pull LCB fifos out of reset - all fifo clocks must be stable */
3875 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3876 
3877 	/* give 8051 access to the LCB CSRs */
3878 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879 	set_8051_lcb_access(dd);
3880 
3881 	ppd->neighbor_guid =
3882 		read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883 	ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884 					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885 	ppd->neighbor_type =
3886 		read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887 		DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888 	ppd->neighbor_fm_security =
3889 		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890 		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3891 	dd_dev_info(dd,
3892 		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893 		ppd->neighbor_guid, ppd->neighbor_type,
3894 		ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895 	if (ppd->mgmt_allowed)
3896 		add_full_mgmt_pkey(ppd);
3897 
3898 	/* tell the 8051 to go to LinkUp */
3899 	set_link_state(ppd, HLS_GOING_UP);
3900 }
3901 
3902 /*
3903  * Apply the link width downgrade enabled policy against the current active
3904  * link widths.
3905  *
3906  * Called when the enabled policy changes or the active link widths change.
3907  */
3908 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3909 {
3910 	int skip = 1;
3911 	int do_bounce = 0;
3912 	u16 lwde = ppd->link_width_downgrade_enabled;
3913 	u16 tx, rx;
3914 
3915 	mutex_lock(&ppd->hls_lock);
3916 	/* only apply if the link is up */
3917 	if (ppd->host_link_state & HLS_UP)
3918 		skip = 0;
3919 	mutex_unlock(&ppd->hls_lock);
3920 	if (skip)
3921 		return;
3922 
3923 	if (refresh_widths) {
3924 		get_link_widths(ppd->dd, &tx, &rx);
3925 		ppd->link_width_downgrade_tx_active = tx;
3926 		ppd->link_width_downgrade_rx_active = rx;
3927 	}
3928 
3929 	if (lwde == 0) {
3930 		/* downgrade is disabled */
3931 
3932 		/* bounce if not at starting active width */
3933 		if ((ppd->link_width_active !=
3934 					ppd->link_width_downgrade_tx_active)
3935 				|| (ppd->link_width_active !=
3936 					ppd->link_width_downgrade_rx_active)) {
3937 			dd_dev_err(ppd->dd,
3938 				"Link downgrade is disabled and link has downgraded, downing link\n");
3939 			dd_dev_err(ppd->dd,
3940 				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941 				ppd->link_width_active,
3942 				ppd->link_width_downgrade_tx_active,
3943 				ppd->link_width_downgrade_rx_active);
3944 			do_bounce = 1;
3945 		}
3946 	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947 		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948 		/* Tx or Rx is outside the enabled policy */
3949 		dd_dev_err(ppd->dd,
3950 			"Link is outside of downgrade allowed, downing link\n");
3951 		dd_dev_err(ppd->dd,
3952 			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3953 			lwde,
3954 			ppd->link_width_downgrade_tx_active,
3955 			ppd->link_width_downgrade_rx_active);
3956 		do_bounce = 1;
3957 	}
3958 
3959 	if (do_bounce) {
3960 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961 		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962 		set_link_state(ppd, HLS_DN_OFFLINE);
3963 		start_link(ppd);
3964 	}
3965 }
3966 
3967 /*
3968  * Handle a link downgrade interrupt from the 8051.
3969  *
3970  * This is a work-queue function outside of the interrupt.
3971  */
3972 void handle_link_downgrade(struct work_struct *work)
3973 {
3974 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975 							link_downgrade_work);
3976 
3977 	dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978 	apply_link_downgrade_policy(ppd, 1);
3979 }
3980 
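/*
 * Helpers to format error flag bitmasks into human-readable strings for
 * the log messages below.
 */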
3981 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3982 {
3983 	return flag_string(buf, buf_len, flags, dcc_err_flags,
3984 		ARRAY_SIZE(dcc_err_flags));
3985 }
3986 
3987 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3988 {
3989 	return flag_string(buf, buf_len, flags, lcb_err_flags,
3990 		ARRAY_SIZE(lcb_err_flags));
3991 }
3992 
3993 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3994 {
3995 	return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996 		ARRAY_SIZE(dc8051_err_flags));
3997 }
3998 
3999 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4000 {
4001 	return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002 		ARRAY_SIZE(dc8051_info_err_flags));
4003 }
4004 
4005 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4006 {
4007 	return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008 		ARRAY_SIZE(dc8051_info_host_msg_flags));
4009 }
4010 
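/*
 * Handle a DC 8051 error interrupt: decode the error and host message
 * flags set by the firmware, queue the appropriate work items, and
 * report anything left over.
 */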
4011 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4012 {
4013 	struct hfi1_pportdata *ppd = dd->pport;
4014 	u64 info, err, host_msg;
4015 	int queue_link_down = 0;
4016 	char buf[96];
4017 
4018 	/* look at the flags */
4019 	if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020 		/* 8051 information set by firmware */
4021 		/* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022 		info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023 		err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024 			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4025 		host_msg = (info >>
4026 			DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027 			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4028 
4029 		/*
4030 		 * Handle error flags.
4031 		 */
4032 		if (err & FAILED_LNI) {
4033 			/*
4034 			 * LNI error indications are cleared by the 8051
4035 			 * only when starting polling.  Only pay attention
4036 			 * to them when in the states that occur during
4037 			 * LNI.
4038 			 */
4039 			if (ppd->host_link_state
4040 			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041 				queue_link_down = 1;
4042 				dd_dev_info(dd, "Link error: %s\n",
4043 					dc8051_info_err_string(buf,
4044 						sizeof(buf),
4045 						err & FAILED_LNI));
4046 			}
4047 			err &= ~(u64)FAILED_LNI;
4048 		}
4049 		if (err) {
4050 			/* report remaining errors, but do not do anything */
4051 			dd_dev_err(dd, "8051 info error: %s\n",
4052 				dc8051_info_err_string(buf, sizeof(buf), err));
4053 		}
4054 
4055 		/*
4056 		 * Handle host message flags.
4057 		 */
4058 		if (host_msg & HOST_REQ_DONE) {
4059 			/*
4060 			 * Presently, the driver does a busy wait for
4061 			 * host requests to complete.  This is only an
4062 			 * informational message.
4063 			 * NOTE: The 8051 clears the host message
4064 			 * information *on the next 8051 command*.
4065 			 * Therefore, when linkup is achieved,
4066 			 * this flag will still be set.
4067 			 */
4068 			host_msg &= ~(u64)HOST_REQ_DONE;
4069 		}
4070 		if (host_msg & BC_SMA_MSG) {
4071 			queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072 			host_msg &= ~(u64)BC_SMA_MSG;
4073 		}
4074 		if (host_msg & LINKUP_ACHIEVED) {
4075 			dd_dev_info(dd, "8051: Link up\n");
4076 			queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077 			host_msg &= ~(u64)LINKUP_ACHIEVED;
4078 		}
4079 		if (host_msg & EXT_DEVICE_CFG_REQ) {
4080 			handle_8051_request(dd);
4081 			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4082 		}
4083 		if (host_msg & VERIFY_CAP_FRAME) {
4084 			queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085 			host_msg &= ~(u64)VERIFY_CAP_FRAME;
4086 		}
4087 		if (host_msg & LINK_GOING_DOWN) {
4088 			const char *extra = "";
4089 			/* no downgrade action needed if going down */
4090 			if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091 				host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092 				extra = " (ignoring downgrade)";
4093 			}
4094 			dd_dev_info(dd, "8051: Link down%s\n", extra);
4095 			queue_link_down = 1;
4096 			host_msg &= ~(u64)LINK_GOING_DOWN;
4097 		}
4098 		if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099 			queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100 			host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4101 		}
4102 		if (host_msg) {
4103 			/* report remaining messages, but do not do anything */
4104 			dd_dev_info(dd, "8051 info host message: %s\n",
4105 				dc8051_info_host_msg_string(buf, sizeof(buf),
4106 					host_msg));
4107 		}
4108 
4109 		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4110 	}
4111 	if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4112 		/*
4113 		 * Lost the 8051 heartbeat.  If this happens, we
4114 		 * receive constant interrupts about it.  Disable
4115 		 * the interrupt after the first.
4116 		 */
4117 		dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118 		write_csr(dd, DC_DC8051_ERR_EN,
4119 			read_csr(dd, DC_DC8051_ERR_EN)
4120 			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4121 
4122 		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4123 	}
4124 	if (reg) {
4125 		/* report the error, but do not do anything */
4126 		dd_dev_err(dd, "8051 error: %s\n",
4127 			dc8051_err_string(buf, sizeof(buf), reg));
4128 	}
4129 
4130 	if (queue_link_down) {
4131 		/* if the link is already going down or disabled, do not
4132 		 * queue another */
4133 		if ((ppd->host_link_state
4134 				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4135 				|| ppd->link_enabled == 0) {
4136 			dd_dev_info(dd, "%s: not queuing link down\n",
4137 				__func__);
4138 		} else {
4139 			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4140 		}
4141 	}
4142 }
4143 
4144 static const char * const fm_config_txt[] = {
4145 [0] =
4146 	"BadHeadDist: Distance violation between two head flits",
4147 [1] =
4148 	"BadTailDist: Distance violation between two tail flits",
4149 [2] =
4150 	"BadCtrlDist: Distance violation between two credit control flits",
4151 [3] =
4152 	"BadCrdAck: Credits return for unsupported VL",
4153 [4] =
4154 	"UnsupportedVLMarker: Received VL Marker",
4155 [5] =
4156 	"BadPreempt: Exceeded the preemption nesting level",
4157 [6] =
4158 	"BadControlFlit: Received unsupported control flit",
4159 /* no 7 */
4160 [8] =
4161 	"UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4162 };
4163 
4164 static const char * const port_rcv_txt[] = {
4165 [1] =
4166 	"BadPktLen: Illegal PktLen",
4167 [2] =
4168 	"PktLenTooLong: Packet longer than PktLen",
4169 [3] =
4170 	"PktLenTooShort: Packet shorter than PktLen",
4171 [4] =
4172 	"BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4173 [5] =
4174 	"BadDLID: Illegal DLID (0, doesn't match HFI)",
4175 [6] =
4176 	"BadL2: Illegal L2 opcode",
4177 [7] =
4178 	"BadSC: Unsupported SC",
4179 [9] =
4180 	"BadRC: Illegal RC",
4181 [11] =
4182 	"PreemptError: Preempting with same VL",
4183 [12] =
4184 	"PreemptVL15: Preempting a VL15 packet",
4185 };
4186 
4187 #define OPA_LDR_FMCONFIG_OFFSET 16
4188 #define OPA_LDR_PORTRCV_OFFSET 0
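/*
 * Handle a DCC error interrupt: latch the error info for later queries,
 * report the error, and bounce the link if the FM's PortErrorAction
 * policy requests it.
 */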
4189 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4190 {
4191 	u64 info, hdr0, hdr1;
4192 	const char *extra;
4193 	char buf[96];
4194 	struct hfi1_pportdata *ppd = dd->pport;
4195 	u8 lcl_reason = 0;
4196 	int do_bounce = 0;
4197 
4198 	if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199 		if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200 			info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201 			dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202 			/* set status bit */
4203 			dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4204 		}
4205 		reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4206 	}
4207 
4208 	if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209 		struct hfi1_pportdata *ppd = dd->pport;
4210 		/* this counter saturates at (2^32) - 1 */
4211 		if (ppd->link_downed < (u32)UINT_MAX)
4212 			ppd->link_downed++;
4213 		reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4214 	}
4215 
4216 	if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217 		u8 reason_valid = 1;
4218 
4219 		info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220 		if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221 			dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222 			/* set status bit */
4223 			dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4224 		}
4225 		switch (info) {
4226 		case 0:
4227 		case 1:
4228 		case 2:
4229 		case 3:
4230 		case 4:
4231 		case 5:
4232 		case 6:
4233 			extra = fm_config_txt[info];
4234 			break;
4235 		case 8:
4236 			extra = fm_config_txt[info];
4237 			if (ppd->port_error_action &
4238 			    OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4239 				do_bounce = 1;
4240 				/*
4241 				 * lcl_reason cannot be derived from info
4242 				 * for this error
4243 				 */
4244 				lcl_reason =
4245 				  OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4246 			}
4247 			break;
4248 		default:
4249 			reason_valid = 0;
4250 			snprintf(buf, sizeof(buf), "reserved%lld", info);
4251 			extra = buf;
4252 			break;
4253 		}
4254 
4255 		if (reason_valid && !do_bounce) {
4256 			do_bounce = ppd->port_error_action &
4257 					(1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258 			lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4259 		}
4260 
4261 		/* just report this */
4262 		dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263 		reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4264 	}
4265 
4266 	if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267 		u8 reason_valid = 1;
4268 
4269 		info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270 		hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271 		hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272 		if (!(dd->err_info_rcvport.status_and_code &
4273 		      OPA_EI_STATUS_SMASK)) {
4274 			dd->err_info_rcvport.status_and_code =
4275 				info & OPA_EI_CODE_SMASK;
4276 			/* set status bit */
4277 			dd->err_info_rcvport.status_and_code |=
4278 				OPA_EI_STATUS_SMASK;
4279 			/* save first 2 flits in the packet that caused
4280 			 * the error */
4281 			 dd->err_info_rcvport.packet_flit1 = hdr0;
4282 			 dd->err_info_rcvport.packet_flit2 = hdr1;
4283 		}
4284 		switch (info) {
4285 		case 1:
4286 		case 2:
4287 		case 3:
4288 		case 4:
4289 		case 5:
4290 		case 6:
4291 		case 7:
4292 		case 9:
4293 		case 11:
4294 		case 12:
4295 			extra = port_rcv_txt[info];
4296 			break;
4297 		default:
4298 			reason_valid = 0;
4299 			snprintf(buf, sizeof(buf), "reserved%lld", info);
4300 			extra = buf;
4301 			break;
4302 		}
4303 
4304 		if (reason_valid && !do_bounce) {
4305 			do_bounce = ppd->port_error_action &
4306 					(1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307 			lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4308 		}
4309 
4310 		/* just report this */
4311 		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312 		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4313 			hdr0, hdr1);
4314 
4315 		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4316 	}
4317 
4318 	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319 		/* informative only */
4320 		dd_dev_info(dd, "8051 access to LCB blocked\n");
4321 		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4322 	}
4323 	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324 		/* informative only */
4325 		dd_dev_info(dd, "host access to LCB blocked\n");
4326 		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4327 	}
4328 
4329 	/* report any remaining errors */
4330 	if (reg)
4331 		dd_dev_info(dd, "DCC Error: %s\n",
4332 			dcc_err_string(buf, sizeof(buf), reg));
4333 
4334 	if (lcl_reason == 0)
4335 		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4336 
4337 	if (do_bounce) {
4338 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339 		set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4341 	}
4342 }
4343 
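/* Report LCB errors; no recovery action is taken here. */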
4344 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4345 {
4346 	char buf[96];
4347 
4348 	dd_dev_info(dd, "LCB Error: %s\n",
4349 		lcb_err_string(buf, sizeof(buf), reg));
4350 }
4351 
4352 /*
4353  * CCE block DC interrupt.  Source is < 8.
4354  */
4355 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4356 {
4357 	const struct err_reg_info *eri = &dc_errs[source];
4358 
4359 	if (eri->handler) {
4360 		interrupt_clear_down(dd, 0, eri);
4361 	} else if (source == 3 /* dc_lbm_int */) {
4362 		/*
4363 		 * This indicates that a parity error has occurred on the
4364 		 * address/control lines presented to the LBM.  The error
4365 		 * is a single pulse, there is no associated error flag,
4366 		 * and it is non-maskable.  This is because if a parity
4367 		 * error occurs on the request the request is dropped.
4368 		 * This should never occur, but it is nice to know if it
4369 		 * ever does.
4370 		 */
4371 		dd_dev_err(dd, "Parity error in DC LBM block\n");
4372 	} else {
4373 		dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4374 	}
4375 }
4376 
4377 /*
4378  * TX block send credit interrupt.  Source is < 160.
4379  */
4380 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4381 {
4382 	sc_group_release_update(dd, source);
4383 }
4384 
4385 /*
4386  * TX block SDMA interrupt.  Source is < 48.
4387  *
4388  * SDMA interrupts are grouped by type:
4389  *
4390  *	 0 -  N-1 = SDma
4391  *	 N - 2N-1 = SDmaProgress
4392  *	2N - 3N-1 = SDmaIdle
4393  */
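/*
 * For example, with N == TXE_NUM_SDMA_ENGINES, source 1 decodes to an
 * SDma interrupt on engine 1 and source N + 1 to SDmaProgress on the
 * same engine.
 */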
4394 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4395 {
4396 	/* what interrupt */
4397 	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4398 	/* which engine */
4399 	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4400 
4401 #ifdef CONFIG_SDMA_VERBOSITY
4402 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403 		   slashstrip(__FILE__), __LINE__, __func__);
4404 	sdma_dumpstate(&dd->per_sdma[which]);
4405 #endif
4406 
4407 	if (likely(what < 3 && which < dd->num_sdma)) {
4408 		sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4409 	} else {
4410 		/* should not happen */
4411 		dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4412 	}
4413 }
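
/*
 * Worked example (editor's note, not in the original source): the
 * "Source is < 48" comment above together with the three groups implies
 * TXE_NUM_SDMA_ENGINES == 16 here.  A source of 35 then decodes as
 * what = 35 / 16 = 2 (the SDmaIdle group) and which = 35 % 16 = 3, i.e.
 * the SDmaIdle interrupt of engine 3, and sdma_engine_interrupt() is
 * handed the single status bit 1ull << 35.
 */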
4414 
4415 /*
4416  * RX block receive available interrupt.  Source is < 160.
4417  */
4418 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4419 {
4420 	struct hfi1_ctxtdata *rcd;
4421 	char *err_detail;
4422 
4423 	if (likely(source < dd->num_rcv_contexts)) {
4424 		rcd = dd->rcd[source];
4425 		if (rcd) {
4426 			if (source < dd->first_user_ctxt)
4427 				rcd->do_interrupt(rcd, 0);
4428 			else
4429 				handle_user_interrupt(rcd);
4430 			return;	/* OK */
4431 		}
4432 		/* received an interrupt, but no rcd */
4433 		err_detail = "dataless";
4434 	} else {
4435 		/* received an interrupt, but are not using that context */
4436 		err_detail = "out of range";
4437 	}
4438 	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439 		err_detail, source);
4440 }
4441 
4442 /*
4443  * RX block receive urgent interrupt.  Source is < 160.
4444  */
4445 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4446 {
4447 	struct hfi1_ctxtdata *rcd;
4448 	char *err_detail;
4449 
4450 	if (likely(source < dd->num_rcv_contexts)) {
4451 		rcd = dd->rcd[source];
4452 		if (rcd) {
4453 			/* only pay attention to user urgent interrupts */
4454 			if (source >= dd->first_user_ctxt)
4455 				handle_user_interrupt(rcd);
4456 			return;	/* OK */
4457 		}
4458 		/* received an interrupt, but no rcd */
4459 		err_detail = "dataless";
4460 	} else {
4461 		/* received an interrupt, but are not using that context */
4462 		err_detail = "out of range";
4463 	}
4464 	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465 		err_detail, source);
4466 }
4467 
4468 /*
4469  * Reserved range interrupt.  Should not be called in normal operation.
4470  */
4471 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4472 {
4473 	char name[64];
4474 
4475 	dd_dev_err(dd, "unexpected %s interrupt\n",
4476 				is_reserved_name(name, sizeof(name), source));
4477 }
4478 
4479 static const struct is_table is_table[] = {
4480 /* start		     end
4481 				name func		interrupt func */
4482 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4483 				is_misc_err_name,	is_misc_err_int },
4484 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4485 				is_sdma_eng_err_name,	is_sdma_eng_err_int },
4486 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487 				is_sendctxt_err_name,	is_sendctxt_err_int },
4488 { IS_SDMA_START,	     IS_SDMA_END,
4489 				is_sdma_eng_name,	is_sdma_eng_int },
4490 { IS_VARIOUS_START,	     IS_VARIOUS_END,
4491 				is_various_name,	is_various_int },
4492 { IS_DC_START,	     IS_DC_END,
4493 				is_dc_name,		is_dc_int },
4494 { IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4495 				is_rcv_avail_name,	is_rcv_avail_int },
4496 { IS_RCVURGENT_START,    IS_RCVURGENT_END,
4497 				is_rcv_urgent_name,	is_rcv_urgent_int },
4498 { IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4499 				is_send_credit_name,	is_send_credit_int},
4500 { IS_RESERVED_START,     IS_RESERVED_END,
4501 				is_reserved_name,	is_reserved_int},
4502 };
4503 
4504 /*
4505  * Interrupt source interrupt - called when the given source has an interrupt.
4506  * Source is a bit index into an array of 64-bit integers.
4507  */
4508 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4509 {
4510 	const struct is_table *entry;
4511 
4512 	/* avoids a double compare by walking the table in-order */
4513 	for (entry = &is_table[0]; entry->is_name; entry++) {
4514 		if (source < entry->end) {
4515 			trace_hfi1_interrupt(dd, entry, source);
4516 			entry->is_int(dd, source - entry->start);
4517 			return;
4518 		}
4519 	}
4520 	/* fell off the end */
4521 	dd_dev_err(dd, "invalid interrupt source %u\n", source);
4522 }
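
/*
 * Editor's note: because is_table[] is sorted by range and walked in
 * order, the first entry whose end exceeds the source wins.  For example,
 * any source in [IS_SDMA_START, IS_SDMA_END) is traced and dispatched to
 * is_sdma_eng_int() with the range-relative value source - IS_SDMA_START.
 */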
4523 
4524 /*
4525  * General interrupt handler.  This is able to correctly handle
4526  * all interrupts in case INTx is used.
4527  */
4528 static irqreturn_t general_interrupt(int irq, void *data)
4529 {
4530 	struct hfi1_devdata *dd = data;
4531 	u64 regs[CCE_NUM_INT_CSRS];
4532 	u32 bit;
4533 	int i;
4534 
4535 	this_cpu_inc(*dd->int_counter);
4536 
4537 	/* phase 1: scan and clear all handled interrupts */
4538 	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539 		if (dd->gi_mask[i] == 0) {
4540 			regs[i] = 0;	/* used later */
4541 			continue;
4542 		}
4543 		regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4544 				dd->gi_mask[i];
4545 		/* only clear if anything is set */
4546 		if (regs[i])
4547 			write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4548 	}
4549 
4550 	/* phase 2: call the appropriate handler */
4551 	for_each_set_bit(bit, (unsigned long *)&regs[0],
4552 						CCE_NUM_INT_CSRS*64) {
4553 		is_interrupt(dd, bit);
4554 	}
4555 
4556 	return IRQ_HANDLED;
4557 }
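
/*
 * Editor's note (an inference, not from the original comments): clearing
 * the status CSRs in phase 1, before any handler runs, means a source
 * that re-asserts during phase 2 raises a fresh interrupt rather than
 * being lost.  The bit index found by for_each_set_bit() across regs[]
 * is exactly the source number handed to is_interrupt(); e.g. bit 0 of
 * regs[1] is source 64.
 */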
4558 
4559 static irqreturn_t sdma_interrupt(int irq, void *data)
4560 {
4561 	struct sdma_engine *sde = data;
4562 	struct hfi1_devdata *dd = sde->dd;
4563 	u64 status;
4564 
4565 #ifdef CONFIG_SDMA_VERBOSITY
4566 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567 		   slashstrip(__FILE__), __LINE__, __func__);
4568 	sdma_dumpstate(sde);
4569 #endif
4570 
4571 	this_cpu_inc(*dd->int_counter);
4572 
4573 	/* This read_csr is really bad in the hot path */
4574 	status = read_csr(dd,
4575 			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4576 			& sde->imask;
4577 	if (likely(status)) {
4578 		/* clear the interrupt(s) */
4579 		write_csr(dd,
4580 			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4581 			status);
4582 
4583 		/* handle the interrupt(s) */
4584 		sdma_engine_interrupt(sde, status);
4585 	} else
4586 		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4587 			sde->this_idx);
4588 
4589 	return IRQ_HANDLED;
4590 }
4591 
4592 /*
4593  * Clear the receive interrupt, forcing the write and making sure
4594  * we have data from the chip, pushing everything in front of it
4595  * back to the host.
4596  */
4597 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4598 {
4599 	struct hfi1_devdata *dd = rcd->dd;
4600 	u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4601 
4602 	mmiowb();	/* make sure everything before is written */
4603 	write_csr(dd, addr, rcd->imask);
4604 	/* force the above write on the chip and get a value back */
4605 	(void)read_csr(dd, addr);
4606 }
4607 
4608 /* force the receive interrupt */
4609 static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4610 {
4611 	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4612 }
4613 
4614 /* return non-zero if a packet is present */
4615 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4616 {
4617 	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618 		return (rcd->seq_cnt ==
4619 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4620 
4621 	/* else is RDMA rtail */
4622 	return (rcd->head != get_rcvhdrtail(rcd));
4623 }
4624 
4625 /*
4626  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4627  * This routine will try to handle packets immediately (latency), but if
4628  * it finds too many, it will invoke the thread handler (bandwidth).  The
4629  * chip receive interrupt is *not* cleared down until this or the thread (if
4630  * invoked) is finished.  The intent is to avoid extra interrupts while we
4631  * are processing packets anyway.
4632  */
4633 static irqreturn_t receive_context_interrupt(int irq, void *data)
4634 {
4635 	struct hfi1_ctxtdata *rcd = data;
4636 	struct hfi1_devdata *dd = rcd->dd;
4637 	int disposition;
4638 	int present;
4639 
4640 	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641 	this_cpu_inc(*dd->int_counter);
4642 
4643 	/* receive interrupt remains blocked while processing packets */
4644 	disposition = rcd->do_interrupt(rcd, 0);
4645 
4646 	/*
4647 	 * Too many packets were seen while processing packets in this
4648 	 * IRQ handler.  Invoke the handler thread.  The receive interrupt
4649 	 * remains blocked.
4650 	 */
4651 	if (disposition == RCV_PKT_LIMIT)
4652 		return IRQ_WAKE_THREAD;
4653 
4654 	/*
4655 	 * The packet processor detected no more packets.  Clear the receive
4656 	 * interrupt and recheck for a packet that may have arrived
4657 	 * after the previous check and interrupt clear.  If a packet arrived,
4658 	 * force another interrupt.
4659 	 */
4660 	clear_recv_intr(rcd);
4661 	present = check_packet_present(rcd);
4662 	if (present)
4663 		force_recv_intr(rcd);
4664 
4665 	return IRQ_HANDLED;
4666 }
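
/*
 * Editor's summary of the pattern above: the chip interrupt is cleared
 * only after do_interrupt() reports the ring empty; check_packet_present()
 * then closes the window where a packet landed between that last check
 * and the clear by forcing a new interrupt instead of processing the
 * packet here.  receive_context_thread() below repeats the same sequence
 * with local interrupts disabled.
 */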
4667 
4668 /*
4669  * Receive packet thread handler.  This expects to be invoked with the
4670  * receive interrupt still blocked.
4671  */
4672 static irqreturn_t receive_context_thread(int irq, void *data)
4673 {
4674 	struct hfi1_ctxtdata *rcd = data;
4675 	int present;
4676 
4677 	/* receive interrupt is still blocked from the IRQ handler */
4678 	(void)rcd->do_interrupt(rcd, 1);
4679 
4680 	/*
4681 	 * The packet processor will only return if it detected no more
4682 	 * packets.  Hold IRQs here so we can safely clear the interrupt and
4683 	 * recheck for a packet that may have arrived after the previous
4684 	 * check and the interrupt clear.  If a packet arrived, force another
4685 	 * interrupt.
4686 	 */
4687 	local_irq_disable();
4688 	clear_recv_intr(rcd);
4689 	present = check_packet_present(rcd);
4690 	if (present)
4691 		force_recv_intr(rcd);
4692 	local_irq_enable();
4693 
4694 	return IRQ_HANDLED;
4695 }
4696 
4697 /* ========================================================================= */
4698 
4699 u32 read_physical_state(struct hfi1_devdata *dd)
4700 {
4701 	u64 reg;
4702 
4703 	reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704 	return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705 				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
4706 }
4707 
4708 static u32 read_logical_state(struct hfi1_devdata *dd)
4709 {
4710 	u64 reg;
4711 
4712 	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713 	return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714 				& DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4715 }
4716 
4717 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4718 {
4719 	u64 reg;
4720 
4721 	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722 	/* clear current state, set new state */
4723 	reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724 	reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725 	write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4726 }
4727 
4728 /*
4729  * Use the 8051 to read a LCB CSR.
4730  */
4731 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4732 {
4733 	u32 regno;
4734 	int ret;
4735 
4736 	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737 		if (acquire_lcb_access(dd, 0) == 0) {
4738 			*data = read_csr(dd, addr);
4739 			release_lcb_access(dd, 0);
4740 			return 0;
4741 		}
4742 		return -EBUSY;
4743 	}
4744 
4745 	/* register is an index of LCB registers: (offset - base) / 8 */
4746 	regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747 	ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748 	if (ret != HCMD_SUCCESS)
4749 		return -EBUSY;
4750 	return 0;
4751 }
4752 
4753 /*
4754  * Read an LCB CSR.  Access may not be in host control, so check.
4755  * Return 0 on success, -EBUSY on failure.
4756  */
4757 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4758 {
4759 	struct hfi1_pportdata *ppd = dd->pport;
4760 
4761 	/* if up, go through the 8051 for the value */
4762 	if (ppd->host_link_state & HLS_UP)
4763 		return read_lcb_via_8051(dd, addr, data);
4764 	/* if going up or down, no access */
4765 	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4766 		return -EBUSY;
4767 	/* otherwise, host has access */
4768 	*data = read_csr(dd, addr);
4769 	return 0;
4770 }
4771 
4772 /*
4773  * Use the 8051 to write a LCB CSR.
4774  */
4775 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4776 {
4777 
4778 	if (acquire_lcb_access(dd, 0) == 0) {
4779 		write_csr(dd, addr, data);
4780 		release_lcb_access(dd, 0);
4781 		return 0;
4782 	}
4783 	return -EBUSY;
4784 }
4785 
4786 /*
4787  * Write an LCB CSR.  Access may not be in host control, so check.
4788  * Return 0 on success, -EBUSY on failure.
4789  */
4790 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4791 {
4792 	struct hfi1_pportdata *ppd = dd->pport;
4793 
4794 	/* if up, go through the 8051 for the value */
4795 	if (ppd->host_link_state & HLS_UP)
4796 		return write_lcb_via_8051(dd, addr, data);
4797 	/* if going up or down, no access */
4798 	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4799 		return -EBUSY;
4800 	/* otherwise, host has access */
4801 	write_csr(dd, addr, data);
4802 	return 0;
4803 }
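
/*
 * Illustrative call pattern (editor's sketch, not driver code): callers
 * of read_lcb_csr()/write_lcb_csr() must tolerate -EBUSY while the link
 * is transitioning (going up or going offline), e.g.
 *
 *	u64 val;
 *
 *	if (read_lcb_csr(dd, DC_LCB_CFG_RUN, &val))
 *		return;
 *
 * DC_LCB_CFG_RUN is used here only as an example register.
 */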
4804 
4805 /*
4806  * Returns:
4807  *	< 0 = Linux error, not able to get access
4808  *	> 0 = 8051 command RETURN_CODE
4809  */
4810 static int do_8051_command(
4811 	struct hfi1_devdata *dd,
4812 	u32 type,
4813 	u64 in_data,
4814 	u64 *out_data)
4815 {
4816 	u64 reg, completed;
4817 	int return_code;
4818 	unsigned long flags;
4819 	unsigned long timeout;
4820 
4821 	hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4822 
4823 	/*
4824 	 * Alternative to holding the lock for a long time:
4825 	 * - keep busy wait - have other users bounce off
4826 	 */
4827 	spin_lock_irqsave(&dd->dc8051_lock, flags);
4828 
4829 	/* We can't send any commands to the 8051 if it's in reset */
4830 	if (dd->dc_shutdown) {
4831 		return_code = -ENODEV;
4832 		goto fail;
4833 	}
4834 
4835 	/*
4836 	 * If an 8051 host command timed out previously, then the 8051 is
4837 	 * stuck.
4838 	 *
4839 	 * On first timeout, attempt to reset and restart the entire DC
4840 	 * block (including 8051). (Is this too big of a hammer?)
4841 	 *
4842 	 * If the 8051 times out a second time, the reset did not bring it
4843 	 * back to healthy life. In that case, fail any subsequent commands.
4844 	 */
4845 	if (dd->dc8051_timed_out) {
4846 		if (dd->dc8051_timed_out > 1) {
4847 			dd_dev_err(dd,
4848 				   "Previous 8051 host command timed out, skipping command %u\n",
4849 				   type);
4850 			return_code = -ENXIO;
4851 			goto fail;
4852 		}
4853 		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4854 		dc_shutdown(dd);
4855 		dc_start(dd);
4856 		spin_lock_irqsave(&dd->dc8051_lock, flags);
4857 	}
4858 
4859 	/*
4860 	 * If there is no timeout, then the 8051 command interface is
4861 	 * waiting for a command.
4862 	 */
4863 
4864 	/*
4865 	 * Do two writes: the first to stabilize the type and req_data, the
4866 	 * second to activate.
4867 	 */
4868 	reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869 			<< DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870 		| (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871 			<< DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873 	reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4875 
4876 	/* wait for completion, alternate: interrupt */
4877 	timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4878 	while (1) {
4879 		reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880 		completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4881 		if (completed)
4882 			break;
4883 		if (time_after(jiffies, timeout)) {
4884 			dd->dc8051_timed_out++;
4885 			dd_dev_err(dd, "8051 host command %u timeout\n", type);
4886 			if (out_data)
4887 				*out_data = 0;
4888 			return_code = -ETIMEDOUT;
4889 			goto fail;
4890 		}
4891 		udelay(2);
4892 	}
4893 
4894 	if (out_data) {
4895 		*out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896 				& DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897 		if (type == HCMD_READ_LCB_CSR) {
4898 			/* top 16 bits are in a different register */
4899 			*out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900 				& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4901 				<< (48
4902 				    - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4903 		}
4904 	}
4905 	return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906 				& DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907 	dd->dc8051_timed_out = 0;
4908 	/*
4909 	 * Clear command for next user.
4910 	 */
4911 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4912 
4913 fail:
4914 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4915 
4916 	return return_code;
4917 }
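
/*
 * Editor's note: callers therefore have to distinguish two failure
 * spaces: a negative errno means the command never completed (8051 in
 * reset, stuck after an earlier timeout, or timed out now), while a
 * positive value is the 8051's own RETURN_CODE and is normally compared
 * against HCMD_SUCCESS, as the wrappers below do.
 */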
4918 
4919 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4920 {
4921 	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4922 }
4923 
4924 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925 			    u8 lane_id, u32 config_data)
4926 {
4927 	u64 data;
4928 	int ret;
4929 
4930 	data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931 		| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932 		| (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933 	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934 	if (ret != HCMD_SUCCESS) {
4935 		dd_dev_err(dd,
4936 			"load 8051 config: field id %d, lane %d, err %d\n",
4937 			(int)field_id, (int)lane_id, ret);
4938 	}
4939 	return ret;
4940 }
4941 
4942 /*
4943  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4944  * set the result, even on error.
4945  * Return 0 on success, -errno on failure
4946  */
4947 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4948 			    u32 *result)
4949 {
4950 	u64 big_data;
4951 	u32 addr;
4952 	int ret;
4953 
4954 	/* address start depends on the lane_id */
4955 	if (lane_id < 4)
4956 		addr = (4 * NUM_GENERAL_FIELDS)
4957 			+ (lane_id * 4 * NUM_LANE_FIELDS);
4958 	else
4959 		addr = 0;
4960 	addr += field_id * 4;
4961 
4962 	/* read is in 8-byte chunks, hardware will truncate the address down */
4963 	ret = read_8051_data(dd, addr, 8, &big_data);
4964 
4965 	if (ret == 0) {
4966 		/* extract the 4 bytes we want */
4967 		if (addr & 0x4)
4968 			*result = (u32)(big_data >> 32);
4969 		else
4970 			*result = (u32)big_data;
4971 	} else {
4972 		*result = 0;
4973 		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974 			__func__, lane_id, field_id);
4975 	}
4976 
4977 	return ret;
4978 }
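
/*
 * Worked example (editor's note): reads are done in aligned 8-byte
 * chunks, so when addr has bit 2 set the field of interest sits in the
 * upper half of big_data.  E.g. field_id 1 in the lane_id >= 4 (general)
 * area gives addr = 4 and takes big_data >> 32, while field_id 2 gives
 * addr = 8 and takes the low 32 bits.
 */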
4979 
4980 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4981 			      u8 continuous)
4982 {
4983 	u32 frame;
4984 
4985 	frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986 		| power_management << POWER_MANAGEMENT_SHIFT;
4987 	return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988 				GENERAL_CONFIG, frame);
4989 }
4990 
4991 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992 				 u16 vl15buf, u8 crc_sizes)
4993 {
4994 	u32 frame;
4995 
4996 	frame = (u32)vau << VAU_SHIFT
4997 		| (u32)z << Z_SHIFT
4998 		| (u32)vcu << VCU_SHIFT
4999 		| (u32)vl15buf << VL15BUF_SHIFT
5000 		| (u32)crc_sizes << CRC_SIZES_SHIFT;
5001 	return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002 				GENERAL_CONFIG, frame);
5003 }
5004 
5005 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006 				     u8 *flag_bits, u16 *link_widths)
5007 {
5008 	u32 frame;
5009 
5010 	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5011 				&frame);
5012 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5015 }
5016 
5017 static int write_vc_local_link_width(struct hfi1_devdata *dd,
5018 				     u8 misc_bits,
5019 				     u8 flag_bits,
5020 				     u16 link_widths)
5021 {
5022 	u32 frame;
5023 
5024 	frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025 		| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026 		| (u32)link_widths << LINK_WIDTH_SHIFT;
5027 	return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5028 		     frame);
5029 }
5030 
5031 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5032 				 u8 device_rev)
5033 {
5034 	u32 frame;
5035 
5036 	frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037 		| ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038 	return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5039 }
5040 
5041 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5042 				  u8 *device_rev)
5043 {
5044 	u32 frame;
5045 
5046 	read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047 	*device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048 	*device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049 			& REMOTE_DEVICE_REV_MASK;
5050 }
5051 
5052 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5053 {
5054 	u32 frame;
5055 
5056 	read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057 	*ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058 	*ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5059 }
5060 
5061 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5062 			       u8 *continuous)
5063 {
5064 	u32 frame;
5065 
5066 	read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067 	*power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068 					& POWER_MANAGEMENT_MASK;
5069 	*continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070 					& CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5071 }
5072 
5073 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074 				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5075 {
5076 	u32 frame;
5077 
5078 	read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079 	*vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080 	*z = (frame >> Z_SHIFT) & Z_MASK;
5081 	*vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082 	*vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083 	*crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5084 }
5085 
5086 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5087 				      u8 *remote_tx_rate,
5088 				      u16 *link_widths)
5089 {
5090 	u32 frame;
5091 
5092 	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5093 				&frame);
5094 	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095 				& REMOTE_TX_RATE_MASK;
5096 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5097 }
5098 
5099 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5100 {
5101 	u32 frame;
5102 
5103 	read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104 	*enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5105 }
5106 
5107 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5108 {
5109 	u32 frame;
5110 
5111 	read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112 	*mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5113 }
5114 
5115 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5116 {
5117 	read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5118 }
5119 
5120 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5121 {
5122 	read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5123 }
5124 
5125 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5126 {
5127 	u32 frame;
5128 	int ret;
5129 
5130 	*link_quality = 0;
5131 	if (dd->pport->host_link_state & HLS_UP) {
5132 		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5133 					&frame);
5134 		if (ret == 0)
5135 			*link_quality = (frame >> LINK_QUALITY_SHIFT)
5136 						& LINK_QUALITY_MASK;
5137 	}
5138 }
5139 
5140 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5141 {
5142 	u32 frame;
5143 
5144 	read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145 	*pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5146 }
5147 
5148 static int read_tx_settings(struct hfi1_devdata *dd,
5149 			    u8 *enable_lane_tx,
5150 			    u8 *tx_polarity_inversion,
5151 			    u8 *rx_polarity_inversion,
5152 			    u8 *max_rate)
5153 {
5154 	u32 frame;
5155 	int ret;
5156 
5157 	ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158 	*enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159 				& ENABLE_LANE_TX_MASK;
5160 	*tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161 				& TX_POLARITY_INVERSION_MASK;
5162 	*rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163 				& RX_POLARITY_INVERSION_MASK;
5164 	*max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5165 	return ret;
5166 }
5167 
5168 static int write_tx_settings(struct hfi1_devdata *dd,
5169 			     u8 enable_lane_tx,
5170 			     u8 tx_polarity_inversion,
5171 			     u8 rx_polarity_inversion,
5172 			     u8 max_rate)
5173 {
5174 	u32 frame;
5175 
5176 	/* no need to mask, all variable sizes match field widths */
5177 	frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178 		| tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179 		| rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180 		| max_rate << MAX_RATE_SHIFT;
5181 	return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5182 }
5183 
5184 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5185 {
5186 	u32 frame, version, prod_id;
5187 	int ret, lane;
5188 
5189 	/* 4 lanes */
5190 	for (lane = 0; lane < 4; lane++) {
5191 		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5192 		if (ret) {
5193 			dd_dev_err(
5194 				dd,
5195 				"Unable to read lane %d firmware details\n",
5196 				lane);
5197 			continue;
5198 		}
5199 		version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200 					& SPICO_ROM_VERSION_MASK;
5201 		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202 					& SPICO_ROM_PROD_ID_MASK;
5203 		dd_dev_info(dd,
5204 			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205 			lane, version, prod_id);
5206 	}
5207 }
5208 
5209 /*
5210  * Read an idle LCB message.
5211  *
5212  * Returns 0 on success, -EINVAL on error
5213  */
5214 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5215 {
5216 	int ret;
5217 
5218 	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5219 		type, data_out);
5220 	if (ret != HCMD_SUCCESS) {
5221 		dd_dev_err(dd, "read idle message: type %d, err %d\n",
5222 			(u32)type, ret);
5223 		return -EINVAL;
5224 	}
5225 	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226 	/* return only the payload as we already know the type */
5227 	*data_out >>= IDLE_PAYLOAD_SHIFT;
5228 	return 0;
5229 }
5230 
5231 /*
5232  * Read an idle SMA message.  To be done in response to a notification from
5233  * the 8051.
5234  *
5235  * Returns 0 on success, -EINVAL on error
5236  */
5237 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5238 {
5239 	return read_idle_message(dd,
5240 			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5241 }
5242 
5243 /*
5244  * Send an idle LCB message.
5245  *
5246  * Returns 0 on success, -EINVAL on error
5247  */
5248 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5249 {
5250 	int ret;
5251 
5252 	dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253 	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254 	if (ret != HCMD_SUCCESS) {
5255 		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5256 			data, ret);
5257 		return -EINVAL;
5258 	}
5259 	return 0;
5260 }
5261 
5262 /*
5263  * Send an idle SMA message.
5264  *
5265  * Returns 0 on success, -EINVAL on error
5266  */
5267 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5268 {
5269 	u64 data;
5270 
5271 	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272 		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273 	return send_idle_message(dd, data);
5274 }
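
/*
 * Editor's note: an idle LCB message is a type field at
 * IDLE_MSG_TYPE_SHIFT plus a payload at IDLE_PAYLOAD_SHIFT.
 * send_idle_sma() packs both; read_idle_sma() supplies only the type and
 * read_idle_message() strips it again, so callers see just the payload.
 */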
5275 
5276 /*
5277  * Initialize the LCB then do a quick link up.  This may or may not be
5278  * in loopback.
5279  *
5280  * return 0 on success, -errno on error
5281  */
5282 static int do_quick_linkup(struct hfi1_devdata *dd)
5283 {
5284 	u64 reg;
5285 	unsigned long timeout;
5286 	int ret;
5287 
5288 	lcb_shutdown(dd, 0);
5289 
5290 	if (loopback) {
5291 		/* LCB_CFG_LOOPBACK.VAL = 2 */
5292 		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
5293 		write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294 			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295 		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5296 	}
5297 
5298 	/* start the LCBs */
5299 	/* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5301 
5302 	/* simulator only loopback steps */
5303 	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304 		/* LCB_CFG_RUN.EN = 1 */
5305 		write_csr(dd, DC_LCB_CFG_RUN,
5306 			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5307 
5308 		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5309 		timeout = jiffies + msecs_to_jiffies(10);
5310 		while (1) {
5311 			reg = read_csr(dd,
5312 				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5313 			if (reg)
5314 				break;
5315 			if (time_after(jiffies, timeout)) {
5316 				dd_dev_err(dd,
5317 					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
5318 				return -ETIMEDOUT;
5319 			}
5320 			udelay(2);
5321 		}
5322 
5323 		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324 			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5325 	}
5326 
5327 	if (!loopback) {
5328 		/*
5329 		 * When doing quick linkup and not in loopback, both
5330 		 * sides must be done with LCB set-up before either
5331 		 * starts the quick linkup.  Put a delay here so that
5332 		 * both sides can be started and have a chance to be
5333 		 * done with LCB set up before resuming.
5334 		 */
5335 		dd_dev_err(dd,
5336 			"Pausing for peer to be finished with LCB set up\n");
5337 		msleep(5000);
5338 		dd_dev_err(dd,
5339 			"Continuing with quick linkup\n");
5340 	}
5341 
5342 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343 	set_8051_lcb_access(dd);
5344 
5345 	/*
5346 	 * State "quick" LinkUp request sets the physical link state to
5347 	 * LinkUp without a verify capability sequence.
5348 	 * This state is in simulator v37 and later.
5349 	 */
5350 	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351 	if (ret != HCMD_SUCCESS) {
5352 		dd_dev_err(dd,
5353 			"%s: set physical link state to quick LinkUp failed with return %d\n",
5354 			__func__, ret);
5355 
5356 		set_host_lcb_access(dd);
5357 		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5358 
5359 		if (ret >= 0)
5360 			ret = -EINVAL;
5361 		return ret;
5362 	}
5363 
5364 	return 0; /* success */
5365 }
5366 
5367 /*
5368  * Set the SerDes to internal loopback mode.
5369  * Returns 0 on success, -errno on error.
5370  */
5371 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5372 {
5373 	int ret;
5374 
5375 	ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376 	if (ret == HCMD_SUCCESS)
5377 		return 0;
5378 	dd_dev_err(dd,
5379 		"Set physical link state to SerDes Loopback failed with return %d\n",
5380 		ret);
5381 	if (ret >= 0)
5382 		ret = -EINVAL;
5383 	return ret;
5384 }
5385 
5386 /*
5387  * Do all special steps to set up loopback.
5388  */
5389 static int init_loopback(struct hfi1_devdata *dd)
5390 {
5391 	dd_dev_info(dd, "Entering loopback mode\n");
5392 
5393 	/* all loopbacks should disable self GUID check */
5394 	write_csr(dd, DC_DC8051_CFG_MODE,
5395 		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5396 
5397 	/*
5398 	 * The simulator has only one loopback option - LCB.  Switch
5399 	 * to that option, which includes quick link up.
5400 	 *
5401 	 * Accept all valid loopback values.
5402 	 */
5403 	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404 		&& (loopback == LOOPBACK_SERDES
5405 			|| loopback == LOOPBACK_LCB
5406 			|| loopback == LOOPBACK_CABLE)) {
5407 		loopback = LOOPBACK_LCB;
5408 		quick_linkup = 1;
5409 		return 0;
5410 	}
5411 
5412 	/* handle serdes loopback */
5413 	if (loopback == LOOPBACK_SERDES) {
5414 		/* internal serdes loopback needs quick linkup on RTL */
5415 		if (dd->icode == ICODE_RTL_SILICON)
5416 			quick_linkup = 1;
5417 		return set_serdes_loopback_mode(dd);
5418 	}
5419 
5420 	/* LCB loopback - handled at poll time */
5421 	if (loopback == LOOPBACK_LCB) {
5422 		quick_linkup = 1; /* LCB is always quick linkup */
5423 
5424 		/* not supported in emulation due to emulation RTL changes */
5425 		if (dd->icode == ICODE_FPGA_EMULATION) {
5426 			dd_dev_err(dd,
5427 				"LCB loopback not supported in emulation\n");
5428 			return -EINVAL;
5429 		}
5430 		return 0;
5431 	}
5432 
5433 	/* external cable loopback requires no extra steps */
5434 	if (loopback == LOOPBACK_CABLE)
5435 		return 0;
5436 
5437 	dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5438 	return -EINVAL;
5439 }
5440 
5441 /*
5442  * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443  * used in the Verify Capability link width attribute.
5444  */
5445 static u16 opa_to_vc_link_widths(u16 opa_widths)
5446 {
5447 	int i;
5448 	u16 result = 0;
5449 
5450 	static const struct link_bits {
5451 		u16 from;
5452 		u16 to;
5453 	} opa_link_xlate[] = {
5454 		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5455 		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5456 		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5457 		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5458 	};
5459 
5460 	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461 		if (opa_widths & opa_link_xlate[i].from)
5462 			result |= opa_link_xlate[i].to;
5463 	}
5464 	return result;
5465 }
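
/*
 * Worked example (editor's note): an FM value of
 * OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_4X maps to (1 << 0) | (1 << 3) = 0x9
 * in the Verify Capability link width encoding programmed by
 * set_local_link_attributes() below.
 */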
5466 
5467 /*
5468  * Set link attributes before moving to polling.
5469  */
5470 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5471 {
5472 	struct hfi1_devdata *dd = ppd->dd;
5473 	u8 enable_lane_tx;
5474 	u8 tx_polarity_inversion;
5475 	u8 rx_polarity_inversion;
5476 	int ret;
5477 
5478 	/* reset our fabric serdes to clear any lingering problems */
5479 	fabric_serdes_reset(dd);
5480 
5481 	/* set the local tx rate - need to read-modify-write */
5482 	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483 		&rx_polarity_inversion, &ppd->local_tx_rate);
5484 	if (ret)
5485 		goto set_local_link_attributes_fail;
5486 
5487 	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488 		/* set the tx rate to the fastest enabled */
5489 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490 			ppd->local_tx_rate = 1;
5491 		else
5492 			ppd->local_tx_rate = 0;
5493 	} else {
5494 		/* set the tx rate to all enabled */
5495 		ppd->local_tx_rate = 0;
5496 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497 			ppd->local_tx_rate |= 2;
5498 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499 			ppd->local_tx_rate |= 1;
5500 	}
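
	/*
	 * Note (editorial, inferred from the code above): with 8051
	 * firmware 0.20 or later, local_tx_rate is a bit mask (bit 1 =
	 * 25 Gb/s, bit 0 = 12.5 Gb/s); older firmware takes a single
	 * selector, 1 when 25 Gb/s is enabled and 0 otherwise.
	 */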
5501 
5502 	enable_lane_tx = 0xF; /* enable all four lanes */
5503 	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504 		     rx_polarity_inversion, ppd->local_tx_rate);
5505 	if (ret != HCMD_SUCCESS)
5506 		goto set_local_link_attributes_fail;
5507 
5508 	/*
5509 	 * DC supports continuous updates.
5510 	 */
5511 	ret = write_vc_local_phy(dd, 0 /* no power management */,
5512 				     1 /* continuous updates */);
5513 	if (ret != HCMD_SUCCESS)
5514 		goto set_local_link_attributes_fail;
5515 
5516 	/* z=1 in the next call: AU of 0 is not supported by the hardware */
5517 	ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518 				    ppd->port_crc_mode_enabled);
5519 	if (ret != HCMD_SUCCESS)
5520 		goto set_local_link_attributes_fail;
5521 
5522 	ret = write_vc_local_link_width(dd, 0, 0,
5523 		     opa_to_vc_link_widths(ppd->link_width_enabled));
5524 	if (ret != HCMD_SUCCESS)
5525 		goto set_local_link_attributes_fail;
5526 
5527 	/* let peer know who we are */
5528 	ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529 	if (ret == HCMD_SUCCESS)
5530 		return 0;
5531 
5532 set_local_link_attributes_fail:
5533 	dd_dev_err(dd,
5534 		"Failed to set local link attributes, return 0x%x\n",
5535 		ret);
5536 	return ret;
5537 }
5538 
5539 /*
5540  * Call this to start the link.  Schedule a retry if the cable is not
5541  * present or if unable to start polling.  Do not do anything if the
5542  * link is disabled.  Returns 0 if link is disabled or moved to polling
5543  */
5544 int start_link(struct hfi1_pportdata *ppd)
5545 {
5546 	if (!ppd->link_enabled) {
5547 		dd_dev_info(ppd->dd,
5548 			"%s: stopping link start because link is disabled\n",
5549 			__func__);
5550 		return 0;
5551 	}
5552 	if (!ppd->driver_link_ready) {
5553 		dd_dev_info(ppd->dd,
5554 			"%s: stopping link start because driver is not ready\n",
5555 			__func__);
5556 		return 0;
5557 	}
5558 
5559 	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560 			loopback == LOOPBACK_LCB ||
5561 			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562 		return set_link_state(ppd, HLS_DN_POLL);
5563 
5564 	dd_dev_info(ppd->dd,
5565 		"%s: stopping link start because no cable is present\n",
5566 		__func__);
5567 	return -EAGAIN;
5568 }
5569 
5570 static void reset_qsfp(struct hfi1_pportdata *ppd)
5571 {
5572 	struct hfi1_devdata *dd = ppd->dd;
5573 	u64 mask, qsfp_mask;
5574 
5575 	mask = (u64)QSFP_HFI0_RESET_N;
5576 	qsfp_mask = read_csr(dd,
5577 		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5578 	qsfp_mask |= mask;
5579 	write_csr(dd,
5580 		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5581 		qsfp_mask);
5582 
5583 	qsfp_mask = read_csr(dd,
5584 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5585 	qsfp_mask &= ~mask;
5586 	write_csr(dd,
5587 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5588 		qsfp_mask);
5589 
5590 	udelay(10);
5591 
5592 	qsfp_mask |= mask;
5593 	write_csr(dd,
5594 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5595 		qsfp_mask);
5596 }
5597 
5598 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599 					u8 *qsfp_interrupt_status)
5600 {
5601 	struct hfi1_devdata *dd = ppd->dd;
5602 
5603 	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604 		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5605 		dd_dev_info(dd,
5606 			"%s: QSFP cable on fire\n",
5607 			__func__);
5608 
5609 	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610 		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5611 		dd_dev_info(dd,
5612 			"%s: QSFP cable temperature too low\n",
5613 			__func__);
5614 
5615 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616 		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5617 		dd_dev_info(dd,
5618 			"%s: QSFP supply voltage too high\n",
5619 			__func__);
5620 
5621 	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622 		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5623 		dd_dev_info(dd,
5624 			"%s: QSFP supply voltage too low\n",
5625 			__func__);
5626 
5627 	/* Byte 2 is vendor specific */
5628 
5629 	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630 		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5631 		dd_dev_info(dd,
5632 			"%s: Cable RX channel 1/2 power too high\n",
5633 			__func__);
5634 
5635 	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636 		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5637 		dd_dev_info(dd,
5638 			"%s: Cable RX channel 1/2 power too low\n",
5639 			__func__);
5640 
5641 	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642 		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5643 		dd_dev_info(dd,
5644 			"%s: Cable RX channel 3/4 power too high\n",
5645 			__func__);
5646 
5647 	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648 		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5649 		dd_dev_info(dd,
5650 			"%s: Cable RX channel 3/4 power too low\n",
5651 			__func__);
5652 
5653 	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654 		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5655 		dd_dev_info(dd,
5656 			"%s: Cable TX channel 1/2 bias too high\n",
5657 			__func__);
5658 
5659 	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660 		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5661 		dd_dev_info(dd,
5662 			"%s: Cable TX channel 1/2 bias too low\n",
5663 			__func__);
5664 
5665 	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666 		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5667 		dd_dev_info(dd,
5668 			"%s: Cable TX channel 3/4 bias too high\n",
5669 			__func__);
5670 
5671 	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672 		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5673 		dd_dev_info(dd,
5674 			"%s: Cable TX channel 3/4 bias too low\n",
5675 			__func__);
5676 
5677 	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678 		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5679 		dd_dev_info(dd,
5680 			"%s: Cable TX channel 1/2 power too high\n",
5681 			__func__);
5682 
5683 	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684 		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5685 		dd_dev_info(dd,
5686 			"%s: Cable TX channel 1/2 power too low\n",
5687 			__func__);
5688 
5689 	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690 		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5691 		dd_dev_info(dd,
5692 			"%s: Cable TX channel 3/4 power too high\n",
5693 			__func__);
5694 
5695 	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696 		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5697 		dd_dev_info(dd,
5698 			"%s: Cable TX channel 3/4 power too low\n",
5699 			__func__);
5700 
5701 	/* Bytes 9-10 and 11-12 are reserved */
5702 	/* Bytes 13-15 are vendor specific */
5703 
5704 	return 0;
5705 }
5706 
5707 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5708 {
5709 	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5710 
5711 	return 0;
5712 }
5713 
5714 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5715 {
5716 	struct hfi1_devdata *dd = ppd->dd;
5717 	u8 qsfp_interrupt_status = 0;
5718 
5719 	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5720 		!= 1) {
5721 		dd_dev_info(dd,
5722 			"%s: Failed to read status of QSFP module\n",
5723 			__func__);
5724 		return -EIO;
5725 	}
5726 
5727 	/* We don't care about alarms & warnings with a non-functional INT_N */
5728 	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729 		do_pre_lni_host_behaviors(ppd);
5730 
5731 	return 0;
5732 }
5733 
5734 /* This routine will only be scheduled if the QSFP module is present */
5735 static void qsfp_event(struct work_struct *work)
5736 {
5737 	struct qsfp_data *qd;
5738 	struct hfi1_pportdata *ppd;
5739 	struct hfi1_devdata *dd;
5740 
5741 	qd = container_of(work, struct qsfp_data, qsfp_work);
5742 	ppd = qd->ppd;
5743 	dd = ppd->dd;
5744 
5745 	/* Sanity check */
5746 	if (!qsfp_mod_present(ppd))
5747 		return;
5748 
5749 	/*
5750 	 * Turn DC back on after the cable has been
5751 	 * re-inserted. Up until now, the DC has been in
5752 	 * reset to save power.
5753 	 */
5754 	dc_start(dd);
5755 
5756 	if (qd->cache_refresh_required) {
5757 		msleep(3000);
5758 		reset_qsfp(ppd);
5759 
5760 		/* Check for QSFP interrupt after t_init (SFF 8679)
5761 		 * + extra
5762 		 */
5763 		msleep(3000);
5764 		if (!qd->qsfp_interrupt_functional) {
5765 			if (do_qsfp_intr_fallback(ppd) < 0)
5766 				dd_dev_info(dd, "%s: QSFP fallback failed\n",
5767 					__func__);
5768 			ppd->driver_link_ready = 1;
5769 			start_link(ppd);
5770 		}
5771 	}
5772 
5773 	if (qd->check_interrupt_flags) {
5774 		u8 qsfp_interrupt_status[16] = {0,};
5775 
5776 		if (qsfp_read(ppd, dd->hfi1_id, 6,
5777 			      &qsfp_interrupt_status[0], 16) != 16) {
5778 			dd_dev_info(dd,
5779 				"%s: Failed to read status of QSFP module\n",
5780 				__func__);
5781 		} else {
5782 			unsigned long flags;
5783 			u8 data_status;
5784 
5785 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786 			ppd->qsfp_info.check_interrupt_flags = 0;
5787 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5788 								flags);
5789 
5790 			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5791 				 != 1) {
5792 				dd_dev_info(dd,
5793 				"%s: Failed to read status of QSFP module\n",
5794 					__func__);
5795 			}
5796 			if (!(data_status & QSFP_DATA_NOT_READY)) {
5797 				do_pre_lni_host_behaviors(ppd);
5798 				start_link(ppd);
5799 			} else
5800 				handle_qsfp_error_conditions(ppd,
5801 						qsfp_interrupt_status);
5802 		}
5803 	}
5804 }
5805 
5806 void init_qsfp(struct hfi1_pportdata *ppd)
5807 {
5808 	struct hfi1_devdata *dd = ppd->dd;
5809 	u64 qsfp_mask;
5810 
5811 	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812 			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813 		ppd->driver_link_ready = 1;
5814 		return;
5815 	}
5816 
5817 	ppd->qsfp_info.ppd = ppd;
5818 	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5819 
5820 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821 	/* Clear current status to avoid spurious interrupts */
5822 	write_csr(dd,
5823 			dd->hfi1_id ?
5824 				ASIC_QSFP2_CLEAR :
5825 				ASIC_QSFP1_CLEAR,
5826 		qsfp_mask);
5827 
5828 	/* Handle active low nature of INT_N and MODPRST_N pins */
5829 	if (qsfp_mod_present(ppd))
5830 		qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5831 	write_csr(dd,
5832 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5833 		  qsfp_mask);
5834 
5835 	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836 	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5837 	write_csr(dd,
5838 		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5839 		qsfp_mask);
5840 
5841 	if (qsfp_mod_present(ppd)) {
5842 		msleep(3000);
5843 		reset_qsfp(ppd);
5844 
5845 		/* Check for QSFP interrupt after t_init (SFF 8679)
5846 		 * + extra
5847 		 */
5848 		msleep(3000);
5849 		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850 			if (do_qsfp_intr_fallback(ppd) < 0)
5851 				dd_dev_info(dd,
5852 					"%s: QSFP fallback failed\n",
5853 					__func__);
5854 			ppd->driver_link_ready = 1;
5855 		}
5856 	}
5857 }
5858 
5859 int bringup_serdes(struct hfi1_pportdata *ppd)
5860 {
5861 	struct hfi1_devdata *dd = ppd->dd;
5862 	u64 guid;
5863 	int ret;
5864 
5865 	if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866 		add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5867 
5868 	guid = ppd->guid;
5869 	if (!guid) {
5870 		if (dd->base_guid)
5871 			guid = dd->base_guid + ppd->port - 1;
5872 		ppd->guid = guid;
5873 	}
5874 
5875 	/* the link defaults to enabled */
5876 	ppd->link_enabled = 1;
5877 	/* Set linkinit_reason on power up per OPA spec */
5878 	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5879 
5880 	if (loopback) {
5881 		ret = init_loopback(dd);
5882 		if (ret < 0)
5883 			return ret;
5884 	}
5885 
5886 	return start_link(ppd);
5887 }
5888 
5889 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5890 {
5891 	struct hfi1_devdata *dd = ppd->dd;
5892 
5893 	/*
5894 	 * Shut down the link and keep it down.  First clear the flag that
5895 	 * says the driver wants the link up (driver_link_ready).
5896 	 * Then make sure the link is not automatically restarted
5897 	 * (link_enabled).  Cancel any pending restart.  And finally
5898 	 * go offline.
5899 	 */
5900 	ppd->driver_link_ready = 0;
5901 	ppd->link_enabled = 0;
5902 
5903 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904 	  OPA_LINKDOWN_REASON_SMA_DISABLED);
5905 	set_link_state(ppd, HLS_DN_OFFLINE);
5906 
5907 	/* disable the port */
5908 	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909 	cancel_work_sync(&ppd->freeze_work);
5910 }
5911 
5912 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5913 {
5914 	struct hfi1_pportdata *ppd;
5915 	int i;
5916 
5917 	ppd = (struct hfi1_pportdata *)(dd + 1);
5918 	for (i = 0; i < dd->num_pports; i++, ppd++) {
5919 		ppd->ibport_data.rc_acks = NULL;
5920 		ppd->ibport_data.rc_qacks = NULL;
5921 		ppd->ibport_data.rc_acks = alloc_percpu(u64);
5922 		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5923 		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5924 		if ((ppd->ibport_data.rc_acks == NULL) ||
5925 		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
5926 		    (ppd->ibport_data.rc_qacks == NULL))
5927 			return -ENOMEM;
5928 	}
5929 
5930 	return 0;
5931 }
5932 
5933 static const char * const pt_names[] = {
5934 	"expected",
5935 	"eager",
5936 	"invalid"
5937 };
5938 
5939 static const char *pt_name(u32 type)
5940 {
5941 	return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5942 }
5943 
5944 /*
5945  * index is the index into the receive array
5946  */
5947 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5948 		  u32 type, unsigned long pa, u16 order)
5949 {
5950 	u64 reg;
5951 	void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5952 			      (dd->kregbase + RCV_ARRAY));
5953 
5954 	if (!(dd->flags & HFI1_PRESENT))
5955 		goto done;
5956 
5957 	if (type == PT_INVALID) {
5958 		pa = 0;
5959 	} else if (type > PT_INVALID) {
5960 		dd_dev_err(dd,
5961 			"unexpected receive array type %u for index %u, not handled\n",
5962 			type, index);
5963 		goto done;
5964 	}
5965 
5966 	hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5967 		  pt_name(type), index, pa, (unsigned long)order);
5968 
5969 #define RT_ADDR_SHIFT 12	/* 4KB kernel address boundary */
5970 	reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5971 		| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5972 		| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5973 					<< RCV_ARRAY_RT_ADDR_SHIFT;
5974 	writeq(reg, base + (index * 8));
5975 
5976 	if (type == PT_EAGER)
5977 		/*
5978 		 * Eager entries are written one-by-one so we have to push them
5979 		 * after we write the entry.
5980 		 */
5981 		flush_wc();
5982 done:
5983 	return;
5984 }
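
/*
 * Editor's note on the entry layout above: each receive array entry
 * carries the write-enable bit, the buffer size order, and the physical
 * address shifted down by RT_ADDR_SHIFT (4 KB granularity); a PT_INVALID
 * entry is simply pa == 0 with order 0.  Only eager entries need the
 * explicit flush_wc(), since they are written one at a time through the
 * write-combining mapping.
 */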
5985 
5986 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5987 {
5988 	struct hfi1_devdata *dd = rcd->dd;
5989 	u32 i;
5990 
5991 	/* this could be optimized */
5992 	for (i = rcd->eager_base; i < rcd->eager_base +
5993 		     rcd->egrbufs.alloced; i++)
5994 		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5995 
5996 	for (i = rcd->expected_base;
5997 			i < rcd->expected_base + rcd->expected_count; i++)
5998 		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5999 }
6000 
6001 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6002 			struct hfi1_ctxt_info *kinfo)
6003 {
6004 	kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6005 		HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6006 	return 0;
6007 }
6008 
6009 struct hfi1_message_header *hfi1_get_msgheader(
6010 				struct hfi1_devdata *dd, __le32 *rhf_addr)
6011 {
6012 	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6013 
6014 	return (struct hfi1_message_header *)
6015 		(rhf_addr - dd->rhf_offset + offset);
6016 }
6017 
6018 static const char * const ib_cfg_name_strings[] = {
6019 	"HFI1_IB_CFG_LIDLMC",
6020 	"HFI1_IB_CFG_LWID_DG_ENB",
6021 	"HFI1_IB_CFG_LWID_ENB",
6022 	"HFI1_IB_CFG_LWID",
6023 	"HFI1_IB_CFG_SPD_ENB",
6024 	"HFI1_IB_CFG_SPD",
6025 	"HFI1_IB_CFG_RXPOL_ENB",
6026 	"HFI1_IB_CFG_LREV_ENB",
6027 	"HFI1_IB_CFG_LINKLATENCY",
6028 	"HFI1_IB_CFG_HRTBT",
6029 	"HFI1_IB_CFG_OP_VLS",
6030 	"HFI1_IB_CFG_VL_HIGH_CAP",
6031 	"HFI1_IB_CFG_VL_LOW_CAP",
6032 	"HFI1_IB_CFG_OVERRUN_THRESH",
6033 	"HFI1_IB_CFG_PHYERR_THRESH",
6034 	"HFI1_IB_CFG_LINKDEFAULT",
6035 	"HFI1_IB_CFG_PKEYS",
6036 	"HFI1_IB_CFG_MTU",
6037 	"HFI1_IB_CFG_LSTATE",
6038 	"HFI1_IB_CFG_VL_HIGH_LIMIT",
6039 	"HFI1_IB_CFG_PMA_TICKS",
6040 	"HFI1_IB_CFG_PORT"
6041 };
6042 
6043 static const char *ib_cfg_name(int which)
6044 {
6045 	if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6046 		return "invalid";
6047 	return ib_cfg_name_strings[which];
6048 }
6049 
6050 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6051 {
6052 	struct hfi1_devdata *dd = ppd->dd;
6053 	int val = 0;
6054 
6055 	switch (which) {
6056 	case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6057 		val = ppd->link_width_enabled;
6058 		break;
6059 	case HFI1_IB_CFG_LWID: /* currently active Link-width */
6060 		val = ppd->link_width_active;
6061 		break;
6062 	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6063 		val = ppd->link_speed_enabled;
6064 		break;
6065 	case HFI1_IB_CFG_SPD: /* current Link speed */
6066 		val = ppd->link_speed_active;
6067 		break;
6068 
6069 	case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6070 	case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6071 	case HFI1_IB_CFG_LINKLATENCY:
6072 		goto unimplemented;
6073 
6074 	case HFI1_IB_CFG_OP_VLS:
6075 		val = ppd->vls_operational;
6076 		break;
6077 	case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6078 		val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6079 		break;
6080 	case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6081 		val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6082 		break;
6083 	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6084 		val = ppd->overrun_threshold;
6085 		break;
6086 	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6087 		val = ppd->phy_error_threshold;
6088 		break;
6089 	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6090 		val = dd->link_default;
6091 		break;
6092 
6093 	case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6094 	case HFI1_IB_CFG_PMA_TICKS:
6095 	default:
6096 unimplemented:
6097 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6098 			dd_dev_info(
6099 				dd,
6100 				"%s: which %s: not implemented\n",
6101 				__func__,
6102 				ib_cfg_name(which));
6103 		break;
6104 	}
6105 
6106 	return val;
6107 }
6108 
6109 /*
6110  * The largest MAD packet size.
6111  */
6112 #define MAX_MAD_PACKET 2048
6113 
6114 /*
6115  * Return the maximum header bytes that can go on the _wire_
6116  * for this device. This count includes the ICRC, which is
6117  * not part of the packet held in memory but is appended
6118  * by the HW.
6119  * This is dependent on the device's receive header entry size.
6120  * HFI allows this to be set per-receive context, but the
6121  * driver presently enforces a global value.
6122  */
6123 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6124 {
6125 	/*
6126 	 * The maximum non-payload (MTU) bytes in LRH.PktLen are
6127 	 * the Receive Header Entry Size minus the PBC (or RHF) size
6128 	 * plus one DW for the ICRC appended by HW.
6129 	 *
6130 	 * dd->rcd[0].rcvhdrqentsize is in DW.
6131 	 * We use rcd[0] as all contexts will have the same value. Also,
6132 	 * the first kernel context would have been allocated by now so
6133 	 * we are guaranteed a valid value.
6134 	 */
6135 	return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6136 }
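
/*
 * Illustrative sketch, not part of the driver: the DW arithmetic used by
 * lrh_max_header_bytes() above, stated standalone.  The example value of
 * 32 DWs for the receive header entry size is an assumption for the
 * comment, not a driver default.
 */
static inline u32 example_lrh_max_header_bytes(u32 rcvhdrqentsize_dw)
{
	/* drop the 2-DW PBC/RHF, add 1 DW for the HW-appended ICRC, DW -> bytes */
	return (rcvhdrqentsize_dw - 2 + 1) << 2;	/* e.g. 32 DWs -> 124 bytes */
}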
6137 
6138 /*
6139  * Set Send Length
6140  * @ppd - per port data
6141  *
6142  * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6143  * registers compare against LRH.PktLen, so use the max bytes included
6144  * in the LRH.
6145  *
6146  * This routine changes all VL values except VL15, which it maintains at
6147  * the same value.
6148  */
6149 static void set_send_length(struct hfi1_pportdata *ppd)
6150 {
6151 	struct hfi1_devdata *dd = ppd->dd;
6152 	u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6153 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6154 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6155 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6156 	int i;
6157 
6158 	for (i = 0; i < ppd->vls_supported; i++) {
6159 		if (dd->vld[i].mtu > maxvlmtu)
6160 			maxvlmtu = dd->vld[i].mtu;
6161 		if (i <= 3)
6162 			len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6163 				 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6164 				((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6165 		else
6166 			len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6167 				 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6168 				((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6169 	}
6170 	write_csr(dd, SEND_LEN_CHECK0, len1);
6171 	write_csr(dd, SEND_LEN_CHECK1, len2);
6172 	/* adjust kernel credit return thresholds based on new MTUs */
6173 	/* all kernel receive contexts have the same hdrqentsize */
6174 	for (i = 0; i < ppd->vls_supported; i++) {
6175 		sc_set_cr_threshold(dd->vld[i].sc,
6176 			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6177 				dd->rcd[0]->rcvhdrqentsize));
6178 	}
6179 	sc_set_cr_threshold(dd->vld[15].sc,
6180 		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6181 			dd->rcd[0]->rcvhdrqentsize));
6182 
6183 	/* Adjust maximum MTU for the port in DC */
6184 	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6185 		(ilog2(maxvlmtu >> 8) + 1);
6186 	len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6187 	len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6188 	len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6189 		DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6190 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6191 }
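
/*
 * Illustrative sketch, not part of the driver: the DC MTU-cap encoding used
 * at the end of set_send_length() for MTUs other than 10240, which has its
 * own dedicated encoding.  The sample values in the comment are assumptions
 * for illustration.
 */
static inline u32 example_dcc_mtu_cap(u32 mtu)
{
	/* 2048 -> 4, 4096 -> 5, 8192 -> 6 */
	return ilog2(mtu >> 8) + 1;
}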
6192 
6193 static void set_lidlmc(struct hfi1_pportdata *ppd)
6194 {
6195 	int i;
6196 	u64 sreg = 0;
6197 	struct hfi1_devdata *dd = ppd->dd;
6198 	u32 mask = ~((1U << ppd->lmc) - 1);
6199 	u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6200 
6201 	if (dd->hfi1_snoop.mode_flag)
6202 		dd_dev_info(dd, "Set lid/lmc while snooping");
6203 
6204 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6205 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6206 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6207 			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6208 	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6209 			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6210 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6211 
6212 	/*
6213 	 * Iterate over all the send contexts and set their SLID check
6214 	 */
6215 	sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6216 			SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6217 	       (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6218 			SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6219 
6220 	for (i = 0; i < dd->chip_send_contexts; i++) {
6221 		hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6222 			  i, (u32)sreg);
6223 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6224 	}
6225 
6226 	/* Now we have to do the same thing for the sdma engines */
6227 	sdma_update_lmc(dd, mask, ppd->lid);
6228 }
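
/*
 * Illustrative sketch, not part of the driver: how the LMC mask computed in
 * set_lidlmc() behaves.  With an assumed LMC of 2 the low two DLID bits are
 * ignored, so lid, lid + 1, lid + 2 and lid + 3 all match this port.
 */
static inline int example_dlid_matches(u32 lid, u32 lmc, u32 dlid)
{
	u32 mask = ~((1U << lmc) - 1);

	return (dlid & mask) == (lid & mask);
}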
6229 
6230 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6231 {
6232 	unsigned long timeout;
6233 	u32 curr_state;
6234 
6235 	timeout = jiffies + msecs_to_jiffies(msecs);
6236 	while (1) {
6237 		curr_state = read_physical_state(dd);
6238 		if (curr_state == state)
6239 			break;
6240 		if (time_after(jiffies, timeout)) {
6241 			dd_dev_err(dd,
6242 				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6243 				state, curr_state);
6244 			return -ETIMEDOUT;
6245 		}
6246 		usleep_range(1950, 2050); /* sleep 2ms-ish */
6247 	}
6248 
6249 	return 0;
6250 }
6251 
6252 /*
6253  * Helper for set_link_state().  Do not call except from that routine.
6254  * Expects ppd->hls_mutex to be held.
6255  *
6256  * @rem_reason value to be sent to the neighbor
6257  *
6258  * LinkDownReasons only set if transition succeeds.
6259  */
6260 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6261 {
6262 	struct hfi1_devdata *dd = ppd->dd;
6263 	u32 pstate, previous_state;
6264 	u32 last_local_state;
6265 	u32 last_remote_state;
6266 	int ret;
6267 	int do_transition;
6268 	int do_wait;
6269 
6270 	previous_state = ppd->host_link_state;
6271 	ppd->host_link_state = HLS_GOING_OFFLINE;
6272 	pstate = read_physical_state(dd);
6273 	if (pstate == PLS_OFFLINE) {
6274 		do_transition = 0;	/* in right state */
6275 		do_wait = 0;		/* ...no need to wait */
6276 	} else if ((pstate & 0xff) == PLS_OFFLINE) {
6277 		do_transition = 0;	/* in an offline transient state */
6278 		do_wait = 1;		/* ...wait for it to settle */
6279 	} else {
6280 		do_transition = 1;	/* need to move to offline */
6281 		do_wait = 1;		/* ...will need to wait */
6282 	}
6283 
6284 	if (do_transition) {
6285 		ret = set_physical_link_state(dd,
6286 			PLS_OFFLINE | (rem_reason << 8));
6287 
6288 		if (ret != HCMD_SUCCESS) {
6289 			dd_dev_err(dd,
6290 				"Failed to transition to Offline link state, return %d\n",
6291 				ret);
6292 			return -EINVAL;
6293 		}
6294 		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6295 			ppd->offline_disabled_reason =
6296 			OPA_LINKDOWN_REASON_TRANSIENT;
6297 	}
6298 
6299 	if (do_wait) {
6300 		/* it can take a while for the link to go down */
6301 		ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6302 		if (ret < 0)
6303 			return ret;
6304 	}
6305 
6306 	/* make sure the logical state is also down */
6307 	wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6308 
6309 	/*
6310 	 * Now in charge of LCB - must be after the physical state is
6311 	 * offline.quiet and before host_link_state is changed.
6312 	 */
6313 	set_host_lcb_access(dd);
6314 	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6315 	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6316 
6317 	/*
6318 	 * The LNI has a mandatory wait time after the physical state
6319 	 * moves to Offline.Quiet.  The wait time may be different
6320 	 * depending on how the link went down.  The 8051 firmware
6321 	 * will observe the needed wait time and only move to ready
6322 	 * when that is completed.  The largest of the quiet timeouts
6323 	 * is 2.5s, so wait that long and then a bit more.
6324 	 */
6325 	ret = wait_fm_ready(dd, 3000);
6326 	if (ret) {
6327 		dd_dev_err(dd,
6328 			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6329 		/* state is really offline, so make it so */
6330 		ppd->host_link_state = HLS_DN_OFFLINE;
6331 		return ret;
6332 	}
6333 
6334 	/*
6335 	 * The state is now offline and the 8051 is ready to accept host
6336 	 * requests.
6337 	 *	- change our state
6338 	 *	- notify others if we were previously in a linkup state
6339 	 */
6340 	ppd->host_link_state = HLS_DN_OFFLINE;
6341 	if (previous_state & HLS_UP) {
6342 		/* went down while link was up */
6343 		handle_linkup_change(dd, 0);
6344 	} else if (previous_state
6345 			& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6346 		/* went down while attempting link up */
6347 		/* byte 1 of last_*_state is the failure reason */
6348 		read_last_local_state(dd, &last_local_state);
6349 		read_last_remote_state(dd, &last_remote_state);
6350 		dd_dev_err(dd,
6351 			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
6352 			last_local_state, last_remote_state);
6353 	}
6354 
6355 	/* the active link width (downgrade) is 0 on link down */
6356 	ppd->link_width_active = 0;
6357 	ppd->link_width_downgrade_tx_active = 0;
6358 	ppd->link_width_downgrade_rx_active = 0;
6359 	ppd->current_egress_rate = 0;
6360 	return 0;
6361 }
6362 
6363 /* return the link state name */
6364 static const char *link_state_name(u32 state)
6365 {
6366 	const char *name;
6367 	int n = ilog2(state);
6368 	static const char * const names[] = {
6369 		[__HLS_UP_INIT_BP]	 = "INIT",
6370 		[__HLS_UP_ARMED_BP]	 = "ARMED",
6371 		[__HLS_UP_ACTIVE_BP]	 = "ACTIVE",
6372 		[__HLS_DN_DOWNDEF_BP]	 = "DOWNDEF",
6373 		[__HLS_DN_POLL_BP]	 = "POLL",
6374 		[__HLS_DN_DISABLE_BP]	 = "DISABLE",
6375 		[__HLS_DN_OFFLINE_BP]	 = "OFFLINE",
6376 		[__HLS_VERIFY_CAP_BP]	 = "VERIFY_CAP",
6377 		[__HLS_GOING_UP_BP]	 = "GOING_UP",
6378 		[__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6379 		[__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6380 	};
6381 
6382 	name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6383 	return name ? name : "unknown";
6384 }
6385 
6386 /* return the link state reason name */
6387 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6388 {
6389 	if (state == HLS_UP_INIT) {
6390 		switch (ppd->linkinit_reason) {
6391 		case OPA_LINKINIT_REASON_LINKUP:
6392 			return "(LINKUP)";
6393 		case OPA_LINKINIT_REASON_FLAPPING:
6394 			return "(FLAPPING)";
6395 		case OPA_LINKINIT_OUTSIDE_POLICY:
6396 			return "(OUTSIDE_POLICY)";
6397 		case OPA_LINKINIT_QUARANTINED:
6398 			return "(QUARANTINED)";
6399 		case OPA_LINKINIT_INSUFIC_CAPABILITY:
6400 			return "(INSUFIC_CAPABILITY)";
6401 		default:
6402 			break;
6403 		}
6404 	}
6405 	return "";
6406 }
6407 
6408 /*
6409  * driver_physical_state - convert the driver's notion of a port's
6410  * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6411  * Return -1 (converted to a u32) to indicate error.
6412  */
6413 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6414 {
6415 	switch (ppd->host_link_state) {
6416 	case HLS_UP_INIT:
6417 	case HLS_UP_ARMED:
6418 	case HLS_UP_ACTIVE:
6419 		return IB_PORTPHYSSTATE_LINKUP;
6420 	case HLS_DN_POLL:
6421 		return IB_PORTPHYSSTATE_POLLING;
6422 	case HLS_DN_DISABLE:
6423 		return IB_PORTPHYSSTATE_DISABLED;
6424 	case HLS_DN_OFFLINE:
6425 		return OPA_PORTPHYSSTATE_OFFLINE;
6426 	case HLS_VERIFY_CAP:
6427 		return IB_PORTPHYSSTATE_POLLING;
6428 	case HLS_GOING_UP:
6429 		return IB_PORTPHYSSTATE_POLLING;
6430 	case HLS_GOING_OFFLINE:
6431 		return OPA_PORTPHYSSTATE_OFFLINE;
6432 	case HLS_LINK_COOLDOWN:
6433 		return OPA_PORTPHYSSTATE_OFFLINE;
6434 	case HLS_DN_DOWNDEF:
6435 	default:
6436 		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6437 			   ppd->host_link_state);
6438 		return  -1;
6439 	}
6440 }
6441 
6442 /*
6443  * driver_logical_state - convert the driver's notion of a port's
6444  * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6445  * (converted to a u32) to indicate error.
6446  */
6447 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6448 {
6449 	if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6450 		return IB_PORT_DOWN;
6451 
6452 	switch (ppd->host_link_state & HLS_UP) {
6453 	case HLS_UP_INIT:
6454 		return IB_PORT_INIT;
6455 	case HLS_UP_ARMED:
6456 		return IB_PORT_ARMED;
6457 	case HLS_UP_ACTIVE:
6458 		return IB_PORT_ACTIVE;
6459 	default:
6460 		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6461 			   ppd->host_link_state);
6462 	return -1;
6463 	}
6464 }
6465 
6466 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6467 			  u8 neigh_reason, u8 rem_reason)
6468 {
6469 	if (ppd->local_link_down_reason.latest == 0 &&
6470 	    ppd->neigh_link_down_reason.latest == 0) {
6471 		ppd->local_link_down_reason.latest = lcl_reason;
6472 		ppd->neigh_link_down_reason.latest = neigh_reason;
6473 		ppd->remote_link_down_reason = rem_reason;
6474 	}
6475 }
6476 
6477 /*
6478  * Change the physical and/or logical link state.
6479  *
6480  * Do not call this routine while inside an interrupt.  It contains
6481  * calls to routines that can take multiple seconds to finish.
6482  *
6483  * Returns 0 on success, -errno on failure.
6484  */
6485 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6486 {
6487 	struct hfi1_devdata *dd = ppd->dd;
6488 	struct ib_event event = {.device = NULL};
6489 	int ret1, ret = 0;
6490 	int was_up, is_down;
6491 	int orig_new_state, poll_bounce;
6492 
6493 	mutex_lock(&ppd->hls_lock);
6494 
6495 	orig_new_state = state;
6496 	if (state == HLS_DN_DOWNDEF)
6497 		state = dd->link_default;
6498 
6499 	/* interpret poll -> poll as a link bounce */
6500 	poll_bounce = ppd->host_link_state == HLS_DN_POLL
6501 				&& state == HLS_DN_POLL;
6502 
6503 	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6504 		link_state_name(ppd->host_link_state),
6505 		link_state_name(orig_new_state),
6506 		poll_bounce ? "(bounce) " : "",
6507 		link_state_reason_name(ppd, state));
6508 
6509 	was_up = !!(ppd->host_link_state & HLS_UP);
6510 
6511 	/*
6512 	 * If we're going to a (HLS_*) link state that implies the logical
6513 	 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6514 	 * reset is_sm_config_started to 0.
6515 	 */
6516 	if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6517 		ppd->is_sm_config_started = 0;
6518 
6519 	/*
6520 	 * Do nothing if the states match.  Let a poll-to-poll link bounce
6521 	 * go through.
6522 	 */
6523 	if (ppd->host_link_state == state && !poll_bounce)
6524 		goto done;
6525 
6526 	switch (state) {
6527 	case HLS_UP_INIT:
6528 		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6529 			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6530 			/*
6531 			 * Quick link up jumps from polling to here.
6532 			 *
6533 			 * Whether in normal or loopback mode, the
6534 			 * simulator jumps from polling to link up.
6535 			 * Accept that here.
6536 			 */
6537 			/* OK */;
6538 		} else if (ppd->host_link_state != HLS_GOING_UP) {
6539 			goto unexpected;
6540 		}
6541 
6542 		ppd->host_link_state = HLS_UP_INIT;
6543 		ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6544 		if (ret) {
6545 			/* logical state didn't change, stay at going_up */
6546 			ppd->host_link_state = HLS_GOING_UP;
6547 			dd_dev_err(dd,
6548 				"%s: logical state did not change to INIT\n",
6549 				__func__);
6550 		} else {
6551 			/* clear old transient LINKINIT_REASON code */
6552 			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6553 				ppd->linkinit_reason =
6554 					OPA_LINKINIT_REASON_LINKUP;
6555 
6556 			/* enable the port */
6557 			add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6558 
6559 			handle_linkup_change(dd, 1);
6560 		}
6561 		break;
6562 	case HLS_UP_ARMED:
6563 		if (ppd->host_link_state != HLS_UP_INIT)
6564 			goto unexpected;
6565 
6566 		ppd->host_link_state = HLS_UP_ARMED;
6567 		set_logical_state(dd, LSTATE_ARMED);
6568 		ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6569 		if (ret) {
6570 			/* logical state didn't change, stay at init */
6571 			ppd->host_link_state = HLS_UP_INIT;
6572 			dd_dev_err(dd,
6573 				"%s: logical state did not change to ARMED\n",
6574 				__func__);
6575 		}
6576 		/*
6577 		 * The simulator does not currently implement SMA messages,
6578 		 * so neighbor_normal is not set.  Set it here when we first
6579 		 * move to Armed.
6580 		 */
6581 		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6582 			ppd->neighbor_normal = 1;
6583 		break;
6584 	case HLS_UP_ACTIVE:
6585 		if (ppd->host_link_state != HLS_UP_ARMED)
6586 			goto unexpected;
6587 
6588 		ppd->host_link_state = HLS_UP_ACTIVE;
6589 		set_logical_state(dd, LSTATE_ACTIVE);
6590 		ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6591 		if (ret) {
6592 			/* logical state didn't change, stay at armed */
6593 			ppd->host_link_state = HLS_UP_ARMED;
6594 			dd_dev_err(dd,
6595 				"%s: logical state did not change to ACTIVE\n",
6596 				__func__);
6597 		} else {
6598 
6599 			/* tell all engines to go running */
6600 			sdma_all_running(dd);
6601 
6602 			/* Signal the IB layer that the port has gone active */
6603 			event.device = &dd->verbs_dev.ibdev;
6604 			event.element.port_num = ppd->port;
6605 			event.event = IB_EVENT_PORT_ACTIVE;
6606 		}
6607 		break;
6608 	case HLS_DN_POLL:
6609 		if ((ppd->host_link_state == HLS_DN_DISABLE ||
6610 		     ppd->host_link_state == HLS_DN_OFFLINE) &&
6611 		    dd->dc_shutdown)
6612 			dc_start(dd);
6613 		/* Hand LED control to the DC */
6614 		write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6615 
6616 		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6617 			u8 tmp = ppd->link_enabled;
6618 
6619 			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6620 			if (ret) {
6621 				ppd->link_enabled = tmp;
6622 				break;
6623 			}
6624 			ppd->remote_link_down_reason = 0;
6625 
6626 			if (ppd->driver_link_ready)
6627 				ppd->link_enabled = 1;
6628 		}
6629 
6630 		ret = set_local_link_attributes(ppd);
6631 		if (ret)
6632 			break;
6633 
6634 		ppd->port_error_action = 0;
6635 		ppd->host_link_state = HLS_DN_POLL;
6636 
6637 		if (quick_linkup) {
6638 			/* quick linkup does not go into polling */
6639 			ret = do_quick_linkup(dd);
6640 		} else {
6641 			ret1 = set_physical_link_state(dd, PLS_POLLING);
6642 			if (ret1 != HCMD_SUCCESS) {
6643 				dd_dev_err(dd,
6644 					"Failed to transition to Polling link state, return 0x%x\n",
6645 					ret1);
6646 				ret = -EINVAL;
6647 			}
6648 		}
6649 		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6650 		/*
6651 		 * If an error occurred above, go back to offline.  The
6652 		 * caller may reschedule another attempt.
6653 		 */
6654 		if (ret)
6655 			goto_offline(ppd, 0);
6656 		break;
6657 	case HLS_DN_DISABLE:
6658 		/* link is disabled */
6659 		ppd->link_enabled = 0;
6660 
6661 		/* allow any state to transition to disabled */
6662 
6663 		/* must transition to offline first */
6664 		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6665 			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6666 			if (ret)
6667 				break;
6668 			ppd->remote_link_down_reason = 0;
6669 		}
6670 
6671 		ret1 = set_physical_link_state(dd, PLS_DISABLED);
6672 		if (ret1 != HCMD_SUCCESS) {
6673 			dd_dev_err(dd,
6674 				"Failed to transition to Disabled link state, return 0x%x\n",
6675 				ret1);
6676 			ret = -EINVAL;
6677 			break;
6678 		}
6679 		ppd->host_link_state = HLS_DN_DISABLE;
6680 		dc_shutdown(dd);
6681 		break;
6682 	case HLS_DN_OFFLINE:
6683 		if (ppd->host_link_state == HLS_DN_DISABLE)
6684 			dc_start(dd);
6685 
6686 		/* allow any state to transition to offline */
6687 		ret = goto_offline(ppd, ppd->remote_link_down_reason);
6688 		if (!ret)
6689 			ppd->remote_link_down_reason = 0;
6690 		break;
6691 	case HLS_VERIFY_CAP:
6692 		if (ppd->host_link_state != HLS_DN_POLL)
6693 			goto unexpected;
6694 		ppd->host_link_state = HLS_VERIFY_CAP;
6695 		break;
6696 	case HLS_GOING_UP:
6697 		if (ppd->host_link_state != HLS_VERIFY_CAP)
6698 			goto unexpected;
6699 
6700 		ret1 = set_physical_link_state(dd, PLS_LINKUP);
6701 		if (ret1 != HCMD_SUCCESS) {
6702 			dd_dev_err(dd,
6703 				"Failed to transition to link up state, return 0x%x\n",
6704 				ret1);
6705 			ret = -EINVAL;
6706 			break;
6707 		}
6708 		ppd->host_link_state = HLS_GOING_UP;
6709 		break;
6710 
6711 	case HLS_GOING_OFFLINE:		/* transient within goto_offline() */
6712 	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
6713 	default:
6714 		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6715 			__func__, state);
6716 		ret = -EINVAL;
6717 		break;
6718 	}
6719 
6720 	is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6721 			HLS_DN_DISABLE | HLS_DN_OFFLINE));
6722 
6723 	if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6724 	    ppd->neigh_link_down_reason.sma == 0) {
6725 		ppd->local_link_down_reason.sma =
6726 		  ppd->local_link_down_reason.latest;
6727 		ppd->neigh_link_down_reason.sma =
6728 		  ppd->neigh_link_down_reason.latest;
6729 	}
6730 
6731 	goto done;
6732 
6733 unexpected:
6734 	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6735 		__func__, link_state_name(ppd->host_link_state),
6736 		link_state_name(state));
6737 	ret = -EINVAL;
6738 
6739 done:
6740 	mutex_unlock(&ppd->hls_lock);
6741 
6742 	if (event.device)
6743 		ib_dispatch_event(&event);
6744 
6745 	return ret;
6746 }
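
/*
 * Illustrative usage sketch, not part of the driver: a caller that wants to
 * bounce the link can request HLS_DN_POLL; a Polling -> Polling request is
 * treated as a bounce by set_link_state() above.  As noted there, this must
 * not be called from interrupt context.
 */
static inline int example_bounce_link(struct hfi1_pportdata *ppd)
{
	return set_link_state(ppd, HLS_DN_POLL);
}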
6747 
6748 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6749 {
6750 	u64 reg;
6751 	int ret = 0;
6752 
6753 	switch (which) {
6754 	case HFI1_IB_CFG_LIDLMC:
6755 		set_lidlmc(ppd);
6756 		break;
6757 	case HFI1_IB_CFG_VL_HIGH_LIMIT:
6758 		/*
6759 		 * The VL Arbitrator high limit is sent in units of 4k
6760 		 * bytes, while HFI stores it in units of 64 bytes.
6761 		 */
6762 		val *= 4096/64;
6763 		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6764 			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6765 		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6766 		break;
6767 	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6768 		/* HFI only supports POLL as the default link down state */
6769 		if (val != HLS_DN_POLL)
6770 			ret = -EINVAL;
6771 		break;
6772 	case HFI1_IB_CFG_OP_VLS:
6773 		if (ppd->vls_operational != val) {
6774 			ppd->vls_operational = val;
6775 			if (!ppd->port)
6776 				ret = -EINVAL;
6777 			else
6778 				ret = sdma_map_init(
6779 					ppd->dd,
6780 					ppd->port - 1,
6781 					val,
6782 					NULL);
6783 		}
6784 		break;
6785 	/*
6786 	 * For link width, link width downgrade, and speed enable, always AND
6787 	 * the setting with what is actually supported.  This has two benefits.
6788 	 * First, enabled can't have unsupported values, no matter what the
6789 	 * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6790 	 * "fill in with your supported value" have all the bits in the
6791 	 * field set, so simply ANDing with supported has the desired result.
6792 	 */
6793 	case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6794 		ppd->link_width_enabled = val & ppd->link_width_supported;
6795 		break;
6796 	case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6797 		ppd->link_width_downgrade_enabled =
6798 				val & ppd->link_width_downgrade_supported;
6799 		break;
6800 	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6801 		ppd->link_speed_enabled = val & ppd->link_speed_supported;
6802 		break;
6803 	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6804 		/*
6805 		 * HFI does not follow IB specs, save this value
6806 		 * so we can report it, if asked.
6807 		 */
6808 		ppd->overrun_threshold = val;
6809 		break;
6810 	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6811 		/*
6812 		 * HFI does not follow IB specs, save this value
6813 		 * so we can report it, if asked.
6814 		 */
6815 		ppd->phy_error_threshold = val;
6816 		break;
6817 
6818 	case HFI1_IB_CFG_MTU:
6819 		set_send_length(ppd);
6820 		break;
6821 
6822 	case HFI1_IB_CFG_PKEYS:
6823 		if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6824 			set_partition_keys(ppd);
6825 		break;
6826 
6827 	default:
6828 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6829 			dd_dev_info(ppd->dd,
6830 			  "%s: which %s, val 0x%x: not implemented\n",
6831 			  __func__, ib_cfg_name(which), val);
6832 		break;
6833 	}
6834 	return ret;
6835 }
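
/*
 * Illustrative sketch, not part of the driver: the unit conversion done for
 * HFI1_IB_CFG_VL_HIGH_LIMIT above.  The FM supplies the limit in 4 KB units
 * while the CSR expects 64-byte units, so the value scales by 64.  The
 * sample value in the comment is an assumption for illustration.
 */
static inline u32 example_vl_high_limit_to_csr_units(u32 limit_in_4k)
{
	return limit_in_4k * (4096 / 64);	/* e.g. 2 (8 KB) -> 128 */
}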
6836 
6837 /* begin functions related to vl arbitration table caching */
6838 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6839 {
6840 	int i;
6841 
6842 	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6843 			VL_ARB_LOW_PRIO_TABLE_SIZE);
6844 	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6845 			VL_ARB_HIGH_PRIO_TABLE_SIZE);
6846 
6847 	/*
6848 	 * Note that we always return values directly from the
6849 	 * 'vl_arb_cache' (and do no CSR reads) in response to a
6850 	 * 'Get(VLArbTable)'. This is obviously correct after a
6851 	 * 'Set(VLArbTable)', since the cache will then be up to
6852 	 * date. But it's also correct prior to any 'Set(VLArbTable)'
6853 	 * since then both the cache, and the relevant h/w registers
6854 	 * will be zeroed.
6855 	 */
6856 
6857 	for (i = 0; i < MAX_PRIO_TABLE; i++)
6858 		spin_lock_init(&ppd->vl_arb_cache[i].lock);
6859 }
6860 
6861 /*
6862  * vl_arb_lock_cache
6863  *
6864  * All other vl_arb_* functions should be called only after locking
6865  * the cache.
6866  */
6867 static inline struct vl_arb_cache *
6868 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6869 {
6870 	if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6871 		return NULL;
6872 	spin_lock(&ppd->vl_arb_cache[idx].lock);
6873 	return &ppd->vl_arb_cache[idx];
6874 }
6875 
6876 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6877 {
6878 	spin_unlock(&ppd->vl_arb_cache[idx].lock);
6879 }
6880 
6881 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6882 			     struct ib_vl_weight_elem *vl)
6883 {
6884 	memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6885 }
6886 
6887 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6888 			     struct ib_vl_weight_elem *vl)
6889 {
6890 	memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6891 }
6892 
6893 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6894 			      struct ib_vl_weight_elem *vl)
6895 {
6896 	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6897 }
6898 /* end functions related to vl arbitration table caching */
6899 
6900 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6901 			  u32 size, struct ib_vl_weight_elem *vl)
6902 {
6903 	struct hfi1_devdata *dd = ppd->dd;
6904 	u64 reg;
6905 	unsigned int i, is_up = 0;
6906 	int drain, ret = 0;
6907 
6908 	mutex_lock(&ppd->hls_lock);
6909 
6910 	if (ppd->host_link_state & HLS_UP)
6911 		is_up = 1;
6912 
6913 	drain = !is_ax(dd) && is_up;
6914 
6915 	if (drain)
6916 		/*
6917 		 * Before adjusting VL arbitration weights, empty per-VL
6918 		 * FIFOs, otherwise a packet whose VL weight is being
6919 		 * set to 0 could get stuck in a FIFO with no chance to
6920 		 * egress.
6921 		 */
6922 		ret = stop_drain_data_vls(dd);
6923 
6924 	if (ret) {
6925 		dd_dev_err(
6926 			dd,
6927 			"%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6928 			__func__);
6929 		goto err;
6930 	}
6931 
6932 	for (i = 0; i < size; i++, vl++) {
6933 		/*
6934 		 * NOTE: The low priority shift and mask are used here, but
6935 		 * they are the same for both the low and high registers.
6936 		 */
6937 		reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6938 				<< SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6939 		      | (((u64)vl->weight
6940 				& SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6941 				<< SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6942 		write_csr(dd, target + (i * 8), reg);
6943 	}
6944 	pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6945 
6946 	if (drain)
6947 		open_fill_data_vls(dd); /* reopen all VLs */
6948 
6949 err:
6950 	mutex_unlock(&ppd->hls_lock);
6951 
6952 	return ret;
6953 }
6954 
6955 /*
6956  * Read one credit merge VL register.
6957  */
6958 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6959 			   struct vl_limit *vll)
6960 {
6961 	u64 reg = read_csr(dd, csr);
6962 
6963 	vll->dedicated = cpu_to_be16(
6964 		(reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6965 		& SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6966 	vll->shared = cpu_to_be16(
6967 		(reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6968 		& SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6969 }
6970 
6971 /*
6972  * Read the current credit merge limits.
6973  */
6974 static int get_buffer_control(struct hfi1_devdata *dd,
6975 			      struct buffer_control *bc, u16 *overall_limit)
6976 {
6977 	u64 reg;
6978 	int i;
6979 
6980 	/* not all entries are filled in */
6981 	memset(bc, 0, sizeof(*bc));
6982 
6983 	/* OPA and HFI have a 1-1 mapping */
6984 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
6985 		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6986 
6987 	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6988 	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6989 
6990 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6991 	bc->overall_shared_limit = cpu_to_be16(
6992 		(reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6993 		& SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6994 	if (overall_limit)
6995 		*overall_limit = (reg
6996 			>> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6997 			& SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6998 	return sizeof(struct buffer_control);
6999 }
7000 
7001 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7002 {
7003 	u64 reg;
7004 	int i;
7005 
7006 	/* each register contains 16 SC->VLnt mappings, 4 bits each */
7007 	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7008 	for (i = 0; i < sizeof(u64); i++) {
7009 		u8 byte = *(((u8 *)&reg) + i);
7010 
7011 		dp->vlnt[2 * i] = byte & 0xf;
7012 		dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7013 	}
7014 
7015 	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7016 	for (i = 0; i < sizeof(u64); i++) {
7017 		u8 byte = *(((u8 *)&reg) + i);
7018 
7019 		dp->vlnt[16 + (2 * i)] = byte & 0xf;
7020 		dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7021 	}
7022 	return sizeof(struct sc2vlnt);
7023 }
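
/*
 * Illustrative sketch, not part of the driver: on a little-endian host the
 * per-byte walk in get_sc2vlnt() above is equivalent to pulling sixteen
 * 4-bit SC->VLnt entries out of the 64-bit register value directly.  The
 * caller's 'vlnt' array is assumed to hold at least 16 entries.
 */
static inline void example_unpack_sc2vlnt(u64 reg, u8 *vlnt)
{
	int sc;

	for (sc = 0; sc < 16; sc++)
		vlnt[sc] = (reg >> (4 * sc)) & 0xf;
}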
7024 
7025 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7026 			      struct ib_vl_weight_elem *vl)
7027 {
7028 	unsigned int i;
7029 
7030 	for (i = 0; i < nelems; i++, vl++) {
7031 		vl->vl = 0xf;
7032 		vl->weight = 0;
7033 	}
7034 }
7035 
7036 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7037 {
7038 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7039 		DC_SC_VL_VAL(15_0,
7040 		0, dp->vlnt[0] & 0xf,
7041 		1, dp->vlnt[1] & 0xf,
7042 		2, dp->vlnt[2] & 0xf,
7043 		3, dp->vlnt[3] & 0xf,
7044 		4, dp->vlnt[4] & 0xf,
7045 		5, dp->vlnt[5] & 0xf,
7046 		6, dp->vlnt[6] & 0xf,
7047 		7, dp->vlnt[7] & 0xf,
7048 		8, dp->vlnt[8] & 0xf,
7049 		9, dp->vlnt[9] & 0xf,
7050 		10, dp->vlnt[10] & 0xf,
7051 		11, dp->vlnt[11] & 0xf,
7052 		12, dp->vlnt[12] & 0xf,
7053 		13, dp->vlnt[13] & 0xf,
7054 		14, dp->vlnt[14] & 0xf,
7055 		15, dp->vlnt[15] & 0xf));
7056 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7057 		DC_SC_VL_VAL(31_16,
7058 		16, dp->vlnt[16] & 0xf,
7059 		17, dp->vlnt[17] & 0xf,
7060 		18, dp->vlnt[18] & 0xf,
7061 		19, dp->vlnt[19] & 0xf,
7062 		20, dp->vlnt[20] & 0xf,
7063 		21, dp->vlnt[21] & 0xf,
7064 		22, dp->vlnt[22] & 0xf,
7065 		23, dp->vlnt[23] & 0xf,
7066 		24, dp->vlnt[24] & 0xf,
7067 		25, dp->vlnt[25] & 0xf,
7068 		26, dp->vlnt[26] & 0xf,
7069 		27, dp->vlnt[27] & 0xf,
7070 		28, dp->vlnt[28] & 0xf,
7071 		29, dp->vlnt[29] & 0xf,
7072 		30, dp->vlnt[30] & 0xf,
7073 		31, dp->vlnt[31] & 0xf));
7074 }
7075 
7076 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7077 			u16 limit)
7078 {
7079 	if (limit != 0)
7080 		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7081 			what, (int)limit, idx);
7082 }
7083 
7084 /* change only the shared limit portion of SendCmGlobalCredit */
7085 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7086 {
7087 	u64 reg;
7088 
7089 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7090 	reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7091 	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7092 	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7093 }
7094 
7095 /* change only the total credit limit portion of SendCmGlobalCredit */
7096 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7097 {
7098 	u64 reg;
7099 
7100 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7101 	reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7102 	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7103 	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7104 }
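
/*
 * Illustrative sketch, not part of the driver: the read-modify-write pattern
 * shared by set_global_shared(), set_global_limit() and the per-VL helpers
 * below.  The smask/shift pair is assumed to describe a single CSR field.
 */
static inline void example_update_csr_field(struct hfi1_devdata *dd, u32 csr,
					    u64 smask, int shift, u64 value)
{
	u64 reg = read_csr(dd, csr);

	reg &= ~smask;				/* clear the field */
	reg |= (value << shift) & smask;	/* set the new value */
	write_csr(dd, csr, reg);
}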
7105 
7106 /* set the given per-VL shared limit */
7107 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7108 {
7109 	u64 reg;
7110 	u32 addr;
7111 
7112 	if (vl < TXE_NUM_DATA_VL)
7113 		addr = SEND_CM_CREDIT_VL + (8 * vl);
7114 	else
7115 		addr = SEND_CM_CREDIT_VL15;
7116 
7117 	reg = read_csr(dd, addr);
7118 	reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7119 	reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7120 	write_csr(dd, addr, reg);
7121 }
7122 
7123 /* set the given per-VL dedicated limit */
7124 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7125 {
7126 	u64 reg;
7127 	u32 addr;
7128 
7129 	if (vl < TXE_NUM_DATA_VL)
7130 		addr = SEND_CM_CREDIT_VL + (8 * vl);
7131 	else
7132 		addr = SEND_CM_CREDIT_VL15;
7133 
7134 	reg = read_csr(dd, addr);
7135 	reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7136 	reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7137 	write_csr(dd, addr, reg);
7138 }
7139 
7140 /* spin until the given per-VL status mask bits clear */
7141 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7142 				     const char *which)
7143 {
7144 	unsigned long timeout;
7145 	u64 reg;
7146 
7147 	timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7148 	while (1) {
7149 		reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7150 
7151 		if (reg == 0)
7152 			return;	/* success */
7153 		if (time_after(jiffies, timeout))
7154 			break;		/* timed out */
7155 		udelay(1);
7156 	}
7157 
7158 	dd_dev_err(dd,
7159 		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7160 		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7161 	/*
7162 	 * If this occurs, it is likely there was a credit loss on the link.
7163 	 * The only recovery from that is a link bounce.
7164 	 */
7165 	dd_dev_err(dd,
7166 		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7167 }
7168 
7169 /*
7170  * The number of credits on the VLs may be changed while everything
7171  * is "live", but the following algorithm must be followed due to
7172  * how the hardware is actually implemented.  In particular,
7173  * Return_Credit_Status[] is the only correct status check.
7174  *
7175  * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7176  *     set Global_Shared_Credit_Limit = 0
7177  *     use_all_vl = 1
7178  * mask0 = all VLs that are changing either dedicated or shared limits
7179  * set Shared_Limit[mask0] = 0
7180  * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7181  * if (changing any dedicated limit)
7182  *     mask1 = all VLs that are lowering dedicated limits
7183  *     lower Dedicated_Limit[mask1]
7184  *     spin until Return_Credit_Status[mask1] == 0
7185  *     raise Dedicated_Limits
7186  * raise Shared_Limits
7187  * raise Global_Shared_Credit_Limit
7188  *
7189  * lower = if the new limit is lower, set the limit to the new value
7190  * raise = if the new limit is higher than the current value (may be changed
7191  *	earlier in the algorithm), set the new limit to the new value
7192  */
7193 static int set_buffer_control(struct hfi1_devdata *dd,
7194 			      struct buffer_control *new_bc)
7195 {
7196 	u64 changing_mask, ld_mask, stat_mask;
7197 	int change_count;
7198 	int i, use_all_mask;
7199 	int this_shared_changing;
7200 	/*
7201 	 * A0: add the variable any_shared_limit_changing below and in the
7202 	 * algorithm above.  If removing A0 support, it can be removed.
7203 	 */
7204 	int any_shared_limit_changing;
7205 	struct buffer_control cur_bc;
7206 	u8 changing[OPA_MAX_VLS];
7207 	u8 lowering_dedicated[OPA_MAX_VLS];
7208 	u16 cur_total;
7209 	u32 new_total = 0;
7210 	const u64 all_mask =
7211 	SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7212 	 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7213 	 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7214 	 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7215 	 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7216 	 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7217 	 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7218 	 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7219 	 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7220 
7221 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7222 #define NUM_USABLE_VLS 16	/* look at VL15 and less */
7223 
7224 
7225 	/* find the new total credits, do sanity check on unused VLs */
7226 	for (i = 0; i < OPA_MAX_VLS; i++) {
7227 		if (valid_vl(i)) {
7228 			new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7229 			continue;
7230 		}
7231 		nonzero_msg(dd, i, "dedicated",
7232 			be16_to_cpu(new_bc->vl[i].dedicated));
7233 		nonzero_msg(dd, i, "shared",
7234 			be16_to_cpu(new_bc->vl[i].shared));
7235 		new_bc->vl[i].dedicated = 0;
7236 		new_bc->vl[i].shared = 0;
7237 	}
7238 	new_total += be16_to_cpu(new_bc->overall_shared_limit);
7239 	if (new_total > (u32)dd->link_credits)
7240 		return -EINVAL;
7241 	/* fetch the current values */
7242 	get_buffer_control(dd, &cur_bc, &cur_total);
7243 
7244 	/*
7245 	 * Create the masks we will use.
7246 	 */
7247 	memset(changing, 0, sizeof(changing));
7248 	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7249 	/* NOTE: Assumes that the individual VL bits are adjacent and in
7250 	   increasing order */
7251 	stat_mask =
7252 		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7253 	changing_mask = 0;
7254 	ld_mask = 0;
7255 	change_count = 0;
7256 	any_shared_limit_changing = 0;
7257 	for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7258 		if (!valid_vl(i))
7259 			continue;
7260 		this_shared_changing = new_bc->vl[i].shared
7261 						!= cur_bc.vl[i].shared;
7262 		if (this_shared_changing)
7263 			any_shared_limit_changing = 1;
7264 		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7265 				|| this_shared_changing) {
7266 			changing[i] = 1;
7267 			changing_mask |= stat_mask;
7268 			change_count++;
7269 		}
7270 		if (be16_to_cpu(new_bc->vl[i].dedicated) <
7271 					be16_to_cpu(cur_bc.vl[i].dedicated)) {
7272 			lowering_dedicated[i] = 1;
7273 			ld_mask |= stat_mask;
7274 		}
7275 	}
7276 
7277 	/* bracket the credit change with a total adjustment */
7278 	if (new_total > cur_total)
7279 		set_global_limit(dd, new_total);
7280 
7281 	/*
7282 	 * Start the credit change algorithm.
7283 	 */
7284 	use_all_mask = 0;
7285 	if ((be16_to_cpu(new_bc->overall_shared_limit) <
7286 				be16_to_cpu(cur_bc.overall_shared_limit))
7287 			|| (is_a0(dd) && any_shared_limit_changing)) {
7288 		set_global_shared(dd, 0);
7289 		cur_bc.overall_shared_limit = 0;
7290 		use_all_mask = 1;
7291 	}
7292 
7293 	for (i = 0; i < NUM_USABLE_VLS; i++) {
7294 		if (!valid_vl(i))
7295 			continue;
7296 
7297 		if (changing[i]) {
7298 			set_vl_shared(dd, i, 0);
7299 			cur_bc.vl[i].shared = 0;
7300 		}
7301 	}
7302 
7303 	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7304 		"shared");
7305 
7306 	if (change_count > 0) {
7307 		for (i = 0; i < NUM_USABLE_VLS; i++) {
7308 			if (!valid_vl(i))
7309 				continue;
7310 
7311 			if (lowering_dedicated[i]) {
7312 				set_vl_dedicated(dd, i,
7313 					be16_to_cpu(new_bc->vl[i].dedicated));
7314 				cur_bc.vl[i].dedicated =
7315 						new_bc->vl[i].dedicated;
7316 			}
7317 		}
7318 
7319 		wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7320 
7321 		/* now raise all dedicated that are going up */
7322 		for (i = 0; i < NUM_USABLE_VLS; i++) {
7323 			if (!valid_vl(i))
7324 				continue;
7325 
7326 			if (be16_to_cpu(new_bc->vl[i].dedicated) >
7327 					be16_to_cpu(cur_bc.vl[i].dedicated))
7328 				set_vl_dedicated(dd, i,
7329 					be16_to_cpu(new_bc->vl[i].dedicated));
7330 		}
7331 	}
7332 
7333 	/* next raise all shared that are going up */
7334 	for (i = 0; i < NUM_USABLE_VLS; i++) {
7335 		if (!valid_vl(i))
7336 			continue;
7337 
7338 		if (be16_to_cpu(new_bc->vl[i].shared) >
7339 				be16_to_cpu(cur_bc.vl[i].shared))
7340 			set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7341 	}
7342 
7343 	/* finally raise the global shared */
7344 	if (be16_to_cpu(new_bc->overall_shared_limit) >
7345 			be16_to_cpu(cur_bc.overall_shared_limit))
7346 		set_global_shared(dd,
7347 			be16_to_cpu(new_bc->overall_shared_limit));
7348 
7349 	/* bracket the credit change with a total adjustment */
7350 	if (new_total < cur_total)
7351 		set_global_limit(dd, new_total);
7352 	return 0;
7353 }
7354 
7355 /*
7356  * Read the given fabric manager table. Return the size of the
7357  * table (in bytes) on success, and a negative error code on
7358  * failure.
7359  */
7360 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7361 
7362 {
7363 	int size;
7364 	struct vl_arb_cache *vlc;
7365 
7366 	switch (which) {
7367 	case FM_TBL_VL_HIGH_ARB:
7368 		size = 256;
7369 		/*
7370 		 * OPA specifies 128 elements (of 2 bytes each), though
7371 		 * HFI supports only 16 elements in h/w.
7372 		 */
7373 		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7374 		vl_arb_get_cache(vlc, t);
7375 		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7376 		break;
7377 	case FM_TBL_VL_LOW_ARB:
7378 		size = 256;
7379 		/*
7380 		 * OPA specifies 128 elements (of 2 bytes each), though
7381 		 * HFI supports only 16 elements in h/w.
7382 		 */
7383 		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7384 		vl_arb_get_cache(vlc, t);
7385 		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7386 		break;
7387 	case FM_TBL_BUFFER_CONTROL:
7388 		size = get_buffer_control(ppd->dd, t, NULL);
7389 		break;
7390 	case FM_TBL_SC2VLNT:
7391 		size = get_sc2vlnt(ppd->dd, t);
7392 		break;
7393 	case FM_TBL_VL_PREEMPT_ELEMS:
7394 		size = 256;
7395 		/* OPA specifies 128 elements, of 2 bytes each */
7396 		get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7397 		break;
7398 	case FM_TBL_VL_PREEMPT_MATRIX:
7399 		size = 256;
7400 		/*
7401 		 * OPA specifies that this is the same size as the VL
7402 		 * arbitration tables (i.e., 256 bytes).
7403 		 */
7404 		break;
7405 	default:
7406 		return -EINVAL;
7407 	}
7408 	return size;
7409 }
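
/*
 * Illustrative usage sketch, not part of the driver: fetching the current
 * buffer control table through the FM table interface above.  The caller is
 * assumed to pass a buffer large enough for the requested table.
 */
static inline int example_read_buffer_control(struct hfi1_pportdata *ppd,
					      struct buffer_control *bc)
{
	/* returns the table size in bytes, or a negative errno */
	return fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, bc);
}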
7410 
7411 /*
7412  * Write the given fabric manager table.
7413  */
7414 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7415 {
7416 	int ret = 0;
7417 	struct vl_arb_cache *vlc;
7418 
7419 	switch (which) {
7420 	case FM_TBL_VL_HIGH_ARB:
7421 		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7422 		if (vl_arb_match_cache(vlc, t)) {
7423 			vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7424 			break;
7425 		}
7426 		vl_arb_set_cache(vlc, t);
7427 		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7428 		ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7429 				     VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7430 		break;
7431 	case FM_TBL_VL_LOW_ARB:
7432 		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7433 		if (vl_arb_match_cache(vlc, t)) {
7434 			vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7435 			break;
7436 		}
7437 		vl_arb_set_cache(vlc, t);
7438 		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7439 		ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7440 				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7441 		break;
7442 	case FM_TBL_BUFFER_CONTROL:
7443 		ret = set_buffer_control(ppd->dd, t);
7444 		break;
7445 	case FM_TBL_SC2VLNT:
7446 		set_sc2vlnt(ppd->dd, t);
7447 		break;
7448 	default:
7449 		ret = -EINVAL;
7450 	}
7451 	return ret;
7452 }
7453 
7454 /*
7455  * Disable all data VLs.
7456  *
7457  * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7458  */
7459 static int disable_data_vls(struct hfi1_devdata *dd)
7460 {
7461 	if (is_a0(dd))
7462 		return 1;
7463 
7464 	pio_send_control(dd, PSC_DATA_VL_DISABLE);
7465 
7466 	return 0;
7467 }
7468 
7469 /*
7470  * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7471  * Just re-enables all data VLs (the "fill" part happens
7472  * automatically - the name was chosen for symmetry with
7473  * stop_drain_data_vls()).
7474  *
7475  * Return 0 if successful, non-zero if the VLs cannot be enabled.
7476  */
7477 int open_fill_data_vls(struct hfi1_devdata *dd)
7478 {
7479 	if (is_a0(dd))
7480 		return 1;
7481 
7482 	pio_send_control(dd, PSC_DATA_VL_ENABLE);
7483 
7484 	return 0;
7485 }
7486 
7487 /*
7488  * drain_data_vls() - assumes that disable_data_vls() has been called,
7489  * then waits for the occupancy (of per-VL FIFOs) of all contexts and SDMA
7490  * engines to drop to 0.
7491  */
7492 static void drain_data_vls(struct hfi1_devdata *dd)
7493 {
7494 	sc_wait(dd);
7495 	sdma_wait(dd);
7496 	pause_for_credit_return(dd);
7497 }
7498 
7499 /*
7500  * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7501  *
7502  * Use open_fill_data_vls() to resume using data VLs.  This pair is
7503  * meant to be used like this:
7504  *
7505  * stop_drain_data_vls(dd);
7506  * // do things with per-VL resources
7507  * open_fill_data_vls(dd);
7508  */
7509 int stop_drain_data_vls(struct hfi1_devdata *dd)
7510 {
7511 	int ret;
7512 
7513 	ret = disable_data_vls(dd);
7514 	if (ret == 0)
7515 		drain_data_vls(dd);
7516 
7517 	return ret;
7518 }
7519 
7520 /*
7521  * Convert a nanosecond time to a cclock count.  No matter how slow
7522  * the cclock, a non-zero ns will always have a non-zero result.
7523  */
7524 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7525 {
7526 	u32 cclocks;
7527 
7528 	if (dd->icode == ICODE_FPGA_EMULATION)
7529 		cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7530 	else  /* simulation pretends to be ASIC */
7531 		cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7532 	if (ns && !cclocks)	/* if ns nonzero, must be at least 1 */
7533 		cclocks = 1;
7534 	return cclocks;
7535 }
7536 
7537 /*
7538  * Convert a cclock count to nanoseconds.  No matter how slow
7539  * the cclock, a non-zero cclocks will always have a non-zero result.
7540  */
7541 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7542 {
7543 	u32 ns;
7544 
7545 	if (dd->icode == ICODE_FPGA_EMULATION)
7546 		ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7547 	else  /* simulation pretends to be ASIC */
7548 		ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7549 	if (cclocks && !ns)
7550 		ns = 1;
7551 	return ns;
7552 }
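
/*
 * Illustrative sketch, not part of the driver: the ns -> cclock scaling used
 * above, with the clock period in picoseconds taken as a parameter so that
 * no particular ASIC/FPGA period is assumed here.
 */
static inline u32 example_ns_to_cclock(u32 ns, u32 cclock_ps)
{
	u32 cclocks = (ns * 1000) / cclock_ps;

	/* a non-zero ns must never round down to zero cclocks */
	return (ns && !cclocks) ? 1 : cclocks;
}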
7553 
7554 /*
7555  * Dynamically adjust the receive interrupt timeout for a context based on
7556  * incoming packet rate.
7557  *
7558  * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7559  */
7560 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7561 {
7562 	struct hfi1_devdata *dd = rcd->dd;
7563 	u32 timeout = rcd->rcvavail_timeout;
7564 
7565 	/*
7566 	 * This algorithm doubles or halves the timeout depending on whether
7567 	 * the number of packets received in this interrupt was less than or
7568 	 * greater than or equal to the interrupt count.
7569 	 *
7570 	 * The calculations below do not allow a steady state to be achieved.
7571 	 * Only at the endpoints is it possible to have an unchanging
7572 	 * timeout.
7573 	 */
7574 	if (npkts < rcv_intr_count) {
7575 		/*
7576 		 * Not enough packets arrived before the timeout, adjust
7577 		 * timeout downward.
7578 		 */
7579 		if (timeout < 2) /* already at minimum? */
7580 			return;
7581 		timeout >>= 1;
7582 	} else {
7583 		/*
7584 		 * More than enough packets arrived before the timeout, adjust
7585 		 * timeout upward.
7586 		 */
7587 		if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7588 			return;
7589 		timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7590 	}
7591 
7592 	rcd->rcvavail_timeout = timeout;
7593 	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
7594 	   been verified to be in range */
7595 	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7596 		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7597 }
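
/*
 * Illustrative sketch, not part of the driver: the doubling/halving rule
 * applied by adjust_rcv_timeout() above, with the CSR maximum passed in so
 * no register access is needed.
 */
static inline u32 example_next_rcv_timeout(u32 timeout, u32 npkts,
					   u32 intr_count, u32 timeout_max)
{
	if (npkts < intr_count)			/* too few packets: halve */
		return timeout >= 2 ? timeout >> 1 : timeout;
	return min(timeout << 1, timeout_max);	/* enough packets: double */
}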
7598 
7599 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7600 		    u32 intr_adjust, u32 npkts)
7601 {
7602 	struct hfi1_devdata *dd = rcd->dd;
7603 	u64 reg;
7604 	u32 ctxt = rcd->ctxt;
7605 
7606 	/*
7607 	 * Need to write timeout register before updating RcvHdrHead to ensure
7608 	 * that a new value is used when the HW decides to restart counting.
7609 	 */
7610 	if (intr_adjust)
7611 		adjust_rcv_timeout(rcd, npkts);
7612 	if (updegr) {
7613 		reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7614 			<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7615 		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7616 	}
7617 	mmiowb();
7618 	reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7619 		(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7620 			<< RCV_HDR_HEAD_HEAD_SHIFT);
7621 	write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7622 	mmiowb();
7623 }
7624 
7625 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7626 {
7627 	u32 head, tail;
7628 
7629 	head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7630 		& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7631 
7632 	if (rcd->rcvhdrtail_kvaddr)
7633 		tail = get_rcvhdrtail(rcd);
7634 	else
7635 		tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7636 
7637 	return head == tail;
7638 }
7639 
7640 /*
7641  * Context Control and Receive Array encoding for buffer size:
7642  *	0x0 invalid
7643  *	0x1   4 KB
7644  *	0x2   8 KB
7645  *	0x3  16 KB
7646  *	0x4  32 KB
7647  *	0x5  64 KB
7648  *	0x6 128 KB
7649  *	0x7 256 KB
7650  *	0x8 512 KB (Receive Array only)
7651  *	0x9   1 MB (Receive Array only)
7652  *	0xa   2 MB (Receive Array only)
7653  *
7654  *	0xB-0xF - reserved (Receive Array only)
7655  *
7656  *
7657  * This routine assumes that the value has already been sanity checked.
7658  */
7659 static u32 encoded_size(u32 size)
7660 {
7661 	switch (size) {
7662 	case   4*1024: return 0x1;
7663 	case   8*1024: return 0x2;
7664 	case  16*1024: return 0x3;
7665 	case  32*1024: return 0x4;
7666 	case  64*1024: return 0x5;
7667 	case 128*1024: return 0x6;
7668 	case 256*1024: return 0x7;
7669 	case 512*1024: return 0x8;
7670 	case   1*1024*1024: return 0x9;
7671 	case   2*1024*1024: return 0xa;
7672 	}
7673 	return 0x1;	/* if invalid, go with the minimum size */
7674 }
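
/*
 * Illustrative observation, not part of the driver: for the power-of-two
 * sizes listed above, the encoding is simply ilog2(size) - 11, e.g.
 * 4 KB (2^12) -> 0x1 and 2 MB (2^21) -> 0xa.
 */
static inline u32 example_encoded_size(u32 size)
{
	return ilog2(size) - 11;
}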
7675 
7676 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7677 {
7678 	struct hfi1_ctxtdata *rcd;
7679 	u64 rcvctrl, reg;
7680 	int did_enable = 0;
7681 
7682 	rcd = dd->rcd[ctxt];
7683 	if (!rcd)
7684 		return;
7685 
7686 	hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7687 
7688 	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7689 	/* if the context already enabled, don't do the extra steps */
7690 	if ((op & HFI1_RCVCTRL_CTXT_ENB)
7691 			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7692 		/* reset the tail and hdr addresses, and sequence count */
7693 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7694 				rcd->rcvhdrq_phys);
7695 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7696 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7697 					rcd->rcvhdrqtailaddr_phys);
7698 		rcd->seq_cnt = 1;
7699 
7700 		/* reset the cached receive header queue head value */
7701 		rcd->head = 0;
7702 
7703 		/*
7704 		 * Zero the receive header queue so we don't get false
7705 		 * positives when checking the sequence number.  The
7706 		 * sequence numbers could land exactly on the same spot.
7707 		 * E.g. a rcd restart before the receive header wrapped.
7708 		 */
7709 		memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7710 
7711 		/* starting timeout */
7712 		rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7713 
7714 		/* enable the context */
7715 		rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7716 
7717 		/* clean the egr buffer size first */
7718 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7719 		rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7720 				& RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7721 					<< RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7722 
7723 		/* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7724 		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7725 		did_enable = 1;
7726 
7727 		/* zero RcvEgrIndexHead */
7728 		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7729 
7730 		/* set eager count and base index */
7731 		reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7732 			& RCV_EGR_CTRL_EGR_CNT_MASK)
7733 		       << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7734 			(((rcd->eager_base >> RCV_SHIFT)
7735 			  & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7736 			 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7737 		write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7738 
7739 		/*
7740 		 * Set TID (expected) count and base index.
7741 		 * rcd->expected_count is set to individual RcvArray entries,
7742 		 * not pairs, and the CSR takes a pair-count in groups of
7743 		 * four, so divide by 8.
7744 		 */
7745 		reg = (((rcd->expected_count >> RCV_SHIFT)
7746 					& RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7747 				<< RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7748 		      (((rcd->expected_base >> RCV_SHIFT)
7749 					& RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7750 				<< RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7751 		write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7752 		if (ctxt == VL15CTXT)
7753 			write_csr(dd, RCV_VL15, VL15CTXT);
7754 	}
7755 	if (op & HFI1_RCVCTRL_CTXT_DIS) {
7756 		write_csr(dd, RCV_VL15, 0);
7757 		rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7758 	}
7759 	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7760 		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7761 	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7762 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7763 	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7764 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7765 	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7766 		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7767 	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7768 		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7769 	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7770 		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7771 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7772 		/* In one-packet-per-eager mode, the size comes from
7773 		   the RcvArray entry. */
7774 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7775 		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7776 	}
7777 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7778 		rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7779 	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7780 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7781 	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7782 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7783 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7784 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7785 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7786 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7787 	rcd->rcvctrl = rcvctrl;
7788 	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7789 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7790 
7791 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
7792 	if (did_enable
7793 	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7794 		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7795 		if (reg != 0) {
7796 			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7797 				ctxt, reg);
7798 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7799 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7800 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7801 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7802 			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7803 			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7804 				ctxt, reg, reg == 0 ? "not" : "still");
7805 		}
7806 	}
7807 
7808 	if (did_enable) {
7809 		/*
7810 		 * The interrupt timeout and count must be set after
7811 		 * the context is enabled to take effect.
7812 		 */
7813 		/* set interrupt timeout */
7814 		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7815 			(u64)rcd->rcvavail_timeout <<
7816 				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7817 
7818 		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7819 		reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7820 		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7821 	}
7822 
7823 	if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7824 		/*
7825 		 * If the context has been disabled and the Tail Update has
7826 		 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7827 		 * it doesn't contain an address that is invalid.
7828 		 */
7829 		write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7830 }
7831 
7832 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7833 		    u64 **cntrp)
7834 {
7835 	int ret;
7836 	u64 val = 0;
7837 
7838 	if (namep) {
7839 		ret = dd->cntrnameslen;
7840 		if (pos != 0) {
7841 			dd_dev_err(dd, "read_cntrs does not support indexing");
7842 			return 0;
7843 		}
7844 		*namep = dd->cntrnames;
7845 	} else {
7846 		const struct cntr_entry *entry;
7847 		int i, j;
7848 
7849 		ret = (dd->ndevcntrs) * sizeof(u64);
7850 		if (pos != 0) {
7851 			dd_dev_err(dd, "read_cntrs does not support indexing");
7852 			return 0;
7853 		}
7854 
7855 		/* Get the start of the block of counters */
7856 		*cntrp = dd->cntrs;
7857 
7858 		/*
7859 		 * Now go and fill in each counter in the block.
7860 		 */
7861 		for (i = 0; i < DEV_CNTR_LAST; i++) {
7862 			entry = &dev_cntrs[i];
7863 			hfi1_cdbg(CNTR, "reading %s", entry->name);
7864 			if (entry->flags & CNTR_DISABLED) {
7865 				/* Nothing */
7866 				hfi1_cdbg(CNTR, "\tDisabled\n");
7867 			} else {
7868 				if (entry->flags & CNTR_VL) {
7869 					hfi1_cdbg(CNTR, "\tPer VL\n");
7870 					for (j = 0; j < C_VL_COUNT; j++) {
7871 						val = entry->rw_cntr(entry,
7872 								  dd, j,
7873 								  CNTR_MODE_R,
7874 								  0);
7875 						hfi1_cdbg(
7876 						   CNTR,
7877 						   "\t\tRead 0x%llx for %d\n",
7878 						   val, j);
7879 						dd->cntrs[entry->offset + j] =
7880 									    val;
7881 					}
7882 				} else {
7883 					val = entry->rw_cntr(entry, dd,
7884 							CNTR_INVALID_VL,
7885 							CNTR_MODE_R, 0);
7886 					dd->cntrs[entry->offset] = val;
7887 					hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7888 				}
7889 			}
7890 		}
7891 	}
7892 	return ret;
7893 }
7894 
7895 /*
7896  * Used by sysfs to create files for hfi stats to read
7897  */
7898 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7899 			char **namep, u64 **cntrp)
7900 {
7901 	int ret;
7902 	u64 val = 0;
7903 
7904 	if (namep) {
7905 		ret = dd->portcntrnameslen;
7906 		if (pos != 0) {
7907 			dd_dev_err(dd, "index not supported");
7908 			return 0;
7909 		}
7910 		*namep = dd->portcntrnames;
7911 	} else {
7912 		const struct cntr_entry *entry;
7913 		struct hfi1_pportdata *ppd;
7914 		int i, j;
7915 
7916 		ret = (dd->nportcntrs) * sizeof(u64);
7917 		if (pos != 0) {
7918 			dd_dev_err(dd, "indexing not supported");
7919 			return 0;
7920 		}
7921 		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7922 		*cntrp = ppd->cntrs;
7923 
7924 		for (i = 0; i < PORT_CNTR_LAST; i++) {
7925 			entry = &port_cntrs[i];
7926 			hfi1_cdbg(CNTR, "reading %s", entry->name);
7927 			if (entry->flags & CNTR_DISABLED) {
7928 				/* Nothing */
7929 				hfi1_cdbg(CNTR, "\tDisabled\n");
7930 				continue;
7931 			}
7932 
7933 			if (entry->flags & CNTR_VL) {
7934 				hfi1_cdbg(CNTR, "\tPer VL");
7935 				for (j = 0; j < C_VL_COUNT; j++) {
7936 					val = entry->rw_cntr(entry, ppd, j,
7937 							       CNTR_MODE_R,
7938 							       0);
7939 					hfi1_cdbg(
7940 					   CNTR,
7941 					   "\t\tRead 0x%llx for %d",
7942 					   val, j);
7943 					ppd->cntrs[entry->offset + j] = val;
7944 				}
7945 			} else {
7946 				val = entry->rw_cntr(entry, ppd,
7947 						       CNTR_INVALID_VL,
7948 						       CNTR_MODE_R,
7949 						       0);
7950 				ppd->cntrs[entry->offset] = val;
7951 				hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7952 			}
7953 		}
7954 	}
7955 	return ret;
7956 }
7957 
7958 static void free_cntrs(struct hfi1_devdata *dd)
7959 {
7960 	struct hfi1_pportdata *ppd;
7961 	int i;
7962 
7963 	if (dd->synth_stats_timer.data)
7964 		del_timer_sync(&dd->synth_stats_timer);
7965 	dd->synth_stats_timer.data = 0;
7966 	ppd = (struct hfi1_pportdata *)(dd + 1);
7967 	for (i = 0; i < dd->num_pports; i++, ppd++) {
7968 		kfree(ppd->cntrs);
7969 		kfree(ppd->scntrs);
7970 		free_percpu(ppd->ibport_data.rc_acks);
7971 		free_percpu(ppd->ibport_data.rc_qacks);
7972 		free_percpu(ppd->ibport_data.rc_delayed_comp);
7973 		ppd->cntrs = NULL;
7974 		ppd->scntrs = NULL;
7975 		ppd->ibport_data.rc_acks = NULL;
7976 		ppd->ibport_data.rc_qacks = NULL;
7977 		ppd->ibport_data.rc_delayed_comp = NULL;
7978 	}
7979 	kfree(dd->portcntrnames);
7980 	dd->portcntrnames = NULL;
7981 	kfree(dd->cntrs);
7982 	dd->cntrs = NULL;
7983 	kfree(dd->scntrs);
7984 	dd->scntrs = NULL;
7985 	kfree(dd->cntrnames);
7986 	dd->cntrnames = NULL;
7987 }
7988 
7989 #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7990 #define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7991 
7992 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7993 			      u64 *psval, void *context, int vl)
7994 {
7995 	u64 val;
7996 	u64 sval = *psval;
7997 
7998 	if (entry->flags & CNTR_DISABLED) {
7999 		dd_dev_err(dd, "Counter %s not enabled", entry->name);
8000 		return 0;
8001 	}
8002 
8003 	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8004 
8005 	val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8006 
8007 	/* If it's a synthetic counter there is more work we need to do */
8008 	if (entry->flags & CNTR_SYNTH) {
8009 		if (sval == CNTR_MAX) {
8010 			/* No need to read already saturated */
8011 			return CNTR_MAX;
8012 		}
8013 
8014 		if (entry->flags & CNTR_32BIT) {
8015 			/* 32bit counters can wrap multiple times */
8016 			u64 upper = sval >> 32;
8017 			u64 lower = (sval << 32) >> 32;
8018 
8019 			if (lower > val) { /* hw wrapped */
8020 				if (upper == CNTR_32BIT_MAX)
8021 					val = CNTR_MAX;
8022 				else
8023 					upper++;
8024 			}
8025 
8026 			if (val != CNTR_MAX)
8027 				val = (upper << 32) | val;
8028 
8029 		} else {
8030 			/* If we rolled we are saturated */
8031 			if ((val < sval) || (val > CNTR_MAX))
8032 				val = CNTR_MAX;
8033 		}
8034 	}
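	/*
	 * Illustrative walk-through of the 32-bit wrap handling above:
	 * with a saved sval of 0x2FFFFFF00 (upper == 0x2, lower ==
	 * 0xFFFFFF00) and a fresh hardware read of 0x10, lower > val
	 * flags a wrap, upper becomes 0x3, and the value stored below
	 * is 0x300000010.
	 */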
8035 
8036 	*psval = val;
8037 
8038 	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8039 
8040 	return val;
8041 }
8042 
8043 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8044 			       struct cntr_entry *entry,
8045 			       u64 *psval, void *context, int vl, u64 data)
8046 {
8047 	u64 val;
8048 
8049 	if (entry->flags & CNTR_DISABLED) {
8050 		dd_dev_err(dd, "Counter %s not enabled", entry->name);
8051 		return 0;
8052 	}
8053 
8054 	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8055 
8056 	if (entry->flags & CNTR_SYNTH) {
8057 		*psval = data;
8058 		if (entry->flags & CNTR_32BIT) {
8059 			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8060 					     (data << 32) >> 32);
8061 			val = data; /* return the full 64bit value */
8062 		} else {
8063 			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8064 					     data);
8065 		}
8066 	} else {
8067 		val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8068 	}
8069 
8070 	*psval = val;
8071 
8072 	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8073 
8074 	return val;
8075 }
8076 
8077 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8078 {
8079 	struct cntr_entry *entry;
8080 	u64 *sval;
8081 
8082 	entry = &dev_cntrs[index];
8083 	sval = dd->scntrs + entry->offset;
8084 
8085 	if (vl != CNTR_INVALID_VL)
8086 		sval += vl;
8087 
8088 	return read_dev_port_cntr(dd, entry, sval, dd, vl);
8089 }
8090 
8091 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8092 {
8093 	struct cntr_entry *entry;
8094 	u64 *sval;
8095 
8096 	entry = &dev_cntrs[index];
8097 	sval = dd->scntrs + entry->offset;
8098 
8099 	if (vl != CNTR_INVALID_VL)
8100 		sval += vl;
8101 
8102 	return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8103 }
8104 
8105 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8106 {
8107 	struct cntr_entry *entry;
8108 	u64 *sval;
8109 
8110 	entry = &port_cntrs[index];
8111 	sval = ppd->scntrs + entry->offset;
8112 
8113 	if (vl != CNTR_INVALID_VL)
8114 		sval += vl;
8115 
8116 	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8117 	    (index <= C_RCV_HDR_OVF_LAST)) {
8118 		/* We do not want to bother for disabled contexts */
8119 		return 0;
8120 	}
8121 
8122 	return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8123 }
8124 
8125 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8126 {
8127 	struct cntr_entry *entry;
8128 	u64 *sval;
8129 
8130 	entry = &port_cntrs[index];
8131 	sval = ppd->scntrs + entry->offset;
8132 
8133 	if (vl != CNTR_INVALID_VL)
8134 		sval += vl;
8135 
8136 	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8137 	    (index <= C_RCV_HDR_OVF_LAST)) {
8138 		/* We do not want to bother for disabled contexts */
8139 		return 0;
8140 	}
8141 
8142 	return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8143 }
8144 
8145 static void update_synth_timer(unsigned long opaque)
8146 {
8147 	u64 cur_tx;
8148 	u64 cur_rx;
8149 	u64 total_flits;
8150 	u8 update = 0;
8151 	int i, j, vl;
8152 	struct hfi1_pportdata *ppd;
8153 	struct cntr_entry *entry;
8154 
8155 	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8156 
8157 	/*
8158 	 * Rather than keep beating on the CSRs pick a minimal set that we can
8159 	 * check to watch for potential roll over. We can do this by looking at
8160 	 * the number of flits sent/recv. If the total flits exceeds 32bits then
8161 	 * we have to iterate all the counters and update.
8162 	 */
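	/*
	 * For illustration (assuming no 32-bit counter advances faster
	 * than the flit counters): the wrap handling in
	 * read_dev_port_cntr() only accounts for a single wrap per read,
	 * so this sweep must run before another 2^32 flits
	 * (CNTR_32BIT_MAX, checked below) can accumulate between updates.
	 */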
8163 	entry = &dev_cntrs[C_DC_RCV_FLITS];
8164 	cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8165 
8166 	entry = &dev_cntrs[C_DC_XMIT_FLITS];
8167 	cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8168 
8169 	hfi1_cdbg(
8170 	    CNTR,
8171 	    "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8172 	    dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8173 
8174 	if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8175 		/*
8176 		 * May not be strictly necessary to update but it won't hurt and
8177 		 * simplifies the logic here.
8178 		 */
8179 		update = 1;
8180 		hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8181 			  dd->unit);
8182 	} else {
8183 		total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8184 		hfi1_cdbg(CNTR,
8185 			  "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8186 			  total_flits, (u64)CNTR_32BIT_MAX);
8187 		if (total_flits >= CNTR_32BIT_MAX) {
8188 			hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8189 				  dd->unit);
8190 			update = 1;
8191 		}
8192 	}
8193 
8194 	if (update) {
8195 		hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8196 		for (i = 0; i < DEV_CNTR_LAST; i++) {
8197 			entry = &dev_cntrs[i];
8198 			if (entry->flags & CNTR_VL) {
8199 				for (vl = 0; vl < C_VL_COUNT; vl++)
8200 					read_dev_cntr(dd, i, vl);
8201 			} else {
8202 				read_dev_cntr(dd, i, CNTR_INVALID_VL);
8203 			}
8204 		}
8205 		ppd = (struct hfi1_pportdata *)(dd + 1);
8206 		for (i = 0; i < dd->num_pports; i++, ppd++) {
8207 			for (j = 0; j < PORT_CNTR_LAST; j++) {
8208 				entry = &port_cntrs[j];
8209 				if (entry->flags & CNTR_VL) {
8210 					for (vl = 0; vl < C_VL_COUNT; vl++)
8211 						read_port_cntr(ppd, j, vl);
8212 				} else {
8213 					read_port_cntr(ppd, j, CNTR_INVALID_VL);
8214 				}
8215 			}
8216 		}
8217 
8218 		/*
8219 		 * We want the value in the register. The goal is to keep track
8220 		 * of the number of "ticks" not the counter value. In other
8221 		 * words if the register rolls we want to notice it and go ahead
8222 		 * and force an update.
8223 		 */
8224 		entry = &dev_cntrs[C_DC_XMIT_FLITS];
8225 		dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8226 						CNTR_MODE_R, 0);
8227 
8228 		entry = &dev_cntrs[C_DC_RCV_FLITS];
8229 		dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8230 						CNTR_MODE_R, 0);
8231 
8232 		hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8233 			  dd->unit, dd->last_tx, dd->last_rx);
8234 
8235 	} else {
8236 		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8237 	}
8238 
8239 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8240 }
8241 
8242 #define C_MAX_NAME 13 /* 12 chars + one for \0 */
8243 static int init_cntrs(struct hfi1_devdata *dd)
8244 {
8245 	int i, rcv_ctxts, index, j;
8246 	size_t sz;
8247 	char *p;
8248 	char name[C_MAX_NAME];
8249 	struct hfi1_pportdata *ppd;
8250 
8251 	/* set up the stats timer; the add_timer is done at the end */
8252 	setup_timer(&dd->synth_stats_timer, update_synth_timer,
8253 		    (unsigned long)dd);
8254 
8255 	/***********************/
8256 	/* per device counters */
8257 	/***********************/
8258 
8259 	/* size names and determine how many we have*/
8260 	dd->ndevcntrs = 0;
8261 	sz = 0;
8262 	index = 0;
8263 
8264 	for (i = 0; i < DEV_CNTR_LAST; i++) {
8265 		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8266 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8267 			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8268 			continue;
8269 		}
8270 
8271 		if (dev_cntrs[i].flags & CNTR_VL) {
8272 			hfi1_dbg_early("\tProcessing VL cntr\n");
8273 			dev_cntrs[i].offset = index;
8274 			for (j = 0; j < C_VL_COUNT; j++) {
8275 				memset(name, '\0', C_MAX_NAME);
8276 				snprintf(name, C_MAX_NAME, "%s%d",
8277 					dev_cntrs[i].name,
8278 					vl_from_idx(j));
8279 				sz += strlen(name);
8280 				sz++;
8281 				hfi1_dbg_early("\t\t%s\n", name);
8282 				dd->ndevcntrs++;
8283 				index++;
8284 			}
8285 		} else {
8286 			/* +1 for newline  */
8287 			sz += strlen(dev_cntrs[i].name) + 1;
8288 			dd->ndevcntrs++;
8289 			dev_cntrs[i].offset = index;
8290 			index++;
8291 			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8292 		}
8293 	}
8294 
8295 	/* allocate space for the counter values */
8296 	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8297 	if (!dd->cntrs)
8298 		goto bail;
8299 
8300 	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8301 	if (!dd->scntrs)
8302 		goto bail;
8303 
8304 
8305 	/* allocate space for the counter names */
8306 	dd->cntrnameslen = sz;
8307 	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8308 	if (!dd->cntrnames)
8309 		goto bail;
8310 
8311 	/* fill in the names */
8312 	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8313 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8314 			/* Nothing */
8315 		} else {
8316 			if (dev_cntrs[i].flags & CNTR_VL) {
8317 				for (j = 0; j < C_VL_COUNT; j++) {
8318 					memset(name, '\0', C_MAX_NAME);
8319 					snprintf(name, C_MAX_NAME, "%s%d",
8320 						dev_cntrs[i].name,
8321 						vl_from_idx(j));
8322 					memcpy(p, name, strlen(name));
8323 					p += strlen(name);
8324 					*p++ = '\n';
8325 				}
8326 			} else {
8327 				memcpy(p, dev_cntrs[i].name,
8328 				       strlen(dev_cntrs[i].name));
8329 				p += strlen(dev_cntrs[i].name);
8330 				*p++ = '\n';
8331 			}
8332 			index++;
8333 		}
8334 	}
8335 
8336 	/*********************/
8337 	/* per port counters */
8338 	/*********************/
8339 
8340 	/*
8341 	 * Go through the counters for the overflows and disable the ones we
8342 	 * don't need. This varies based on platform so we need to do it
8343 	 * dynamically here.
8344 	 */
8345 	rcv_ctxts = dd->num_rcv_contexts;
8346 	for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8347 	     i <= C_RCV_HDR_OVF_LAST; i++) {
8348 		port_cntrs[i].flags |= CNTR_DISABLED;
8349 	}
8350 
8351 	/* size port counter names and determine how many we have*/
8352 	sz = 0;
8353 	dd->nportcntrs = 0;
8354 	for (i = 0; i < PORT_CNTR_LAST; i++) {
8355 		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8356 		if (port_cntrs[i].flags & CNTR_DISABLED) {
8357 			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8358 			continue;
8359 		}
8360 
8361 		if (port_cntrs[i].flags & CNTR_VL) {
8362 			hfi1_dbg_early("\tProcessing VL cntr\n");
8363 			port_cntrs[i].offset = dd->nportcntrs;
8364 			for (j = 0; j < C_VL_COUNT; j++) {
8365 				memset(name, '\0', C_MAX_NAME);
8366 				snprintf(name, C_MAX_NAME, "%s%d",
8367 					port_cntrs[i].name,
8368 					vl_from_idx(j));
8369 				sz += strlen(name);
8370 				sz++;
8371 				hfi1_dbg_early("\t\t%s\n", name);
8372 				dd->nportcntrs++;
8373 			}
8374 		} else {
8375 			/* +1 for newline  */
8376 			sz += strlen(port_cntrs[i].name) + 1;
8377 			port_cntrs[i].offset = dd->nportcntrs;
8378 			dd->nportcntrs++;
8379 			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8380 		}
8381 	}
8382 
8383 	/* allocate space for the counter names */
8384 	dd->portcntrnameslen = sz;
8385 	dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8386 	if (!dd->portcntrnames)
8387 		goto bail;
8388 
8389 	/* fill in port cntr names */
8390 	for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8391 		if (port_cntrs[i].flags & CNTR_DISABLED)
8392 			continue;
8393 
8394 		if (port_cntrs[i].flags & CNTR_VL) {
8395 			for (j = 0; j < C_VL_COUNT; j++) {
8396 				memset(name, '\0', C_MAX_NAME);
8397 				snprintf(name, C_MAX_NAME, "%s%d",
8398 					port_cntrs[i].name,
8399 					vl_from_idx(j));
8400 				memcpy(p, name, strlen(name));
8401 				p += strlen(name);
8402 				*p++ = '\n';
8403 			}
8404 		} else {
8405 			memcpy(p, port_cntrs[i].name,
8406 			       strlen(port_cntrs[i].name));
8407 			p += strlen(port_cntrs[i].name);
8408 			*p++ = '\n';
8409 		}
8410 	}
8411 
8412 	/* allocate per port storage for counter values */
8413 	ppd = (struct hfi1_pportdata *)(dd + 1);
8414 	for (i = 0; i < dd->num_pports; i++, ppd++) {
8415 		ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8416 		if (!ppd->cntrs)
8417 			goto bail;
8418 
8419 		ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8420 		if (!ppd->scntrs)
8421 			goto bail;
8422 	}
8423 
8424 	/* CPU counters need to be allocated and zeroed */
8425 	if (init_cpu_counters(dd))
8426 		goto bail;
8427 
8428 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8429 	return 0;
8430 bail:
8431 	free_cntrs(dd);
8432 	return -ENOMEM;
8433 }
8434 
8435 
8436 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8437 {
8438 	switch (chip_lstate) {
8439 	default:
8440 		dd_dev_err(dd,
8441 			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8442 			 chip_lstate);
8443 		/* fall through */
8444 	case LSTATE_DOWN:
8445 		return IB_PORT_DOWN;
8446 	case LSTATE_INIT:
8447 		return IB_PORT_INIT;
8448 	case LSTATE_ARMED:
8449 		return IB_PORT_ARMED;
8450 	case LSTATE_ACTIVE:
8451 		return IB_PORT_ACTIVE;
8452 	}
8453 }
8454 
8455 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8456 {
8457 	/* look at the HFI meta-states only */
8458 	switch (chip_pstate & 0xf0) {
8459 	default:
8460 		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8461 			chip_pstate);
8462 		/* fall through */
8463 	case PLS_DISABLED:
8464 		return IB_PORTPHYSSTATE_DISABLED;
8465 	case PLS_OFFLINE:
8466 		return OPA_PORTPHYSSTATE_OFFLINE;
8467 	case PLS_POLLING:
8468 		return IB_PORTPHYSSTATE_POLLING;
8469 	case PLS_CONFIGPHY:
8470 		return IB_PORTPHYSSTATE_TRAINING;
8471 	case PLS_LINKUP:
8472 		return IB_PORTPHYSSTATE_LINKUP;
8473 	case PLS_PHYTEST:
8474 		return IB_PORTPHYSSTATE_PHY_TEST;
8475 	}
8476 }
8477 
8478 /* return the OPA port logical state name */
8479 const char *opa_lstate_name(u32 lstate)
8480 {
8481 	static const char * const port_logical_names[] = {
8482 		"PORT_NOP",
8483 		"PORT_DOWN",
8484 		"PORT_INIT",
8485 		"PORT_ARMED",
8486 		"PORT_ACTIVE",
8487 		"PORT_ACTIVE_DEFER",
8488 	};
8489 	if (lstate < ARRAY_SIZE(port_logical_names))
8490 		return port_logical_names[lstate];
8491 	return "unknown";
8492 }
8493 
8494 /* return the OPA port physical state name */
8495 const char *opa_pstate_name(u32 pstate)
8496 {
8497 	static const char * const port_physical_names[] = {
8498 		"PHYS_NOP",
8499 		"reserved1",
8500 		"PHYS_POLL",
8501 		"PHYS_DISABLED",
8502 		"PHYS_TRAINING",
8503 		"PHYS_LINKUP",
8504 		"PHYS_LINK_ERR_RECOVER",
8505 		"PHYS_PHY_TEST",
8506 		"reserved8",
8507 		"PHYS_OFFLINE",
8508 		"PHYS_GANGED",
8509 		"PHYS_TEST",
8510 	};
8511 	if (pstate < ARRAY_SIZE(port_physical_names))
8512 		return port_physical_names[pstate];
8513 	return "unknown";
8514 }
8515 
8516 /*
8517  * Read the hardware link state and set the driver's cached value of it.
8518  * Return the (new) current value.
8519  */
8520 u32 get_logical_state(struct hfi1_pportdata *ppd)
8521 {
8522 	u32 new_state;
8523 
8524 	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8525 	if (new_state != ppd->lstate) {
8526 		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8527 			opa_lstate_name(new_state), new_state);
8528 		ppd->lstate = new_state;
8529 	}
8530 	/*
8531 	 * Set port status flags in the page mapped into userspace
8532 	 * memory. Do it here to ensure a reliable state - this is
8533 	 * the only function called by all state handling code.
8534 	 * Always set the flags because the cached value
8535 	 * might have been changed explicitly outside of this
8536 	 * function.
8537 	 */
8538 	if (ppd->statusp) {
8539 		switch (ppd->lstate) {
8540 		case IB_PORT_DOWN:
8541 		case IB_PORT_INIT:
8542 			*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8543 					   HFI1_STATUS_IB_READY);
8544 			break;
8545 		case IB_PORT_ARMED:
8546 			*ppd->statusp |= HFI1_STATUS_IB_CONF;
8547 			break;
8548 		case IB_PORT_ACTIVE:
8549 			*ppd->statusp |= HFI1_STATUS_IB_READY;
8550 			break;
8551 		}
8552 	}
8553 	return ppd->lstate;
8554 }
8555 
8556 /**
8557  * wait_logical_linkstate - wait for an IB link state change to occur
8558  * @ppd: port device
8559  * @state: the state to wait for
8560  * @msecs: the number of milliseconds to wait
8561  *
8562  * Wait up to msecs milliseconds for IB link state change to occur.
8563  * For now, take the easy polling route.
8564  * Returns 0 if state reached, otherwise -ETIMEDOUT.
8565  */
8566 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8567 				  int msecs)
8568 {
8569 	unsigned long timeout;
8570 
8571 	timeout = jiffies + msecs_to_jiffies(msecs);
8572 	while (1) {
8573 		if (get_logical_state(ppd) == state)
8574 			return 0;
8575 		if (time_after(jiffies, timeout))
8576 			break;
8577 		msleep(20);
8578 	}
8579 	dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8580 
8581 	return -ETIMEDOUT;
8582 }
8583 
8584 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8585 {
8586 	static u32 remembered_state = 0xff;
8587 	u32 pstate;
8588 	u32 ib_pstate;
8589 
8590 	pstate = read_physical_state(ppd->dd);
8591 	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8592 	if (remembered_state != ib_pstate) {
8593 		dd_dev_info(ppd->dd,
8594 			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
8595 			__func__, opa_pstate_name(ib_pstate), ib_pstate,
8596 			pstate);
8597 		remembered_state = ib_pstate;
8598 	}
8599 	return ib_pstate;
8600 }
8601 
8602 /*
8603  * Read/modify/write ASIC_QSFP register bits as selected by mask
8604  * data: 0 or 1 in the positions depending on what needs to be written
8605  * dir: 0 for read, 1 for write
8606  * mask: select by setting
8607  *      I2CCLK  (bit 0)
8608  *      I2CDATA (bit 1)
8609  */
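/*
 * For illustration (based only on the code below): mask = 0x2 with
 * dir = 0x2 drives the I2CDATA pin as an output, mask = 0x2 with
 * dir = 0 releases it back to an input, and mask = 0 skips the OE
 * write entirely and just samples the QSFP_IN register.
 */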
8610 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8611 		  u32 mask)
8612 {
8613 	u64 qsfp_oe, target_oe;
8614 
8615 	target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8616 	if (mask) {
8617 		/* We are writing register bits, so lock access */
8618 		dir &= mask;
8619 		data &= mask;
8620 
8621 		qsfp_oe = read_csr(dd, target_oe);
8622 		qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8623 		write_csr(dd, target_oe, qsfp_oe);
8624 	}
8625 	/* We are exclusively reading bits here, but it is unlikely
8626 	 * we'll get valid data when we set the direction of the pin
8627 	 * in the same call, so a caller that just changed the pin
8628 	 * direction should call this function again to get valid data
8629 	 */
8630 	return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8631 }
8632 
8633 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8634 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8635 
8636 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
8637 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8638 
8639 int hfi1_init_ctxt(struct send_context *sc)
8640 {
8641 	if (sc != NULL) {
8642 		struct hfi1_devdata *dd = sc->dd;
8643 		u64 reg;
8644 		u8 set = (sc->type == SC_USER ?
8645 			  HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8646 			  HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8647 		reg = read_kctxt_csr(dd, sc->hw_context,
8648 				     SEND_CTXT_CHECK_ENABLE);
8649 		if (set)
8650 			CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8651 		else
8652 			SET_STATIC_RATE_CONTROL_SMASK(reg);
8653 		write_kctxt_csr(dd, sc->hw_context,
8654 				SEND_CTXT_CHECK_ENABLE, reg);
8655 	}
8656 	return 0;
8657 }
8658 
8659 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8660 {
8661 	int ret = 0;
8662 	u64 reg;
8663 
8664 	if (dd->icode != ICODE_RTL_SILICON) {
8665 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8666 			dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8667 				    __func__);
8668 		return -EINVAL;
8669 	}
8670 	reg = read_csr(dd, ASIC_STS_THERM);
8671 	temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8672 		      ASIC_STS_THERM_CURR_TEMP_MASK);
8673 	temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8674 			ASIC_STS_THERM_LO_TEMP_MASK);
8675 	temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8676 			ASIC_STS_THERM_HI_TEMP_MASK);
8677 	temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8678 			  ASIC_STS_THERM_CRIT_TEMP_MASK);
8679 	/* triggers is a 3-bit value - 1 bit per trigger. */
8680 	temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8681 
8682 	return ret;
8683 }
8684 
8685 /* ========================================================================= */
8686 
8687 /*
8688  * Enable/disable chip from delivering interrupts.
8689  */
8690 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8691 {
8692 	int i;
8693 
8694 	/*
8695 	 * In HFI, the mask needs to be 1 to allow interrupts.
8696 	 */
8697 	if (enable) {
8698 		u64 cce_int_mask;
8699 		const int qsfp1_int_smask = QSFP1_INT % 64;
8700 		const int qsfp2_int_smask = QSFP2_INT % 64;
8701 
8702 		/* enable all interrupts */
8703 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8704 			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8705 
8706 		/*
8707 		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8708 		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8709 		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8710 		 * the index of the appropriate CSR in the CCEIntMask CSR array
8711 		 */
8712 		cce_int_mask = read_csr(dd, CCE_INT_MASK +
8713 						(8*(QSFP1_INT/64)));
8714 		if (dd->hfi1_id) {
8715 			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8716 			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8717 					cce_int_mask);
8718 		} else {
8719 			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8720 			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8721 					cce_int_mask);
8722 		}
8723 	} else {
8724 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8725 			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8726 	}
8727 }
8728 
8729 /*
8730  * Clear all interrupt sources on the chip.
8731  */
8732 static void clear_all_interrupts(struct hfi1_devdata *dd)
8733 {
8734 	int i;
8735 
8736 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8737 		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8738 
8739 	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8740 	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8741 	write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8742 	write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8743 	write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8744 	write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8745 	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8746 	for (i = 0; i < dd->chip_send_contexts; i++)
8747 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8748 	for (i = 0; i < dd->chip_sdma_engines; i++)
8749 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8750 
8751 	write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8752 	write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8753 	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8754 }
8755 
8756 /* Move to pcie.c? */
8757 static void disable_intx(struct pci_dev *pdev)
8758 {
8759 	pci_intx(pdev, 0);
8760 }
8761 
8762 static void clean_up_interrupts(struct hfi1_devdata *dd)
8763 {
8764 	int i;
8765 
8766 	/* remove irqs - must happen before disabling/turning off */
8767 	if (dd->num_msix_entries) {
8768 		/* MSI-X */
8769 		struct hfi1_msix_entry *me = dd->msix_entries;
8770 
8771 		for (i = 0; i < dd->num_msix_entries; i++, me++) {
8772 			if (me->arg == NULL) /* => no irq, no affinity */
8773 				break;
8774 			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8775 					NULL);
8776 			free_irq(me->msix.vector, me->arg);
8777 		}
8778 	} else {
8779 		/* INTx */
8780 		if (dd->requested_intx_irq) {
8781 			free_irq(dd->pcidev->irq, dd);
8782 			dd->requested_intx_irq = 0;
8783 		}
8784 	}
8785 
8786 	/* turn off interrupts */
8787 	if (dd->num_msix_entries) {
8788 		/* MSI-X */
8789 		hfi1_nomsix(dd);
8790 	} else {
8791 		/* INTx */
8792 		disable_intx(dd->pcidev);
8793 	}
8794 
8795 	/* clean structures */
8796 	for (i = 0; i < dd->num_msix_entries; i++)
8797 		free_cpumask_var(dd->msix_entries[i].mask);
8798 	kfree(dd->msix_entries);
8799 	dd->msix_entries = NULL;
8800 	dd->num_msix_entries = 0;
8801 }
8802 
8803 /*
8804  * Remap the interrupt source from the general handler to the given MSI-X
8805  * interrupt.
8806  */
8807 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8808 {
8809 	u64 reg;
8810 	int m, n;
8811 
8812 	/* clear from the handled mask of the general interrupt */
8813 	m = isrc / 64;
8814 	n = isrc % 64;
8815 	dd->gi_mask[m] &= ~((u64)1 << n);
8816 
8817 	/* direct the chip source to the given MSI-X interrupt */
8818 	m = isrc / 8;
8819 	n = isrc % 8;
8820 	reg = read_csr(dd, CCE_INT_MAP + (8*m));
8821 	reg &= ~((u64)0xff << (8*n));
8822 	reg |= ((u64)msix_intr & 0xff) << (8*n);
8823 	write_csr(dd, CCE_INT_MAP + (8*m), reg);
8824 }
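/*
 * Worked example for remap_intr(): isrc 72 clears bit 8 of
 * dd->gi_mask[1] (72 / 64 == 1, 72 % 64 == 8) and writes the MSI-X
 * vector number into byte 0 of the CCE_INT_MAP CSR at index 9
 * (72 / 8 == 9, 72 % 8 == 0); each 64-bit map CSR holds eight 8-bit
 * vector numbers.
 */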
8825 
8826 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8827 				  int engine, int msix_intr)
8828 {
8829 	/*
8830 	 * SDMA engine interrupt sources grouped by type, rather than
8831 	 * engine.  Per-engine interrupts are as follows:
8832 	 *	SDMA
8833 	 *	SDMAProgress
8834 	 *	SDMAIdle
8835 	 */
8836 	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8837 		msix_intr);
8838 	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8839 		msix_intr);
8840 	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8841 		msix_intr);
8842 }
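/*
 * For illustration, assuming TXE_NUM_SDMA_ENGINES is 16: engine 2
 * would remap chip sources IS_SDMA_START + 2, + 18 and + 34 (its
 * SDMA, SDMAProgress and SDMAIdle interrupts) onto the same MSI-X
 * vector.
 */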
8843 
8844 static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8845 					      int rx, int msix_intr)
8846 {
8847 	remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8848 }
8849 
8850 static int request_intx_irq(struct hfi1_devdata *dd)
8851 {
8852 	int ret;
8853 
8854 	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8855 		dd->unit);
8856 	ret = request_irq(dd->pcidev->irq, general_interrupt,
8857 				  IRQF_SHARED, dd->intx_name, dd);
8858 	if (ret)
8859 		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8860 				ret);
8861 	else
8862 		dd->requested_intx_irq = 1;
8863 	return ret;
8864 }
8865 
8866 static int request_msix_irqs(struct hfi1_devdata *dd)
8867 {
8868 	const struct cpumask *local_mask;
8869 	cpumask_var_t def, rcv;
8870 	bool def_ret, rcv_ret;
8871 	int first_general, last_general;
8872 	int first_sdma, last_sdma;
8873 	int first_rx, last_rx;
8874 	int first_cpu, restart_cpu, curr_cpu;
8875 	int rcv_cpu, sdma_cpu;
8876 	int i, ret = 0, possible;
8877 	int ht;
8878 
8879 	/* calculate the ranges we are going to use */
8880 	first_general = 0;
8881 	first_sdma = last_general = first_general + 1;
8882 	first_rx = last_sdma = first_sdma + dd->num_sdma;
8883 	last_rx = first_rx + dd->n_krcv_queues;
8884 
8885 	/*
8886 	 * Interrupt affinity.
8887 	 *
8888 	 * non-rcv avail gets a default mask that
8889 	 * starts as possible cpus with threads reset
8890 	 * and each rcv avail reset.
8891 	 *
8892 	 * rcv avail gets node relative 1 wrapping back
8893 	 * to the node relative 1 as necessary.
8894 	 *
8895 	 */
8896 	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8897 	/* if first cpu is invalid, use NUMA 0 */
8898 	if (cpumask_first(local_mask) >= nr_cpu_ids)
8899 		local_mask = topology_core_cpumask(0);
8900 
8901 	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8902 	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8903 	if (!def_ret || !rcv_ret)
8904 		goto bail;
8905 	/* use local mask as default */
8906 	cpumask_copy(def, local_mask);
8907 	possible = cpumask_weight(def);
8908 	/* disarm threads from default */
8909 	ht = cpumask_weight(
8910 			topology_sibling_cpumask(cpumask_first(local_mask)));
8911 	for (i = possible/ht; i < possible; i++)
8912 		cpumask_clear_cpu(i, def);
8913 	/* reset possible */
8914 	possible = cpumask_weight(def);
8915 	/* def now has full cores on chosen node*/
8916 	first_cpu = cpumask_first(def);
8917 	if (nr_cpu_ids >= first_cpu)
8918 		first_cpu++;
8919 	restart_cpu = first_cpu;
8920 	curr_cpu = restart_cpu;
8921 
8922 	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8923 		cpumask_clear_cpu(curr_cpu, def);
8924 		cpumask_set_cpu(curr_cpu, rcv);
8925 		if (curr_cpu >= possible)
8926 			curr_cpu = restart_cpu;
8927 		else
8928 			curr_cpu++;
8929 	}
8930 	/* def mask has non-rcv, rcv has recv mask */
8931 	rcv_cpu = cpumask_first(rcv);
8932 	sdma_cpu = cpumask_first(def);
8933 
8934 	/*
8935 	 * Sanity check - the code expects all SDMA chip source
8936 	 * interrupts to be in the same CSR, starting at bit 0.  Verify
8937 	 * that this is true by checking the bit location of the start.
8938 	 */
8939 	BUILD_BUG_ON(IS_SDMA_START % 64);
8940 
8941 	for (i = 0; i < dd->num_msix_entries; i++) {
8942 		struct hfi1_msix_entry *me = &dd->msix_entries[i];
8943 		const char *err_info;
8944 		irq_handler_t handler;
8945 		irq_handler_t thread = NULL;
8946 		void *arg;
8947 		int idx;
8948 		struct hfi1_ctxtdata *rcd = NULL;
8949 		struct sdma_engine *sde = NULL;
8950 
8951 		/* obtain the arguments to request_irq */
8952 		if (first_general <= i && i < last_general) {
8953 			idx = i - first_general;
8954 			handler = general_interrupt;
8955 			arg = dd;
8956 			snprintf(me->name, sizeof(me->name),
8957 				DRIVER_NAME"_%d", dd->unit);
8958 			err_info = "general";
8959 		} else if (first_sdma <= i && i < last_sdma) {
8960 			idx = i - first_sdma;
8961 			sde = &dd->per_sdma[idx];
8962 			handler = sdma_interrupt;
8963 			arg = sde;
8964 			snprintf(me->name, sizeof(me->name),
8965 				DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8966 			err_info = "sdma";
8967 			remap_sdma_interrupts(dd, idx, i);
8968 		} else if (first_rx <= i && i < last_rx) {
8969 			idx = i - first_rx;
8970 			rcd = dd->rcd[idx];
8971 			/* no interrupt if no rcd */
8972 			if (!rcd)
8973 				continue;
8974 			/*
8975 			 * Set the interrupt register and mask for this
8976 			 * context's interrupt.
8977 			 */
8978 			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8979 			rcd->imask = ((u64)1) <<
8980 					((IS_RCVAVAIL_START+idx) % 64);
8981 			handler = receive_context_interrupt;
8982 			thread = receive_context_thread;
8983 			arg = rcd;
8984 			snprintf(me->name, sizeof(me->name),
8985 				DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8986 			err_info = "receive context";
8987 			remap_receive_available_interrupt(dd, idx, i);
8988 		} else {
8989 			/* not in our expected range - complain, then
8990 			   ignore it */
8991 			dd_dev_err(dd,
8992 				"Unexpected extra MSI-X interrupt %d\n", i);
8993 			continue;
8994 		}
8995 		/* no argument, no interrupt */
8996 		if (arg == NULL)
8997 			continue;
8998 		/* make sure the name is terminated */
8999 		me->name[sizeof(me->name)-1] = 0;
9000 
9001 		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9002 						me->name, arg);
9003 		if (ret) {
9004 			dd_dev_err(dd,
9005 				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9006 				 err_info, me->msix.vector, idx, ret);
9007 			return ret;
9008 		}
9009 		/*
9010 		 * assign arg after request_irq call, so it will be
9011 		 * cleaned up
9012 		 */
9013 		me->arg = arg;
9014 
9015 		if (!zalloc_cpumask_var(
9016 			&dd->msix_entries[i].mask,
9017 			GFP_KERNEL))
9018 			goto bail;
9019 		if (handler == sdma_interrupt) {
9020 			dd_dev_info(dd, "sdma engine %d cpu %d\n",
9021 				sde->this_idx, sdma_cpu);
9022 			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9023 			sdma_cpu = cpumask_next(sdma_cpu, def);
9024 			if (sdma_cpu >= nr_cpu_ids)
9025 				sdma_cpu = cpumask_first(def);
9026 		} else if (handler == receive_context_interrupt) {
9027 			dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9028 				rcd->ctxt, rcv_cpu);
9029 			cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9030 			rcv_cpu = cpumask_next(rcv_cpu, rcv);
9031 			if (rcv_cpu >= nr_cpu_ids)
9032 				rcv_cpu = cpumask_first(rcv);
9033 		} else {
9034 			/* otherwise first def */
9035 			dd_dev_info(dd, "%s cpu %d\n",
9036 				err_info, cpumask_first(def));
9037 			cpumask_set_cpu(
9038 				cpumask_first(def), dd->msix_entries[i].mask);
9039 		}
9040 		irq_set_affinity_hint(
9041 			dd->msix_entries[i].msix.vector,
9042 			dd->msix_entries[i].mask);
9043 	}
9044 
9045 out:
9046 	free_cpumask_var(def);
9047 	free_cpumask_var(rcv);
9048 	return ret;
9049 bail:
9050 	ret = -ENOMEM;
9051 	goto  out;
9052 }
9053 
9054 /*
9055  * Set the general handler to accept all interrupts, remap all
9056  * chip interrupts back to MSI-X 0.
9057  */
9058 static void reset_interrupts(struct hfi1_devdata *dd)
9059 {
9060 	int i;
9061 
9062 	/* all interrupts handled by the general handler */
9063 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9064 		dd->gi_mask[i] = ~(u64)0;
9065 
9066 	/* all chip interrupts map to MSI-X 0 */
9067 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9068 		write_csr(dd, CCE_INT_MAP + (8*i), 0);
9069 }
9070 
9071 static int set_up_interrupts(struct hfi1_devdata *dd)
9072 {
9073 	struct hfi1_msix_entry *entries;
9074 	u32 total, request;
9075 	int i, ret;
9076 	int single_interrupt = 0; /* we expect to have all the interrupts */
9077 
9078 	/*
9079 	 * Interrupt count:
9080 	 *	1 general, "slow path" interrupt (includes the SDMA engines
9081 	 *		slow source, SDMACleanupDone)
9082 	 *	N interrupts - one per used SDMA engine
9083 	 *	M interrupt - one per kernel receive context
9084 	 */
9085 	total = 1 + dd->num_sdma + dd->n_krcv_queues;
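	/*
	 * For illustration: a device using 16 SDMA engines and 8 kernel
	 * receive contexts would ask for 1 + 16 + 8 = 25 MSI-X vectors
	 * here (the 16 and 8 are example values, not chip constants).
	 */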
9086 
9087 	entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9088 	if (!entries) {
9089 		ret = -ENOMEM;
9090 		goto fail;
9091 	}
9092 	/* 1-1 MSI-X entry assignment */
9093 	for (i = 0; i < total; i++)
9094 		entries[i].msix.entry = i;
9095 
9096 	/* ask for MSI-X interrupts */
9097 	request = total;
9098 	request_msix(dd, &request, entries);
9099 
9100 	if (request == 0) {
9101 		/* using INTx */
9102 		/* dd->num_msix_entries already zero */
9103 		kfree(entries);
9104 		single_interrupt = 1;
9105 		dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9106 	} else {
9107 		/* using MSI-X */
9108 		dd->num_msix_entries = request;
9109 		dd->msix_entries = entries;
9110 
9111 		if (request != total) {
9112 			/* using MSI-X, with reduced interrupts */
9113 			dd_dev_err(
9114 				dd,
9115 				"cannot handle reduced interrupt case, want %u, got %u\n",
9116 				total, request);
9117 			ret = -EINVAL;
9118 			goto fail;
9119 		}
9120 		dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9121 	}
9122 
9123 	/* mask all interrupts */
9124 	set_intr_state(dd, 0);
9125 	/* clear all pending interrupts */
9126 	clear_all_interrupts(dd);
9127 
9128 	/* reset general handler mask, chip MSI-X mappings */
9129 	reset_interrupts(dd);
9130 
9131 	if (single_interrupt)
9132 		ret = request_intx_irq(dd);
9133 	else
9134 		ret = request_msix_irqs(dd);
9135 	if (ret)
9136 		goto fail;
9137 
9138 	return 0;
9139 
9140 fail:
9141 	clean_up_interrupts(dd);
9142 	return ret;
9143 }
9144 
9145 /*
9146  * Set up context values in dd.  Sets:
9147  *
9148  *	num_rcv_contexts - number of contexts being used
9149  *	n_krcv_queues - number of kernel contexts
9150  *	first_user_ctxt - first non-kernel context in array of contexts
9151  *	freectxts  - number of free user contexts
9152  *	num_send_contexts - number of PIO send contexts being used
9153  */
9154 static int set_up_context_variables(struct hfi1_devdata *dd)
9155 {
9156 	int num_kernel_contexts;
9157 	int num_user_contexts;
9158 	int total_contexts;
9159 	int ret;
9160 	unsigned ngroups;
9161 
9162 	/*
9163 	 * Kernel contexts: (to be fixed later):
9164 	 * - min or 2 or 1 context/numa
9165 	 * - Context 0 - default/errors
9166 	 * - Context 1 - VL15
9167 	 */
9168 	if (n_krcvqs)
9169 		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9170 	else
9171 		num_kernel_contexts = num_online_nodes();
9172 	num_kernel_contexts =
9173 		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9174 	/*
9175 	 * Every kernel receive context needs an ACK send context.
9176 	 * One send context is allocated for each VL{0-7} and VL15.
9177 	 */
9178 	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9179 		dd_dev_err(dd,
9180 			   "Reducing # kernel rcv contexts to: %d, from %d\n",
9181 			   (int)(dd->chip_send_contexts - num_vls - 1),
9182 			   (int)num_kernel_contexts);
9183 		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9184 	}
9185 	/*
9186 	 * User contexts: (to be fixed later)
9187 	 *	- set to num_rcv_contexts if non-zero
9188 	 *	- default to 1 user context per CPU
9189 	 */
9190 	if (num_rcv_contexts)
9191 		num_user_contexts = num_rcv_contexts;
9192 	else
9193 		num_user_contexts = num_online_cpus();
9194 
9195 	total_contexts = num_kernel_contexts + num_user_contexts;
9196 
9197 	/*
9198 	 * Adjust the counts given a global max.
9199 	 */
9200 	if (total_contexts > dd->chip_rcv_contexts) {
9201 		dd_dev_err(dd,
9202 			   "Reducing # user receive contexts to: %d, from %d\n",
9203 			   (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9204 			   (int)num_user_contexts);
9205 		num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9206 		/* recalculate */
9207 		total_contexts = num_kernel_contexts + num_user_contexts;
9208 	}
9209 
9210 	/* the first N are kernel contexts, the rest are user contexts */
9211 	dd->num_rcv_contexts = total_contexts;
9212 	dd->n_krcv_queues = num_kernel_contexts;
9213 	dd->first_user_ctxt = num_kernel_contexts;
9214 	dd->freectxts = num_user_contexts;
9215 	dd_dev_info(dd,
9216 		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9217 		(int)dd->chip_rcv_contexts,
9218 		(int)dd->num_rcv_contexts,
9219 		(int)dd->n_krcv_queues,
9220 		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
9221 
9222 	/*
9223 	 * Receive array allocation:
9224 	 *   All RcvArray entries are divided into groups of 8. This
9225 	 *   is required by the hardware and will speed up writes to
9226 	 *   consecutive entries by using write-combining of the entire
9227 	 *   cacheline.
9228 	 *
9229 	 *   The number of groups is evenly divided among all contexts;
9230 	 *   any leftover groups are given to the first N user
9231 	 *   contexts.
9232 	 */
9233 	dd->rcv_entries.group_size = RCV_INCREMENT;
9234 	ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9235 	dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9236 	dd->rcv_entries.nctxt_extra = ngroups -
9237 		(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
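	/*
	 * Illustrative numbers only: an RcvArray of 8192 entries in
	 * groups of 8 gives 1024 groups; with 40 receive contexts each
	 * context gets 25 groups and the 24 left over go to the first
	 * user contexts.
	 */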
9238 	dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9239 		    dd->rcv_entries.ngroups,
9240 		    dd->rcv_entries.nctxt_extra);
9241 	if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9242 	    MAX_EAGER_ENTRIES * 2) {
9243 		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9244 			dd->rcv_entries.group_size;
9245 		dd_dev_info(dd,
9246 		   "RcvArray group count too high, change to %u\n",
9247 		   dd->rcv_entries.ngroups);
9248 		dd->rcv_entries.nctxt_extra = 0;
9249 	}
9250 	/*
9251 	 * PIO send contexts
9252 	 */
9253 	ret = init_sc_pools_and_sizes(dd);
9254 	if (ret >= 0) {	/* success */
9255 		dd->num_send_contexts = ret;
9256 		dd_dev_info(
9257 			dd,
9258 			"send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9259 			dd->chip_send_contexts,
9260 			dd->num_send_contexts,
9261 			dd->sc_sizes[SC_KERNEL].count,
9262 			dd->sc_sizes[SC_ACK].count,
9263 			dd->sc_sizes[SC_USER].count);
9264 		ret = 0;	/* success */
9265 	}
9266 
9267 	return ret;
9268 }
9269 
9270 /*
9271  * Set the device/port partition key table. The MAD code
9272  * will ensure that, at least, the partial management
9273  * partition key is present in the table.
9274  */
9275 static void set_partition_keys(struct hfi1_pportdata *ppd)
9276 {
9277 	struct hfi1_devdata *dd = ppd->dd;
9278 	u64 reg = 0;
9279 	int i;
9280 
9281 	dd_dev_info(dd, "Setting partition keys\n");
9282 	for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9283 		reg |= (ppd->pkeys[i] &
9284 			RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9285 			((i % 4) *
9286 			 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9287 		/* Each register holds 4 PKey values. */
9288 		if ((i % 4) == 3) {
9289 			write_csr(dd, RCV_PARTITION_KEY +
9290 				  ((i - 3) * 2), reg);
9291 			reg = 0;
9292 		}
9293 	}
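	/*
	 * For illustration (assuming the _B_SHIFT constant is 16, i.e.
	 * four 16-bit pkeys per 64-bit register): pkeys[0..3] are packed
	 * into one value and written at RCV_PARTITION_KEY + 0 on i == 3,
	 * pkeys[4..7] at RCV_PARTITION_KEY + 8 on i == 7, and so on.
	 */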
9294 
9295 	/* Always enable HW pkeys check when pkeys table is set */
9296 	add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9297 }
9298 
9299 /*
9300  * These CSRs and memories are uninitialized on reset and must be
9301  * written before reading to set the ECC/parity bits.
9302  *
9303  * NOTE: All user context CSRs that are not mmaped write-only
9304  * (e.g. the TID flows) must be initialized even if the driver never
9305  * reads them.
9306  */
9307 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9308 {
9309 	int i, j;
9310 
9311 	/* CceIntMap */
9312 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9313 		write_csr(dd, CCE_INT_MAP+(8*i), 0);
9314 
9315 	/* SendCtxtCreditReturnAddr */
9316 	for (i = 0; i < dd->chip_send_contexts; i++)
9317 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9318 
9319 	/* PIO Send buffers */
9320 	/* SDMA Send buffers */
9321 	/* These are not normally read, and (presently) have no method
9322 	   to be read, so are not pre-initialized */
9323 
9324 	/* RcvHdrAddr */
9325 	/* RcvHdrTailAddr */
9326 	/* RcvTidFlowTable */
9327 	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9328 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9329 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9330 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9331 			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9332 	}
9333 
9334 	/* RcvArray */
9335 	for (i = 0; i < dd->chip_rcv_array_count; i++)
9336 		write_csr(dd, RCV_ARRAY + (8*i),
9337 					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9338 
9339 	/* RcvQPMapTable */
9340 	for (i = 0; i < 32; i++)
9341 		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9342 }
9343 
9344 /*
9345  * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9346  */
9347 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9348 			     u64 ctrl_bits)
9349 {
9350 	unsigned long timeout;
9351 	u64 reg;
9352 
9353 	/* is the condition present? */
9354 	reg = read_csr(dd, CCE_STATUS);
9355 	if ((reg & status_bits) == 0)
9356 		return;
9357 
9358 	/* clear the condition */
9359 	write_csr(dd, CCE_CTRL, ctrl_bits);
9360 
9361 	/* wait for the condition to clear */
9362 	timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9363 	while (1) {
9364 		reg = read_csr(dd, CCE_STATUS);
9365 		if ((reg & status_bits) == 0)
9366 			return;
9367 		if (time_after(jiffies, timeout)) {
9368 			dd_dev_err(dd,
9369 				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9370 				status_bits, reg & status_bits);
9371 			return;
9372 		}
9373 		udelay(1);
9374 	}
9375 }
9376 
9377 /* set CCE CSRs to chip reset defaults */
9378 static void reset_cce_csrs(struct hfi1_devdata *dd)
9379 {
9380 	int i;
9381 
9382 	/* CCE_REVISION read-only */
9383 	/* CCE_REVISION2 read-only */
9384 	/* CCE_CTRL - bits clear automatically */
9385 	/* CCE_STATUS read-only, use CceCtrl to clear */
9386 	clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9387 	clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9388 	clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9389 	for (i = 0; i < CCE_NUM_SCRATCH; i++)
9390 		write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9391 	/* CCE_ERR_STATUS read-only */
9392 	write_csr(dd, CCE_ERR_MASK, 0);
9393 	write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9394 	/* CCE_ERR_FORCE leave alone */
9395 	for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9396 		write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9397 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9398 	/* CCE_PCIE_CTRL leave alone */
9399 	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9400 		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9401 		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9402 					CCE_MSIX_TABLE_UPPER_RESETCSR);
9403 	}
9404 	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9405 		/* CCE_MSIX_PBA read-only */
9406 		write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9407 		write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9408 	}
9409 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9410 		write_csr(dd, CCE_INT_MAP, 0);
9411 	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9412 		/* CCE_INT_STATUS read-only */
9413 		write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9414 		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9415 		/* CCE_INT_FORCE leave alone */
9416 		/* CCE_INT_BLOCKED read-only */
9417 	}
9418 	for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9419 		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9420 }
9421 
9422 /* set ASIC CSRs to chip reset defaults */
9423 static void reset_asic_csrs(struct hfi1_devdata *dd)
9424 {
9425 	int i;
9426 
9427 	/*
9428 	 * If the HFIs are shared between separate nodes or VMs,
9429 	 * then more will need to be done here.  One idea is a module
9430 	 * parameter that returns early, letting the first power-on or
9431 	 * a known first load do the reset and blocking all others.
9432 	 */
9433 
9434 	if (!(dd->flags & HFI1_DO_INIT_ASIC))
9435 		return;
9436 
9437 	if (dd->icode != ICODE_FPGA_EMULATION) {
9438 		/* emulation does not have an SBus - leave these alone */
9439 		/*
9440 		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9441 		 * Notes:
9442 		 * o The reset is not zero if aimed at the core.  See the
9443 		 *   SBus documentation for details.
9444 		 * o If the SBus firmware has been updated (e.g. by the BIOS),
9445 		 *   will the reset revert that?
9446 		 */
9447 		/* ASIC_CFG_SBUS_REQUEST leave alone */
9448 		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9449 	}
9450 	/* ASIC_SBUS_RESULT read-only */
9451 	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9452 	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9453 		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9454 	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
9455 
9456 	/* We might want to retain this state across FLR if we ever use it */
9457 	write_csr(dd, ASIC_CFG_DRV_STR, 0);
9458 
9459 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9460 	/* ASIC_STS_THERM read-only */
9461 	/* ASIC_CFG_RESET leave alone */
9462 
9463 	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9464 	/* ASIC_PCIE_SD_HOST_STATUS read-only */
9465 	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9466 	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9467 	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9468 	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9469 	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470 	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9471 	for (i = 0; i < 16; i++)
9472 		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9473 
9474 	/* ASIC_GPIO_IN read-only */
9475 	write_csr(dd, ASIC_GPIO_OE, 0);
9476 	write_csr(dd, ASIC_GPIO_INVERT, 0);
9477 	write_csr(dd, ASIC_GPIO_OUT, 0);
9478 	write_csr(dd, ASIC_GPIO_MASK, 0);
9479 	/* ASIC_GPIO_STATUS read-only */
9480 	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9481 	/* ASIC_GPIO_FORCE leave alone */
9482 
9483 	/* ASIC_QSFP1_IN read-only */
9484 	write_csr(dd, ASIC_QSFP1_OE, 0);
9485 	write_csr(dd, ASIC_QSFP1_INVERT, 0);
9486 	write_csr(dd, ASIC_QSFP1_OUT, 0);
9487 	write_csr(dd, ASIC_QSFP1_MASK, 0);
9488 	/* ASIC_QSFP1_STATUS read-only */
9489 	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9490 	/* ASIC_QSFP1_FORCE leave alone */
9491 
9492 	/* ASIC_QSFP2_IN read-only */
9493 	write_csr(dd, ASIC_QSFP2_OE, 0);
9494 	write_csr(dd, ASIC_QSFP2_INVERT, 0);
9495 	write_csr(dd, ASIC_QSFP2_OUT, 0);
9496 	write_csr(dd, ASIC_QSFP2_MASK, 0);
9497 	/* ASIC_QSFP2_STATUS read-only */
9498 	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9499 	/* ASIC_QSFP2_FORCE leave alone */
9500 
9501 	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9502 	/* this also writes a NOP command, clearing paging mode */
9503 	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9504 	write_csr(dd, ASIC_EEP_DATA, 0);
9505 }
9506 
9507 /* set MISC CSRs to chip reset defaults */
9508 static void reset_misc_csrs(struct hfi1_devdata *dd)
9509 {
9510 	int i;
9511 
9512 	for (i = 0; i < 32; i++) {
9513 		write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9514 		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9515 		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9516 	}
9517 	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9518 	   only be written in 128-byte chunks */
9519 	/* init RSA engine to clear lingering errors */
9520 	write_csr(dd, MISC_CFG_RSA_CMD, 1);
9521 	write_csr(dd, MISC_CFG_RSA_MU, 0);
9522 	write_csr(dd, MISC_CFG_FW_CTRL, 0);
9523 	/* MISC_STS_8051_DIGEST read-only */
9524 	/* MISC_STS_SBM_DIGEST read-only */
9525 	/* MISC_STS_PCIE_DIGEST read-only */
9526 	/* MISC_STS_FAB_DIGEST read-only */
9527 	/* MISC_ERR_STATUS read-only */
9528 	write_csr(dd, MISC_ERR_MASK, 0);
9529 	write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9530 	/* MISC_ERR_FORCE leave alone */
9531 }
9532 
9533 /* set TXE CSRs to chip reset defaults */
9534 static void reset_txe_csrs(struct hfi1_devdata *dd)
9535 {
9536 	int i;
9537 
9538 	/*
9539 	 * TXE Kernel CSRs
9540 	 */
9541 	write_csr(dd, SEND_CTRL, 0);
9542 	__cm_reset(dd, 0);	/* reset CM internal state */
9543 	/* SEND_CONTEXTS read-only */
9544 	/* SEND_DMA_ENGINES read-only */
9545 	/* SEND_PIO_MEM_SIZE read-only */
9546 	/* SEND_DMA_MEM_SIZE read-only */
9547 	write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9548 	pio_reset_all(dd);	/* SEND_PIO_INIT_CTXT */
9549 	/* SEND_PIO_ERR_STATUS read-only */
9550 	write_csr(dd, SEND_PIO_ERR_MASK, 0);
9551 	write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9552 	/* SEND_PIO_ERR_FORCE leave alone */
9553 	/* SEND_DMA_ERR_STATUS read-only */
9554 	write_csr(dd, SEND_DMA_ERR_MASK, 0);
9555 	write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9556 	/* SEND_DMA_ERR_FORCE leave alone */
9557 	/* SEND_EGRESS_ERR_STATUS read-only */
9558 	write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9559 	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9560 	/* SEND_EGRESS_ERR_FORCE leave alone */
9561 	write_csr(dd, SEND_BTH_QP, 0);
9562 	write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9563 	write_csr(dd, SEND_SC2VLT0, 0);
9564 	write_csr(dd, SEND_SC2VLT1, 0);
9565 	write_csr(dd, SEND_SC2VLT2, 0);
9566 	write_csr(dd, SEND_SC2VLT3, 0);
9567 	write_csr(dd, SEND_LEN_CHECK0, 0);
9568 	write_csr(dd, SEND_LEN_CHECK1, 0);
9569 	/* SEND_ERR_STATUS read-only */
9570 	write_csr(dd, SEND_ERR_MASK, 0);
9571 	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9572 	/* SEND_ERR_FORCE read-only */
9573 	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9574 		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9575 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9576 		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9577 	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9578 		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9579 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9580 		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9581 	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9582 		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9583 	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9584 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9585 					SEND_CM_GLOBAL_CREDIT_RESETCSR);
9586 	/* SEND_CM_CREDIT_USED_STATUS read-only */
9587 	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9588 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9589 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9590 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9591 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9592 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
9593 		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9594 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9595 	/* SEND_CM_CREDIT_USED_VL read-only */
9596 	/* SEND_CM_CREDIT_USED_VL15 read-only */
9597 	/* SEND_EGRESS_CTXT_STATUS read-only */
9598 	/* SEND_EGRESS_SEND_DMA_STATUS read-only */
9599 	write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9600 	/* SEND_EGRESS_ERR_INFO read-only */
9601 	/* SEND_EGRESS_ERR_SOURCE read-only */
9602 
9603 	/*
9604 	 * TXE Per-Context CSRs
9605 	 */
9606 	for (i = 0; i < dd->chip_send_contexts; i++) {
9607 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9608 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9609 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9610 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9611 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9612 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9613 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9614 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9615 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9616 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9617 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9618 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9619 	}
9620 
9621 	/*
9622 	 * TXE Per-SDMA CSRs
9623 	 */
9624 	for (i = 0; i < dd->chip_sdma_engines; i++) {
9625 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9626 		/* SEND_DMA_STATUS read-only */
9627 		write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9628 		write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9629 		write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9630 		/* SEND_DMA_HEAD read-only */
9631 		write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9632 		write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9633 		/* SEND_DMA_IDLE_CNT read-only */
9634 		write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9635 		write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9636 		/* SEND_DMA_DESC_FETCHED_CNT read-only */
9637 		/* SEND_DMA_ENG_ERR_STATUS read-only */
9638 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9639 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9640 		/* SEND_DMA_ENG_ERR_FORCE leave alone */
9641 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9642 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9643 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9644 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9645 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9646 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9647 		write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9648 	}
9649 }
9650 
9651 /*
9652  * Expect on entry:
9653  * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9654  */
9655 static void init_rbufs(struct hfi1_devdata *dd)
9656 {
9657 	u64 reg;
9658 	int count;
9659 
9660 	/*
9661 	 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9662 	 * clear.
9663 	 */
9664 	count = 0;
9665 	while (1) {
9666 		reg = read_csr(dd, RCV_STATUS);
9667 		if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9668 			    | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9669 			break;
9670 		/*
9671 		 * Give up after 1ms - maximum wait time.
9672 		 *
9673 		 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9674 		 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9675 		 *	148 KB / (66% * 250MB/s) = 920us
9676 		 */
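		/* 500 polls x 2us udelay = 1ms */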
9677 		if (count++ > 500) {
9678 			dd_dev_err(dd,
9679 				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9680 				__func__, reg);
9681 			break;
9682 		}
9683 		udelay(2); /* do not busy-wait the CSR */
9684 	}
9685 
9686 	/* start the init - expect RcvCtrl to be 0 */
9687 	write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9688 
9689 	/*
9690 	 * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
9691 	 * period after the write before RcvStatus.RxRbufInitDone is valid.
9692 	 * The delay in the first run through the loop below is sufficient and
9693 	 * required before the first read of RcvStatus.RxRbufInitDone.
9694 	 */
9695 	read_csr(dd, RCV_CTRL);
9696 
9697 	/* wait for the init to finish */
9698 	count = 0;
9699 	while (1) {
9700 		/* delay is required first time through - see above */
9701 		udelay(2); /* do not busy-wait the CSR */
9702 		reg = read_csr(dd, RCV_STATUS);
9703 		if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9704 			break;
9705 
9706 		/* give up after 100us - slowest possible at 33MHz is 73us */
9707 		if (count++ > 50) {
9708 			dd_dev_err(dd,
9709 				"%s: RcvStatus.RxRbufInit not set, continuing\n",
9710 				__func__);
9711 			break;
9712 		}
9713 	}
9714 }
9715 
9716 /* set RXE CSRs to chip reset defaults */
9717 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9718 {
9719 	int i, j;
9720 
9721 	/*
9722 	 * RXE Kernel CSRs
9723 	 */
9724 	write_csr(dd, RCV_CTRL, 0);
9725 	init_rbufs(dd);
9726 	/* RCV_STATUS read-only */
9727 	/* RCV_CONTEXTS read-only */
9728 	/* RCV_ARRAY_CNT read-only */
9729 	/* RCV_BUF_SIZE read-only */
9730 	write_csr(dd, RCV_BTH_QP, 0);
9731 	write_csr(dd, RCV_MULTICAST, 0);
9732 	write_csr(dd, RCV_BYPASS, 0);
9733 	write_csr(dd, RCV_VL15, 0);
9734 	/* this is a clear-down */
9735 	write_csr(dd, RCV_ERR_INFO,
9736 			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9737 	/* RCV_ERR_STATUS read-only */
9738 	write_csr(dd, RCV_ERR_MASK, 0);
9739 	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9740 	/* RCV_ERR_FORCE leave alone */
9741 	for (i = 0; i < 32; i++)
9742 		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9743 	for (i = 0; i < 4; i++)
9744 		write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9745 	for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9746 		write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9747 	for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9748 		write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9749 	for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9750 		write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9751 		write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9752 		write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9753 	}
9754 	for (i = 0; i < 32; i++)
9755 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9756 
9757 	/*
9758 	 * RXE Kernel and User Per-Context CSRs
9759 	 */
9760 	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9761 		/* kernel */
9762 		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9763 		/* RCV_CTXT_STATUS read-only */
9764 		write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9765 		write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9766 		write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9767 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9768 		write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9769 		write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9770 		write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9771 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9772 		write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9773 		write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9774 
9775 		/* user */
9776 		/* RCV_HDR_TAIL read-only */
9777 		write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9778 		/* RCV_EGR_INDEX_TAIL read-only */
9779 		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9780 		/* RCV_EGR_OFFSET_TAIL read-only */
9781 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9782 			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9783 				0);
9784 		}
9785 	}
9786 }
9787 
9788 /*
9789  * Set sc2vl tables.
9790  *
9791  * They power on to zeros, so to avoid send context errors
9792  * they need to be set:
9793  *
9794  * SC 0-7 -> VL 0-7 (respectively)
9795  * SC 15  -> VL 15
9796  * otherwise
9797  *        -> VL 0
9798  */
9799 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9800 {
9801 	int i;
9802 	/* init per architecture spec, constrained by hardware capability */
9803 
9804 	/* HFI maps sent packets */
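	/*
	 * Each SEND_SC2VLTn CSR covers eight SCs; the SC2VL_VAL() arguments
	 * below are the table number followed by (SC, VL) pairs.
	 */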
9805 	write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9806 		0,
9807 		0, 0, 1, 1,
9808 		2, 2, 3, 3,
9809 		4, 4, 5, 5,
9810 		6, 6, 7, 7));
9811 	write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9812 		1,
9813 		8, 0, 9, 0,
9814 		10, 0, 11, 0,
9815 		12, 0, 13, 0,
9816 		14, 0, 15, 15));
9817 	write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9818 		2,
9819 		16, 0, 17, 0,
9820 		18, 0, 19, 0,
9821 		20, 0, 21, 0,
9822 		22, 0, 23, 0));
9823 	write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9824 		3,
9825 		24, 0, 25, 0,
9826 		26, 0, 27, 0,
9827 		28, 0, 29, 0,
9828 		30, 0, 31, 0));
9829 
9830 	/* DC maps received packets */
9831 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9832 		15_0,
9833 		0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9834 		8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9835 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9836 		31_16,
9837 		16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9838 		24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9839 
9840 	/* initialize the cached sc2vl values consistently with h/w */
9841 	for (i = 0; i < 32; i++) {
9842 		if (i < 8 || i == 15)
9843 			*((u8 *)(dd->sc2vl) + i) = (u8)i;
9844 		else
9845 			*((u8 *)(dd->sc2vl) + i) = 0;
9846 	}
9847 }
9848 
9849 /*
9850  * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9851  * depend on the chip going through a power-on reset - a driver may be loaded
9852  * and unloaded many times.
9853  *
9854  * Do not write any CSR values to the chip in this routine - there may be
9855  * a reset following the (possible) FLR in this routine.
9856  *
9857  */
9858 static void init_chip(struct hfi1_devdata *dd)
9859 {
9860 	int i;
9861 
9862 	/*
9863 	 * Put the HFI CSRs in a known state.
9864 	 * Combine this with a DC reset.
9865 	 *
9866 	 * Stop the device from doing anything while we do a
9867 	 * reset.  We know there are no other active users of
9868 	 * the device since we are now in charge.  Turn off
9869 	 * all outbound and inbound traffic and make sure
9870 	 * the device does not generate any interrupts.
9871 	 */
9872 
9873 	/* disable send contexts and SDMA engines */
9874 	write_csr(dd, SEND_CTRL, 0);
9875 	for (i = 0; i < dd->chip_send_contexts; i++)
9876 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9877 	for (i = 0; i < dd->chip_sdma_engines; i++)
9878 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9879 	/* disable port (turn off RXE inbound traffic) and contexts */
9880 	write_csr(dd, RCV_CTRL, 0);
9881 	for (i = 0; i < dd->chip_rcv_contexts; i++)
9882 		write_csr(dd, RCV_CTXT_CTRL, 0);
9883 	/* mask all interrupt sources */
9884 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9885 		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9886 
9887 	/*
9888 	 * DC Reset: do a full DC reset before the register clear.
9889 	 * A recommended length of time to hold is one CSR read,
9890 	 * so reread the CceDcCtrl.  Then, hold the DC in reset
9891 	 * across the clear.
9892 	 */
9893 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9894 	(void) read_csr(dd, CCE_DC_CTRL);
9895 
9896 	if (use_flr) {
9897 		/*
9898 		 * A FLR will reset the SPC core and part of the PCIe.
9899 		 * The parts that need to be restored have already been
9900 		 * saved.
9901 		 */
9902 		dd_dev_info(dd, "Resetting CSRs with FLR\n");
9903 
9904 		/* do the FLR, the DC reset will remain */
9905 		hfi1_pcie_flr(dd);
9906 
9907 		/* restore command and BARs */
9908 		restore_pci_variables(dd);
9909 
9910 		if (is_a0(dd)) {
9911 			dd_dev_info(dd, "Resetting CSRs with FLR\n");
9912 			hfi1_pcie_flr(dd);
9913 			restore_pci_variables(dd);
9914 		}
9915 
9916 		reset_asic_csrs(dd);
9917 	} else {
9918 		dd_dev_info(dd, "Resetting CSRs with writes\n");
9919 		reset_cce_csrs(dd);
9920 		reset_txe_csrs(dd);
9921 		reset_rxe_csrs(dd);
9922 		reset_asic_csrs(dd);
9923 		reset_misc_csrs(dd);
9924 	}
9925 	/* clear the DC reset */
9926 	write_csr(dd, CCE_DC_CTRL, 0);
9927 
9928 	/* Set the LED off */
9929 	if (is_a0(dd))
9930 		setextled(dd, 0);
9931 	/*
9932 	 * Clear the QSFP reset.
9933 	 * A0 leaves the out lines floating on power on; an FLR then
9934 	 * forces a 0 on all out pins.  The driver does not touch
9935 	 * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low, keeping
9936 	 * anything plugged in constantly in reset, if it pays attention
9937 	 * to RESET_N.
9938 	 * A prime example of this is SiPh. For now, set all pins high.
9939 	 * I2CCLK and I2CDAT will change per direction, and INT_N and
9940 	 * MODPRS_N are input only and their value is ignored.
9941 	 */
9942 	if (is_a0(dd)) {
9943 		write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9944 		write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9945 	}
9946 }
9947 
9948 static void init_early_variables(struct hfi1_devdata *dd)
9949 {
9950 	int i;
9951 
9952 	/* assign link credit variables */
9953 	dd->vau = CM_VAU;
9954 	dd->link_credits = CM_GLOBAL_CREDITS;
9955 	if (is_a0(dd))
9956 		dd->link_credits--;
9957 	dd->vcu = cu_to_vcu(hfi1_cu);
9958 	/* enough room for 8 MAD packets plus header - 17K */
9959 	dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
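	/* 8 * (2048 + 128) bytes = 17408 bytes, converted to allocation units */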
9960 	if (dd->vl15_init > dd->link_credits)
9961 		dd->vl15_init = dd->link_credits;
9962 
9963 	write_uninitialized_csrs_and_memories(dd);
9964 
9965 	if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9966 		for (i = 0; i < dd->num_pports; i++) {
9967 			struct hfi1_pportdata *ppd = &dd->pport[i];
9968 
9969 			set_partition_keys(ppd);
9970 		}
9971 	init_sc2vl_tables(dd);
9972 }
9973 
9974 static void init_kdeth_qp(struct hfi1_devdata *dd)
9975 {
9976 	/* user changed the KDETH_QP */
9977 	if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9978 		/* out of range or illegal value */
9979 		dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9980 		kdeth_qp = 0;
9981 	}
9982 	if (kdeth_qp == 0)	/* not set, or failed range check */
9983 		kdeth_qp = DEFAULT_KDETH_QP;
9984 
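	/*
	 * Program the same prefix into both the send and receive BTH QP
	 * CSRs so the TX and RX sides classify KDETH packets consistently.
	 */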
9985 	write_csr(dd, SEND_BTH_QP,
9986 			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9987 				<< SEND_BTH_QP_KDETH_QP_SHIFT);
9988 
9989 	write_csr(dd, RCV_BTH_QP,
9990 			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9991 				<< RCV_BTH_QP_KDETH_QP_SHIFT);
9992 }
9993 
9994 /**
9995  * init_qpmap_table
9996  * @dd - device data
9997  * @first_ctxt - first context
9998  * @last_ctxt - last context
9999  *
10000  * This routine sets the qpn mapping table that
10001  * is indexed by qpn[8:1].
10002  *
10003  * The routine will round robin the 256 settings
10004  * from first_ctxt to last_ctxt.
10005  *
10006  * The first/last looks ahead to having specialized
10007  * receive contexts for mgmt and bypass.  Normal
10008  * verbs traffic is assumed to be on a range
10009  * of receive contexts.
10010  */
10011 static void init_qpmap_table(struct hfi1_devdata *dd,
10012 			     u32 first_ctxt,
10013 			     u32 last_ctxt)
10014 {
10015 	u64 reg = 0;
10016 	u64 regno = RCV_QP_MAP_TABLE;
10017 	int i;
10018 	u64 ctxt = first_ctxt;
10019 
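	/*
	 * Each 64-bit RcvQpMapTable CSR packs eight 8-bit context entries;
	 * accumulate eight entries in 'reg', then write one CSR at a time.
	 * The VL15 context is skipped so it never receives mapped traffic.
	 */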
10020 	for (i = 0; i < 256;) {
10021 		if (ctxt == VL15CTXT) {
10022 			ctxt++;
10023 			if (ctxt > last_ctxt)
10024 				ctxt = first_ctxt;
10025 			continue;
10026 		}
10027 		reg |= ctxt << (8 * (i % 8));
10028 		i++;
10029 		ctxt++;
10030 		if (ctxt > last_ctxt)
10031 			ctxt = first_ctxt;
10032 		if (i % 8 == 0) {
10033 			write_csr(dd, regno, reg);
10034 			reg = 0;
10035 			regno += 8;
10036 		}
10037 	}
10038 	if (i % 8)
10039 		write_csr(dd, regno, reg);
10040 
10041 	add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10042 			| RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10043 }
10044 
10045 /**
10046  * init_qos - init RX qos
10047  * @dd - device data
10048  * @first_ctxt - first context
10049  *
10050  * This routine initializes Rule 0 and the
10051  * RSM map table to implement qos.
10052  *
10053  * If all of the limit tests succeed,
10054  * qos is applied based on the array
10055  * interpretation of krcvqs where
10056  * entry 0 is VL0.
10057  *
10058  * The number of vl bits (n) and the number of qpn
10059  * bits (m) are computed to feed both the RSM map table
10060  * and the single rule.
10061  *
10062  */
10063 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10064 {
10065 	u8 max_by_vl = 0;
10066 	unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10067 	u64 *rsmmap;
10068 	u64 reg;
10069 	u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
10070 
10071 	/* validate */
10072 	if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10073 	    num_vls == 1 ||
10074 	    krcvqsset <= 1)
10075 		goto bail;
10076 	for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10077 		if (krcvqs[i] > max_by_vl)
10078 			max_by_vl = krcvqs[i];
10079 	if (max_by_vl > 32)
10080 		goto bail;
10081 	qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10082 	/* determine bits vl */
10083 	n = ilog2(num_vls);
10084 	/* determine bits for qpn */
10085 	m = ilog2(qpns_per_vl);
10086 	if ((m + n) > 7)
10087 		goto bail;
10088 	if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10089 		goto bail;
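	/*
	 * Illustrative example: num_vls = 4 and krcvqs = {3, 3, 3, 3} gives
	 * max_by_vl = 3, qpns_per_vl = 4, n = 2 VL bits and m = 2 QPN bits,
	 * so m + n = 4 map table index bits are used per packet.
	 */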
10090 	rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
	if (!rsmmap)
		goto bail;	/* fall back to the simple qpmap path */
10091 	memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10092 	/* init the local copy of the table */
10093 	for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10094 		unsigned tctxt;
10095 
10096 		for (qpn = 0, tctxt = ctxt;
10097 		     krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10098 			unsigned idx, regoff, regidx;
10099 
10100 			/* generate index <= 128 */
10101 			idx = (qpn << n) ^ i;
10102 			regoff = (idx % 8) * 8;
10103 			regidx = idx / 8;
10104 			reg = rsmmap[regidx];
10105 			/* replace 0xff with context number */
10106 			reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10107 				<< regoff);
10108 			reg |= (u64)(tctxt++) << regoff;
10109 			rsmmap[regidx] = reg;
10110 			if (tctxt == ctxt + krcvqs[i])
10111 				tctxt = ctxt;
10112 		}
10113 		ctxt += krcvqs[i];
10114 	}
10115 	/* flush cached copies to chip */
10116 	for (i = 0; i < NUM_MAP_REGS; i++)
10117 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10118 	/* add rule0 */
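	/*
	 * Rule 0 matches packets on their LRH/BTH fields and forms the RSM
	 * map table index from SC and QPN bits, steering matched packets to
	 * the per-VL receive contexts chosen above.
	 */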
10119 	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10120 		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10121 			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10122 		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10123 	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10124 		LRH_BTH_MATCH_OFFSET
10125 			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10126 		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10127 		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10128 		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10129 		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10130 		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10131 	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10132 		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10133 		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10134 		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10135 		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10136 	/* Enable RSM */
10137 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10138 	kfree(rsmmap);
10139 	/* map everything else (non-VL15) to context 0 */
10140 	init_qpmap_table(
10141 		dd,
10142 		0,
10143 		0);
10144 	dd->qos_shift = n + 1;
10145 	return;
10146 bail:
10147 	dd->qos_shift = 1;
10148 	init_qpmap_table(
10149 		dd,
10150 		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10151 		dd->n_krcv_queues - 1);
10152 }
10153 
10154 static void init_rxe(struct hfi1_devdata *dd)
10155 {
10156 	/* enable all receive errors */
10157 	write_csr(dd, RCV_ERR_MASK, ~0ull);
10158 	/* setup QPN map table - start where VL15 context leaves off */
10159 	init_qos(
10160 		dd,
10161 		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10162 	/*
10163 	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10164 	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10165 	 * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10166 	 * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10167 	 * Max_PayLoad_Size set to its minimum of 128.
10168 	 *
10169 	 * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10170 	 * (64 bytes).  Max_Payload_Size is possibly modified upward in
10171 	 * tune_pcie_caps() which is called after this routine.
10172 	 */
10173 }
10174 
10175 static void init_other(struct hfi1_devdata *dd)
10176 {
10177 	/* enable all CCE errors */
10178 	write_csr(dd, CCE_ERR_MASK, ~0ull);
10179 	/* enable *some* Misc errors */
10180 	write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10181 	/* enable all DC errors, except LCB */
10182 	write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10183 	write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10184 }
10185 
10186 /*
10187  * Fill out the given AU table using the given CU.  A CU is defined in terms
10188  * of AUs.  The table is an encoding: given the index, how many AUs does that
10189  * represent?
10190  *
10191  * NOTE: Assumes that the register layout is the same for the
10192  * local and remote tables.
10193  */
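/*
 * The code below yields the encoding {0, 1, 2*CU, 4*CU, 8*CU, 16*CU,
 * 32*CU, 64*CU} AUs for table indices 0 through 7.
 */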
10194 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10195 			       u32 csr0to3, u32 csr4to7)
10196 {
10197 	write_csr(dd, csr0to3,
10198 		   0ull <<
10199 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10200 		|  1ull <<
10201 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10202 		|  2ull * cu <<
10203 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10204 		|  4ull * cu <<
10205 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10206 	write_csr(dd, csr4to7,
10207 		   8ull * cu <<
10208 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10209 		| 16ull * cu <<
10210 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10211 		| 32ull * cu <<
10212 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10213 		| 64ull * cu <<
10214 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10215 
10216 }
10217 
10218 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10219 {
10220 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10221 					SEND_CM_LOCAL_AU_TABLE4_TO7);
10222 }
10223 
10224 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10225 {
10226 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10227 					SEND_CM_REMOTE_AU_TABLE4_TO7);
10228 }
10229 
10230 static void init_txe(struct hfi1_devdata *dd)
10231 {
10232 	int i;
10233 
10234 	/* enable all PIO, SDMA, general, and Egress errors */
10235 	write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10236 	write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10237 	write_csr(dd, SEND_ERR_MASK, ~0ull);
10238 	write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10239 
10240 	/* enable all per-context and per-SDMA engine errors */
10241 	for (i = 0; i < dd->chip_send_contexts; i++)
10242 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10243 	for (i = 0; i < dd->chip_sdma_engines; i++)
10244 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10245 
10246 	/* set the local CU to AU mapping */
10247 	assign_local_cm_au_table(dd, dd->vcu);
10248 
10249 	/*
10250 	 * Set reasonable default for Credit Return Timer
10251 	 * Don't set on Simulator - causes it to choke.
10252 	 */
10253 	if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10254 		write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10255 }
10256 
10257 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10258 {
10259 	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10260 	unsigned sctxt;
10261 	int ret = 0;
10262 	u64 reg;
10263 
10264 	if (!rcd || !rcd->sc) {
10265 		ret = -EINVAL;
10266 		goto done;
10267 	}
10268 	sctxt = rcd->sc->hw_context;
10269 	reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10270 		((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10271 		 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10272 	/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10273 	if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10274 		reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10275 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10276 	/*
10277 	 * Enable send-side J_KEY integrity check, unless this is A0 h/w
10278 	 * (due to A0 erratum).
10279 	 */
10280 	if (!is_a0(dd)) {
10281 		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10282 		reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10283 		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10284 	}
10285 
10286 	/* Enable J_KEY check on receive context. */
10287 	reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10288 		((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10289 		 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10290 	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10291 done:
10292 	return ret;
10293 }
10294 
10295 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10296 {
10297 	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10298 	unsigned sctxt;
10299 	int ret = 0;
10300 	u64 reg;
10301 
10302 	if (!rcd || !rcd->sc) {
10303 		ret = -EINVAL;
10304 		goto done;
10305 	}
10306 	sctxt = rcd->sc->hw_context;
10307 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10308 	/*
10309 	 * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10310 	 * This check would not have been enabled for A0 h/w, see
10311 	 * set_ctxt_jkey().
10312 	 */
10313 	if (!is_a0(dd)) {
10314 		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10315 		reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10316 		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10317 	}
10318 	/* Turn off the J_KEY on the receive side */
10319 	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10320 done:
10321 	return ret;
10322 }
10323 
10324 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10325 {
10326 	struct hfi1_ctxtdata *rcd;
10327 	unsigned sctxt;
10328 	int ret = 0;
10329 	u64 reg;
10330 
10331 	if (ctxt < dd->num_rcv_contexts)
10332 		rcd = dd->rcd[ctxt];
10333 	else {
10334 		ret = -EINVAL;
10335 		goto done;
10336 	}
10337 	if (!rcd || !rcd->sc) {
10338 		ret = -EINVAL;
10339 		goto done;
10340 	}
10341 	sctxt = rcd->sc->hw_context;
10342 	reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10343 		SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10344 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10345 	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10346 	reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10347 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10348 done:
10349 	return ret;
10350 }
10351 
10352 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10353 {
10354 	struct hfi1_ctxtdata *rcd;
10355 	unsigned sctxt;
10356 	int ret = 0;
10357 	u64 reg;
10358 
10359 	if (ctxt < dd->num_rcv_contexts)
10360 		rcd = dd->rcd[ctxt];
10361 	else {
10362 		ret = -EINVAL;
10363 		goto done;
10364 	}
10365 	if (!rcd || !rcd->sc) {
10366 		ret = -EINVAL;
10367 		goto done;
10368 	}
10369 	sctxt = rcd->sc->hw_context;
10370 	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10371 	reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10372 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10373 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10374 done:
10375 	return ret;
10376 }
10377 
10378 /*
10379  * Start the clean up of the chip. Our clean up happens in multiple
10380  * stages and this is just the first.
10381  */
10382 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10383 {
10384 	free_cntrs(dd);
10385 	free_rcverr(dd);
10386 	clean_up_interrupts(dd);
10387 }
10388 
10389 #define HFI_BASE_GUID(dev) \
10390 	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10391 
10392 /*
10393  * Certain chip functions need to be initialized only once per asic
10394  * instead of per-device. This function finds the peer device and
10395  * checks whether that chip initialization needs to be done by this
10396  * device.
10397  */
10398 static void asic_should_init(struct hfi1_devdata *dd)
10399 {
10400 	unsigned long flags;
10401 	struct hfi1_devdata *tmp, *peer = NULL;
10402 
10403 	spin_lock_irqsave(&hfi1_devs_lock, flags);
10404 	/* Find our peer device */
10405 	list_for_each_entry(tmp, &hfi1_dev_list, list) {
10406 		if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10407 		    dd->unit != tmp->unit) {
10408 			peer = tmp;
10409 			break;
10410 		}
10411 	}
10412 
10413 	/*
10414 	 * "Claim" the ASIC for initialization if it hasn't been
10415 	 * "claimed" yet.
10416 	 */
10417 	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10418 		dd->flags |= HFI1_DO_INIT_ASIC;
10419 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10420 }
10421 
10422 /**
10423  * Allocate and initialize the device structure for the hfi.
10424  * @dev: the pci_dev for hfi1_ib device
10425  * @ent: pci_device_id struct for this dev
10426  *
10427  * Also allocates, initializes, and returns the devdata struct for this
10428  * device instance
10429  *
10430  * This is global, and is called directly at init to set up the
10431  * chip-specific function pointers for later use.
10432  */
10433 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10434 				  const struct pci_device_id *ent)
10435 {
10436 	struct hfi1_devdata *dd;
10437 	struct hfi1_pportdata *ppd;
10438 	u64 reg;
10439 	int i, ret;
10440 	static const char * const inames[] = { /* implementation names */
10441 		"RTL silicon",
10442 		"RTL VCS simulation",
10443 		"RTL FPGA emulation",
10444 		"Functional simulator"
10445 	};
10446 
10447 	dd = hfi1_alloc_devdata(pdev,
10448 		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10449 	if (IS_ERR(dd))
10450 		goto bail;
10451 	ppd = dd->pport;
10452 	for (i = 0; i < dd->num_pports; i++, ppd++) {
10453 		int vl;
10454 		/* init common fields */
10455 		hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10456 		/* DC supports 4 link widths */
10457 		ppd->link_width_supported =
10458 			OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10459 			OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10460 		ppd->link_width_downgrade_supported =
10461 			ppd->link_width_supported;
10462 		/* start out enabling only 4X */
10463 		ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10464 		ppd->link_width_downgrade_enabled =
10465 					ppd->link_width_downgrade_supported;
10466 		/* link width active is 0 when link is down */
10467 		/* link width downgrade active is 0 when link is down */
10468 
10469 		if (num_vls < HFI1_MIN_VLS_SUPPORTED
10470 			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
10471 			hfi1_early_err(&pdev->dev,
10472 				       "Invalid num_vls %u, using %u VLs\n",
10473 				    num_vls, HFI1_MAX_VLS_SUPPORTED);
10474 			num_vls = HFI1_MAX_VLS_SUPPORTED;
10475 		}
10476 		ppd->vls_supported = num_vls;
10477 		ppd->vls_operational = ppd->vls_supported;
10478 		/* Set the default MTU. */
10479 		for (vl = 0; vl < num_vls; vl++)
10480 			dd->vld[vl].mtu = hfi1_max_mtu;
10481 		dd->vld[15].mtu = MAX_MAD_PACKET;
10482 		/*
10483 		 * Set the initial values to reasonable default, will be set
10484 		 * for real when link is up.
10485 		 */
10486 		ppd->lstate = IB_PORT_DOWN;
10487 		ppd->overrun_threshold = 0x4;
10488 		ppd->phy_error_threshold = 0xf;
10489 		ppd->port_crc_mode_enabled = link_crc_mask;
10490 		/* initialize supported LTP CRC mode */
10491 		ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10492 		/* initialize enabled LTP CRC mode */
10493 		ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10494 		/* start in offline */
10495 		ppd->host_link_state = HLS_DN_OFFLINE;
10496 		init_vl_arb_caches(ppd);
10497 	}
10498 
10499 	dd->link_default = HLS_DN_POLL;
10500 
10501 	/*
10502 	 * Do remaining PCIe setup and save PCIe values in dd.
10503 	 * Any error printing is already done by the init code.
10504 	 * On return, we have the chip mapped.
10505 	 */
10506 	ret = hfi1_pcie_ddinit(dd, pdev, ent);
10507 	if (ret < 0)
10508 		goto bail_free;
10509 
10510 	/* verify that reads actually work, save revision for reset check */
10511 	dd->revision = read_csr(dd, CCE_REVISION);
10512 	if (dd->revision == ~(u64)0) {
10513 		dd_dev_err(dd, "cannot read chip CSRs\n");
10514 		ret = -EINVAL;
10515 		goto bail_cleanup;
10516 	}
10517 	dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10518 			& CCE_REVISION_CHIP_REV_MAJOR_MASK;
10519 	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10520 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
10521 
10522 	/* obtain the hardware ID - NOT related to unit, which is a
10523 	   software enumeration */
10524 	reg = read_csr(dd, CCE_REVISION2);
10525 	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10526 					& CCE_REVISION2_HFI_ID_MASK;
10527 	/* the variable size will remove unwanted bits */
10528 	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10529 	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10530 	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10531 		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10532 		(int)dd->irev);
10533 
10534 	/* speeds the hardware can support */
10535 	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10536 	/* speeds allowed to run at */
10537 	dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10538 	/* give a reasonable active value, will be set on link up */
10539 	dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10540 
10541 	dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10542 	dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10543 	dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10544 	dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10545 	dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10546 	/* fix up link widths for emulation _p */
10547 	ppd = dd->pport;
10548 	if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10549 		ppd->link_width_supported =
10550 			ppd->link_width_enabled =
10551 			ppd->link_width_downgrade_supported =
10552 			ppd->link_width_downgrade_enabled =
10553 				OPA_LINK_WIDTH_1X;
10554 	}
10555 	/* ensure num_vls isn't larger than the number of sdma engines */
10556 	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10557 		dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10558 				num_vls, HFI1_MAX_VLS_SUPPORTED);
10559 		ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10560 		ppd->vls_operational = ppd->vls_supported;
10561 	}
10562 
10563 	/*
10564 	 * Convert the ns parameter to the 64 * cclocks used in the CSR.
10565 	 * Limit the max if larger than the field holds.  If timeout is
10566 	 * non-zero, then the calculated field will be at least 1.
10567 	 *
10568 	 * Must be after icode is set up - the cclock rate depends
10569 	 * on knowing the hardware being used.
10570 	 */
10571 	dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10572 	if (dd->rcv_intr_timeout_csr >
10573 			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10574 		dd->rcv_intr_timeout_csr =
10575 			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10576 	else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10577 		dd->rcv_intr_timeout_csr = 1;
10578 
10579 	/* needs to be done before we look for the peer device */
10580 	read_guid(dd);
10581 
10582 	/* should this device init the ASIC block? */
10583 	asic_should_init(dd);
10584 
10585 	/* obtain chip sizes, reset chip CSRs */
10586 	init_chip(dd);
10587 
10588 	/* read in the PCIe link speed information */
10589 	ret = pcie_speeds(dd);
10590 	if (ret)
10591 		goto bail_cleanup;
10592 
10593 	/* read in firmware */
10594 	ret = hfi1_firmware_init(dd);
10595 	if (ret)
10596 		goto bail_cleanup;
10597 
10598 	/*
10599 	 * In general, the PCIe Gen3 transition must occur after the
10600 	 * chip has been idled (so it won't initiate any PCIe transactions
10601 	 * e.g. an interrupt) and before the driver changes any registers
10602 	 * (the transition will reset the registers).
10603 	 *
10604 	 * In particular, place this call after:
10605 	 * - init_chip()     - the chip will not initiate any PCIe transactions
10606 	 * - pcie_speeds()   - reads the current link speed
10607 	 * - hfi1_firmware_init() - the needed firmware is ready to be
10608 	 *			    downloaded
10609 	 */
10610 	ret = do_pcie_gen3_transition(dd);
10611 	if (ret)
10612 		goto bail_cleanup;
10613 
10614 	/* start setting dd values and adjusting CSRs */
10615 	init_early_variables(dd);
10616 
10617 	parse_platform_config(dd);
10618 
10619 	/* add board names as they are defined */
10620 	dd->boardname = kmalloc(64, GFP_KERNEL);
10621 	if (!dd->boardname)
10622 		goto bail_cleanup;
10623 	snprintf(dd->boardname, 64, "Board ID 0x%llx",
10624 		 dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10625 		    & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10626 
10627 	snprintf(dd->boardversion, BOARD_VERS_MAX,
10628 		 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10629 		 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10630 		 dd->boardname,
10631 		 (u32)dd->majrev,
10632 		 (u32)dd->minrev,
10633 		 (dd->revision >> CCE_REVISION_SW_SHIFT)
10634 		    & CCE_REVISION_SW_MASK);
10635 
10636 	ret = set_up_context_variables(dd);
10637 	if (ret)
10638 		goto bail_cleanup;
10639 
10640 	/* set initial RXE CSRs */
10641 	init_rxe(dd);
10642 	/* set initial TXE CSRs */
10643 	init_txe(dd);
10644 	/* set initial non-RXE, non-TXE CSRs */
10645 	init_other(dd);
10646 	/* set up KDETH QP prefix in both RX and TX CSRs */
10647 	init_kdeth_qp(dd);
10648 
10649 	/* send contexts must be set up before receive contexts */
10650 	ret = init_send_contexts(dd);
10651 	if (ret)
10652 		goto bail_cleanup;
10653 
10654 	ret = hfi1_create_ctxts(dd);
10655 	if (ret)
10656 		goto bail_cleanup;
10657 
10658 	dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10659 	/*
10660 	 * rcd[0] is guaranteed to be valid by this point. Also, all
10661 	 * contexts are using the same value, as per the module parameter.
10662 	 */
10663 	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10664 
10665 	ret = init_pervl_scs(dd);
10666 	if (ret)
10667 		goto bail_cleanup;
10668 
10669 	/* sdma init */
10670 	for (i = 0; i < dd->num_pports; ++i) {
10671 		ret = sdma_init(dd, i);
10672 		if (ret)
10673 			goto bail_cleanup;
10674 	}
10675 
10676 	/* use contexts created by hfi1_create_ctxts */
10677 	ret = set_up_interrupts(dd);
10678 	if (ret)
10679 		goto bail_cleanup;
10680 
10681 	/* set up LCB access - must be after set_up_interrupts() */
10682 	init_lcb_access(dd);
10683 
10684 	snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10685 		 dd->base_guid & 0xFFFFFF);
10686 
10687 	dd->oui1 = dd->base_guid >> 56 & 0xFF;
10688 	dd->oui2 = dd->base_guid >> 48 & 0xFF;
10689 	dd->oui3 = dd->base_guid >> 40 & 0xFF;
10690 
10691 	ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10692 	if (ret)
10693 		goto bail_clear_intr;
10694 	check_fabric_firmware_versions(dd);
10695 
10696 	thermal_init(dd);
10697 
10698 	ret = init_cntrs(dd);
10699 	if (ret)
10700 		goto bail_clear_intr;
10701 
10702 	ret = init_rcverr(dd);
10703 	if (ret)
10704 		goto bail_free_cntrs;
10705 
10706 	ret = eprom_init(dd);
10707 	if (ret)
10708 		goto bail_free_rcverr;
10709 
10710 	goto bail;
10711 
10712 bail_free_rcverr:
10713 	free_rcverr(dd);
10714 bail_free_cntrs:
10715 	free_cntrs(dd);
10716 bail_clear_intr:
10717 	clean_up_interrupts(dd);
10718 bail_cleanup:
10719 	hfi1_pcie_ddcleanup(dd);
10720 bail_free:
10721 	hfi1_free_devdata(dd);
10722 	dd = ERR_PTR(ret);
10723 bail:
10724 	return dd;
10725 }
10726 
10727 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10728 			u32 dw_len)
10729 {
10730 	u32 delta_cycles;
10731 	u32 current_egress_rate = ppd->current_egress_rate;
10732 	/* rates here are in units of 10^6 bits/sec */
10733 
10734 	if (desired_egress_rate == -1)
10735 		return 0; /* shouldn't happen */
10736 
10737 	if (desired_egress_rate >= current_egress_rate)
10738 		return 0; /* we can't help go faster, only slower */
10739 
10740 	delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10741 			egress_cycles(dw_len * 4, current_egress_rate);
10742 
10743 	return (u16)delta_cycles;
10744 }
10745 
10746 
10747 /**
10748  * create_pbc - build a pbc for transmission
10749  * @flags: special case flags or-ed in built pbc
10750  * @srate_mbs: static rate in Mb/s
10751  * @vl: vl
10752  * @dw_len: dword length (header words + data words + pbc words)
10753  *
10754  * Create a PBC with the given flags, rate, VL, and length.
10755  *
10756  * NOTE: The PBC created will not insert any HCRC - all callers but one are
10757  * for verbs, which does not use this PSM feature.  The lone other caller
10758  * is for the diagnostic interface which calls this if the user does not
10759  * supply their own PBC.
10760  * supply their own PBC.
 */
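/*
 * Illustrative use: a send with no special flags and no static rate
 * throttling would build its PBC as
 *
 *	u64 pbc = create_pbc(ppd, 0, 0, vl, dw_len);
 *
 * and place it ahead of the packet data.
 */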
10761 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10762 	       u32 dw_len)
10763 {
10764 	u64 pbc, delay = 0;
10765 
10766 	if (unlikely(srate_mbs))
10767 		delay = delay_cycles(ppd, srate_mbs, dw_len);
10768 
10769 	pbc = flags
10770 		| (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10771 		| ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10772 		| (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10773 		| (dw_len & PBC_LENGTH_DWS_MASK)
10774 			<< PBC_LENGTH_DWS_SHIFT;
10775 
10776 	return pbc;
10777 }
10778 
10779 #define SBUS_THERMAL    0x4f
10780 #define SBUS_THERM_MONITOR_MODE 0x1
10781 
10782 #define THERM_FAILURE(dev, ret, reason) \
10783 	dd_dev_err((dd),						\
10784 		   "Thermal sensor initialization failed: %s (%d)\n",	\
10785 		   (reason), (ret))
10786 
10787 /*
10788  * Initialize the Avago Thermal sensor.
10789  *
10790  * After initialization, enable polling of thermal sensor through
10791  * SBus interface. In order for this to work, the SBus Master
10792  * firmware has to be loaded because the HW polling
10793  * logic uses SBus interrupts, which are not supported with
10794  * default firmware. Otherwise, no data will be returned through
10795  * the ASIC_STS_THERM CSR.
10796  */
10797 static int thermal_init(struct hfi1_devdata *dd)
10798 {
10799 	int ret = 0;
10800 
10801 	if (dd->icode != ICODE_RTL_SILICON ||
10802 	    !(dd->flags & HFI1_DO_INIT_ASIC))
10803 		return ret;
10804 
10805 	acquire_hw_mutex(dd);
10806 	dd_dev_info(dd, "Initializing thermal sensor\n");
10807 
10808 	/* Thermal Sensor Initialization */
10809 	/*    Step 1: Reset the Thermal SBus Receiver */
10810 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10811 				RESET_SBUS_RECEIVER, 0);
10812 	if (ret) {
10813 		THERM_FAILURE(dd, ret, "Bus Reset");
10814 		goto done;
10815 	}
10816 	/*    Step 2: Set Reset bit in Thermal block */
10817 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10818 				WRITE_SBUS_RECEIVER, 0x1);
10819 	if (ret) {
10820 		THERM_FAILURE(dd, ret, "Therm Block Reset");
10821 		goto done;
10822 	}
10823 	/*    Step 3: Write clock divider value (100MHz -> 2MHz) */
10824 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10825 				WRITE_SBUS_RECEIVER, 0x32);
10826 	if (ret) {
10827 		THERM_FAILURE(dd, ret, "Write Clock Div");
10828 		goto done;
10829 	}
10830 	/*    Step 4: Select temperature mode */
10831 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10832 				WRITE_SBUS_RECEIVER,
10833 				SBUS_THERM_MONITOR_MODE);
10834 	if (ret) {
10835 		THERM_FAILURE(dd, ret, "Write Mode Sel");
10836 		goto done;
10837 	}
10838 	/*    Step 5: De-assert block reset and start conversion */
10839 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10840 				WRITE_SBUS_RECEIVER, 0x2);
10841 	if (ret) {
10842 		THERM_FAILURE(dd, ret, "Write Reset Deassert");
10843 		goto done;
10844 	}
10845 	/*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10846 	msleep(22);
10847 
10848 	/* Enable polling of thermal readings */
10849 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10850 done:
10851 	release_hw_mutex(dd);
10852 	return ret;
10853 }
10854 
10855 static void handle_temp_err(struct hfi1_devdata *dd)
10856 {
10857 	struct hfi1_pportdata *ppd = &dd->pport[0];
10858 	/*
10859 	 * Thermal Critical Interrupt
10860 	 * Put the device into forced freeze mode, take link down to
10861 	 * offline, and put DC into reset.
10862 	 */
10863 	dd_dev_emerg(dd,
10864 		     "Critical temperature reached! Forcing device into freeze mode!\n");
10865 	dd->flags |= HFI1_FORCED_FREEZE;
10866 	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
10867 	/*
10868 	 * Shut DC down as much and as quickly as possible.
10869 	 *
10870 	 * Step 1: Take the link down to OFFLINE. This will cause the
10871 	 *         8051 to put the Serdes in reset. However, we don't want to
10872 	 *         go through the entire link state machine since we want to
10873 	 *         shutdown ASAP. Furthermore, this is not a graceful shutdown
10874 	 *         but rather an attempt to save the chip.
10875 	 *         Code below is almost the same as quiet_serdes() but avoids
10876 	 *         all the extra work and the sleeps.
10877 	 */
10878 	ppd->driver_link_ready = 0;
10879 	ppd->link_enabled = 0;
10880 	set_physical_link_state(dd, PLS_OFFLINE |
10881 				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10882 	/*
10883 	 * Step 2: Shutdown LCB and 8051
10884 	 *         After shutdown, do not restore DC_CFG_RESET value.
10885 	 */
10886 	dc_shutdown(dd);
10887 }
10888