1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
 * Common code for both the skx_edac driver and Intel 10nm server EDAC driver.
4 * Originally split out from the skx_edac driver.
5 *
6 * Copyright (c) 2018, Intel Corporation.
7 */
8
9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14
15 #define MSG_SIZE 1024
16
17 /*
18 * Debug macros
19 */
/* Forward to edac_printk(), passing "skx" as its second argument. */
#define skx_printk(level, fmt, ...) \
	edac_printk(level, "skx", fmt, ##__VA_ARGS__)
22
/*
 * Forward to edac_mc_chipset_printk() for memory controller <mci>,
 * passing "skx" as the argument that follows <level>.
 */
#define skx_mc_printk(mci, level, fmt, ...) \
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##__VA_ARGS__)
25
/*
 * Extract the bit field spanning bits <lo>..<hi> (both inclusive) of
 * register value <v>, shifted down so that bit <lo> ends up at bit 0.
 *
 * <lo> is expanded twice, so pass side-effect-free expressions.
 */
#define GET_BITFIELD(v, lo, hi)	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31
/* Topology limits with the SKX_ prefix */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMs per channel */

/* Topology limits with the I10NM_ prefix: DDR variant */
#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* Topology limits with the I10NM_ prefix: HBM variant */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/*
 * Combined I10NM limits: the controller count is the sum of the DDR and
 * HBM counts, while the channel and DIMM counts take the larger of the two.
 */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/*
 * Upper bounds across the SKX and I10NM limits. These dimension the
 * mc_mapping[], imc[], chan[] and dimms[] arrays in struct skx_dev.
 */
#define NUM_IMC			MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS		MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS		MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
51
/* Bit 15 of register value <r>. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r>. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* 4-bit field at bits 62:59 of value <m>. */
#define MCI_MISC_ECC_MODE(m)		(((m) >> 59) & 0xf)
#define MCI_MISC_ECC_DDRT		8	/* read from DDRT */
57
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask 0xef80 keeps bits 15:13 and 11:7, i.e. it clears the f bit
 * (bit 12) and the mmm/cccc bits (6:0). Applied to either pattern above
 * it therefore yields 0x80 or 0x280, the two values defined below.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
81
/*
 * Max RRL register sets per {,sub-,pseudo-}channel. Sizes the inner
 * dimension of skx_channel.rrl_ctl[][].
 */
#define NUM_RRL_SET		3
84
/*
 * Each cpu socket contains some pci devices that provide global
 * information, and also some that are local to each of the memory
 * controllers on the die (at most NUM_IMC of them, see imc[] below).
 */
struct skx_dev {
	struct list_head list;
	u8 bus[4];
	int seg;
	struct pci_dev *sad_all;
	struct pci_dev *util_all;
	struct pci_dev *uracu; /* for i10nm CPU */
	struct pci_dev *pcu_cr3; /* for HBM memory detection */
	u32 mcroute;
	/*
	 * Some server BIOS may hide certain memory controllers, and the
	 * EDAC driver skips those hidden memory controllers. However, the
	 * ADXL still decodes memory error address using physical memory
	 * controller indices. The mapping table is used to convert the
	 * physical indices (reported by ADXL) to the logical indices
	 * (used by the EDAC driver) of present memory controllers during
	 * the error handling process.
	 */
	u8 mc_mapping[NUM_IMC];
	/* Per-memory-controller state, one entry per possible controller */
	struct skx_imc {
		struct mem_ctl_info *mci;
		struct pci_dev *mdev; /* for i10nm CPU */
		void __iomem *mbase;  /* for i10nm CPU */
		int chan_mmio_sz;     /* for i10nm CPU */
		int num_channels; /* channels per memory controller */
		int num_dimms; /* dimms per channel */
		bool hbm_mc;
		u8 mc;	/* system wide mc# */
		u8 lmc;	/* socket relative mc# */
		u8 src_id, node_id;
		/* Per-channel state, one entry per possible channel */
		struct skx_channel {
			struct pci_dev	*cdev;
			struct pci_dev	*edev;
			/*
			 * Two groups of RRL control registers per channel to save default RRL
			 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
			 */
			u32 rrl_ctl[2][NUM_RRL_SET];
			/* Per-DIMM state, one entry per possible DIMM */
			struct skx_dimm {
				u8 close_pg;
				u8 bank_xor_enable;
				u8 fine_grain_bank;
				u8 rowbits;
				u8 colbits;
			} dimms[NUM_DIMMS];
		} chan[NUM_CHANNELS];
	} imc[NUM_IMC];
};
138
/*
 * Wrapper holding a pointer to one struct skx_imc.
 * NOTE(review): presumably the private data attached to a mem_ctl_info;
 * confirm against the users of this struct.
 */
struct skx_pvt {
	struct skx_imc *imc;
};
142
/* Platform identifier; this is the type of res_config.type. */
enum type {
	SKX = 0,
	I10NM,
	SPR,
	GNR,
};
149
/*
 * Component slot numbers. The first five entries cover socket, memory
 * controller, channel, DIMM and CS. The INDEX_NM_* entries repeat
 * memctrl..cs for a second ("NM") group that starts at INDEX_NM_FIRST.
 * INDEX_MAX is the total number of slots.
 *
 * NOTE(review): presumably these index an array of decoded address
 * components; confirm against the users of these constants.
 */
enum {
	INDEX_SOCKET = 0,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_MEMCTRL,
	INDEX_NM_FIRST = INDEX_NM_MEMCTRL,	/* alias for the first NM slot */
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};
163
/*
 * Classification of where a reported error came from. The per-member
 * notes below follow the member names.
 */
enum error_source {
	ERR_SRC_1LM = 0,	/* 1-level memory */
	ERR_SRC_2LM_NM,		/* 2-level memory, near memory */
	ERR_SRC_2LM_FM,		/* 2-level memory, far memory */
	ERR_SRC_NOT_MEMORY,	/* not a memory error */
};
170
/* Single-bit 64-bit masks, one per INDEX_NM_* slot number */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
175
/*
 * Decode state for one address: the struct mce and skx_dev it relates to,
 * the address itself, and the location fields (socket, memory controller,
 * channel, DIMM, rank, row/column/bank) derived from it.
 *
 * This is the argument type of the skx_decode_f and skx_show_retry_log_f
 * callbacks declared in this header.
 */
struct decoded_addr {
	struct mce *mce;
	struct skx_dev *dev;
	u64	addr;
	int	socket;
	int	imc;
	int	channel;
	u64	chan_addr;
	int	sktways;
	int	chanways;
	int	dimm;
	int	cs;
	int	rank;
	int	channel_rank;
	u64	rank_address;
	int	row;
	int	column;
	int	bank_address;
	int	bank_group;
	/* NOTE(review): presumably set when ADXL produced the result; confirm */
	bool	decoded_by_adxl;
};
197
/*
 * PCI bus/device/function triple packed into bit fields
 * (8 + 5 + 3 bits). Used for the *_bdf members of struct res_config.
 */
struct pci_bdf {
	u32 bus : 8;
	u32 dev : 5;
	u32 fun : 3;
};
203
/*
 * Platform resource configuration: platform type, configuration device
 * ID, DDR/HBM memory controller topology, PCI device locations (BDFs)
 * and register offset tables.
 */
struct res_config {
	enum type type;
	/* Configuration agent device ID */
	unsigned int decs_did;
	/* Default bus number configuration register offset */
	int busno_cfg_offset;
	/* DDR memory controllers per socket */
	int ddr_imc_num;
	/* DDR channels per DDR memory controller */
	int ddr_chan_num;
	/* DDR DIMMs per DDR memory channel */
	int ddr_dimm_num;
	/* Per DDR channel memory-mapped I/O size */
	int ddr_chan_mmio_sz;
	/* HBM memory controllers per socket */
	int hbm_imc_num;
	/* HBM channels per HBM memory controller */
	int hbm_chan_num;
	/* HBM DIMMs per HBM memory channel */
	int hbm_dimm_num;
	/* Per HBM channel memory-mapped I/O size */
	int hbm_chan_mmio_sz;
	bool support_ddr5;
	/* SAD device BDF */
	struct pci_bdf sad_all_bdf;
	/* PCU device BDF */
	struct pci_bdf pcu_cr3_bdf;
	/* UTIL device BDF */
	struct pci_bdf util_all_bdf;
	/* URACU device BDF */
	struct pci_bdf uracu_bdf;
	/* DDR mdev device BDF */
	struct pci_bdf ddr_mdev_bdf;
	/* HBM mdev device BDF */
	struct pci_bdf hbm_mdev_bdf;
	int sad_all_offset;
	/*
	 * Offsets of retry_rd_err_log registers: separate tables for the
	 * scrub and demand cases, each with hbm0/hbm1 variants.
	 */
	u32 *offsets_scrub;
	u32 *offsets_scrub_hbm0;
	u32 *offsets_scrub_hbm1;
	u32 *offsets_demand;
	u32 *offsets_demand2;
	u32 *offsets_demand_hbm0;
	u32 *offsets_demand_hbm1;
};
249
/*
 * Callback types supplied by the drivers that use this header.
 *
 * get_dimm_config_f:    takes a memory controller and its res_config,
 *                       returns an int; passed to skx_register_mci().
 * skx_decode_f:         takes a decoded_addr, returns a bool; installed
 *                       via skx_set_decode().
 * skx_show_retry_log_f: takes a decoded_addr, a message buffer <msg> of
 *                       <len> bytes and a scrub_err flag; installed via
 *                       skx_set_decode().
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg,
				     int len, bool scrub_err);
254
/*
 * Functions declared for the drivers that include this header.
 * Only the declarations live here; the definitions are elsewhere.
 */

/* ADXL get/put pair */
int skx_adxl_get(void);
void skx_adxl_put(void);

/* Setters for the decode callbacks, memory configuration and res_config */
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);
/* <pmc>/<lmc>: see the mc_mapping[] comment in struct skx_dev */
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);

/* ID lookups; the result is written through <id> */
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);

/* The list head is handed back through <list> */
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);

/* Results are written through <tolm> and <tohm> */
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);

int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
		      struct skx_imc *imc, int chan, int dimmno,
		      struct res_config *cfg);

int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
			int chan, int dimmno, const char *mod_str);

int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
		     const char *ctl_name, const char *mod_str,
		     get_dimm_config_f get_dimm_config,
		     struct res_config *cfg);

/* NOTE(review): signature matches a notifier callback; confirm at the caller */
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			void *data);

void skx_remove(void);
285
/*
 * Debug hooks. With CONFIG_EDAC_DEBUG defined these are ordinary functions
 * declared here and defined outside this header. Otherwise they collapse
 * to empty inline stubs, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
293
294 #endif /* _SKX_COMM_EDAC_H */
295