1 #include "amd64_edac.h"
2 #include <asm/amd_nb.h>
3 
4 static struct edac_pci_ctl_info *pci_ctl;
5 
6 static int report_gart_errors;
7 module_param(report_gart_errors, int, 0644);
8 
9 /*
10  * Set by command line parameter. If BIOS has enabled the ECC, this override is
11  * cleared to prevent re-enabling the hardware by this driver.
12  */
13 static int ecc_enable_override;
14 module_param(ecc_enable_override, int, 0644);
15 
16 static struct msr __percpu *msrs;
17 
18 /* Per-node stuff */
19 static struct ecc_settings **ecc_stngs;
20 
21 /*
22  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
23  * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
24  * or higher value'.
25  *
26  * FIXME: Produce a better mapping/linearisation.
27  */
28 static const struct scrubrate {
29        u32 scrubval;           /* bit pattern for scrub rate */
30        u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
31 } scrubrates[] = {
32 	{ 0x01, 1600000000UL},
33 	{ 0x02, 800000000UL},
34 	{ 0x03, 400000000UL},
35 	{ 0x04, 200000000UL},
36 	{ 0x05, 100000000UL},
37 	{ 0x06, 50000000UL},
38 	{ 0x07, 25000000UL},
39 	{ 0x08, 12284069UL},
40 	{ 0x09, 6274509UL},
41 	{ 0x0A, 3121951UL},
42 	{ 0x0B, 1560975UL},
43 	{ 0x0C, 781440UL},
44 	{ 0x0D, 390720UL},
45 	{ 0x0E, 195300UL},
46 	{ 0x0F, 97650UL},
47 	{ 0x10, 48854UL},
48 	{ 0x11, 24427UL},
49 	{ 0x12, 12213UL},
50 	{ 0x13, 6101UL},
51 	{ 0x14, 3051UL},
52 	{ 0x15, 1523UL},
53 	{ 0x16, 761UL},
54 	{ 0x00, 0UL},        /* scrubbing off */
55 };
56 
57 int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
58 			       u32 *val, const char *func)
59 {
60 	int err = 0;
61 
62 	err = pci_read_config_dword(pdev, offset, val);
63 	if (err)
64 		amd64_warn("%s: error reading F%dx%03x.\n",
65 			   func, PCI_FUNC(pdev->devfn), offset);
66 
67 	return err;
68 }
69 
70 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
71 				u32 val, const char *func)
72 {
73 	int err = 0;
74 
75 	err = pci_write_config_dword(pdev, offset, val);
76 	if (err)
77 		amd64_warn("%s: error writing to F%dx%03x.\n",
78 			   func, PCI_FUNC(pdev->devfn), offset);
79 
80 	return err;
81 }
82 
83 /*
84  * Select DCT to which PCI cfg accesses are routed
85  */
86 static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
87 {
88 	u32 reg = 0;
89 
90 	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
91 	reg &= (pvt->model == 0x30) ? ~3 : ~1;
92 	reg |= dct;
93 	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
94 }
95 
96 /*
97  *
98  * Depending on the family, F2 DCT reads need special handling:
99  *
100  * K8: has a single DCT only and no address offsets >= 0x100
101  *
102  * F10h: each DCT has its own set of regs
103  *	DCT0 -> F2x040..
104  *	DCT1 -> F2x140..
105  *
106  * F16h: has only 1 DCT
107  *
108  * F15h: we select which DCT we access using F1x10C[DctCfgSel]
109  */
110 static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
111 					 int offset, u32 *val)
112 {
113 	switch (pvt->fam) {
114 	case 0xf:
115 		if (dct || offset >= 0x100)
116 			return -EINVAL;
117 		break;
118 
119 	case 0x10:
120 		if (dct) {
121 			/*
122 			 * Note: If ganging is enabled, barring the regs
123 			 * F2x[1,0]98 and F2x[1,0]9C; reads to F2x1xx
124 			 * return 0. (cf. Section 2.8.1 F10h BKDG)
125 			 */
126 			if (dct_ganging_enabled(pvt))
127 				return 0;
128 
129 			offset += 0x100;
130 		}
131 		break;
132 
133 	case 0x15:
134 		/*
135 		 * F15h: F2x1xx addresses do not map explicitly to DCT1.
136 		 * We should select which DCT we access using F1x10C[DctCfgSel]
137 		 */
138 		dct = (dct && pvt->model == 0x30) ? 3 : dct;
139 		f15h_select_dct(pvt, dct);
140 		break;
141 
142 	case 0x16:
143 		if (dct)
144 			return -EINVAL;
145 		break;
146 
147 	default:
148 		break;
149 	}
150 	return amd64_read_pci_cfg(pvt->F2, offset, val);
151 }
152 
153 /*
154  * Memory scrubber control interface. For K8, memory scrubbing is handled by
155  * hardware and can involve L2 cache, dcache as well as the main memory. With
156  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
157  * functionality.
158  *
159  * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
160  * (dram) over to cache lines. This is nasty, so we will use bandwidth in
161  * bytes/sec for the setting.
162  *
163  * Currently, we only do dram scrubbing. If the scrubbing is done in software on
164  * other archs, we might not have access to the caches directly.
165  */
166 
167 static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval)
168 {
169 	/*
170 	 * Fam17h supports scrub values between 0x5 and 0x14. Also, the values
171 	 * are shifted down by 0x5, so scrubval 0x5 is written to the register
172 	 * as 0x0, scrubval 0x6 as 0x1, etc.
173 	 */
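	/*
	 * For example, a scrubval of 0x9 is written as 0x4 into the low
	 * nibble of F17H_SCR_LIMIT_ADDR; any value outside 0x5-0x14 only
	 * clears the enable bit in F17H_SCR_BASE_ADDR, i.e. scrubbing is
	 * switched off.
	 */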
174 	if (scrubval >= 0x5 && scrubval <= 0x14) {
175 		scrubval -= 0x5;
176 		pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF);
177 		pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1);
178 	} else {
179 		pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1);
180 	}
181 }
182 /*
183  * Scan the scrub rate mapping table for a close or matching bandwidth value to
184  * issue. If the requested value is too big, then use the last maximum value found.
185  */
186 static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
187 {
188 	u32 scrubval;
189 	int i;
190 
191 	/*
192 	 * map the configured rate (new_bw) to a value specific to the AMD64
193 	 * memory controller and apply to register. Search for the first
194 	 * bandwidth entry that is greater or equal than the setting requested
195 	 * and program that. If at last entry, turn off DRAM scrubbing.
196 	 *
197 	 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
198 	 * by falling back to the last element in scrubrates[].
199 	 */
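	/*
	 * For example (illustrative): a request of 3000000 bytes/sec with a
	 * min_rate of 0x5 skips the disallowed low scrubvals and stops at
	 * scrubval 0x0B (1560975 bytes/sec), the first table entry whose
	 * bandwidth does not exceed the request.
	 */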
200 	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
201 		/*
202 		 * skip scrub rates which aren't recommended
203 		 * (see F10 BKDG, F3x58)
204 		 */
205 		if (scrubrates[i].scrubval < min_rate)
206 			continue;
207 
208 		if (scrubrates[i].bandwidth <= new_bw)
209 			break;
210 	}
211 
212 	scrubval = scrubrates[i].scrubval;
213 
214 	if (pvt->fam == 0x17) {
215 		__f17h_set_scrubval(pvt, scrubval);
216 	} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
217 		f15h_select_dct(pvt, 0);
218 		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
219 		f15h_select_dct(pvt, 1);
220 		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
221 	} else {
222 		pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F);
223 	}
224 
225 	if (scrubval)
226 		return scrubrates[i].bandwidth;
227 
228 	return 0;
229 }
230 
231 static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
232 {
233 	struct amd64_pvt *pvt = mci->pvt_info;
234 	u32 min_scrubrate = 0x5;
235 
236 	if (pvt->fam == 0xf)
237 		min_scrubrate = 0x0;
238 
239 	if (pvt->fam == 0x15) {
240 		/* Erratum #505 */
241 		if (pvt->model < 0x10)
242 			f15h_select_dct(pvt, 0);
243 
244 		if (pvt->model == 0x60)
245 			min_scrubrate = 0x6;
246 	}
247 	return __set_scrub_rate(pvt, bw, min_scrubrate);
248 }
249 
250 static int get_scrub_rate(struct mem_ctl_info *mci)
251 {
252 	struct amd64_pvt *pvt = mci->pvt_info;
253 	int i, retval = -EINVAL;
254 	u32 scrubval = 0;
255 
256 	switch (pvt->fam) {
257 	case 0x15:
258 		/* Erratum #505 */
259 		if (pvt->model < 0x10)
260 			f15h_select_dct(pvt, 0);
261 
262 		if (pvt->model == 0x60)
263 			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
264 		break;
265 
266 	case 0x17:
267 		amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
268 		if (scrubval & BIT(0)) {
269 			amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
270 			scrubval &= 0xF;
271 			scrubval += 0x5;
272 		} else {
273 			scrubval = 0;
274 		}
275 		break;
276 
277 	default:
278 		amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
279 		break;
280 	}
281 
282 	scrubval = scrubval & 0x001F;
283 
284 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
285 		if (scrubrates[i].scrubval == scrubval) {
286 			retval = scrubrates[i].bandwidth;
287 			break;
288 		}
289 	}
290 	return retval;
291 }
292 
293 /*
294  * returns true if the SysAddr given by sys_addr matches the
295  * DRAM base/limit associated with node_id
296  */
297 static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
298 {
299 	u64 addr;
300 
301 	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
302 	 * all ones if the most significant implemented address bit is 1.
303 	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
304 	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
305 	 * Application Programming.
306 	 */
307 	addr = sys_addr & 0x000000ffffffffffull;
308 
309 	return ((addr >= get_dram_base(pvt, nid)) &&
310 		(addr <= get_dram_limit(pvt, nid)));
311 }
312 
313 /*
314  * Attempt to map a SysAddr to a node. On success, return a pointer to the
315  * mem_ctl_info structure for the node that the SysAddr maps to.
316  *
317  * On failure, return NULL.
318  */
319 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
320 						u64 sys_addr)
321 {
322 	struct amd64_pvt *pvt;
323 	u8 node_id;
324 	u32 intlv_en, bits;
325 
326 	/*
327 	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
328 	 * 3.4.4.2) registers to map the SysAddr to a node ID.
329 	 */
330 	pvt = mci->pvt_info;
331 
332 	/*
333 	 * The value of this field should be the same for all DRAM Base
334 	 * registers.  Therefore we arbitrarily choose to read it from the
335 	 * register for node 0.
336 	 */
337 	intlv_en = dram_intlv_en(pvt, 0);
338 
339 	if (intlv_en == 0) {
340 		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
341 			if (base_limit_match(pvt, sys_addr, node_id))
342 				goto found;
343 		}
344 		goto err_no_match;
345 	}
346 
347 	if (unlikely((intlv_en != 0x01) &&
348 		     (intlv_en != 0x03) &&
349 		     (intlv_en != 0x07))) {
350 		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
351 		return NULL;
352 	}
353 
354 	bits = (((u32) sys_addr) >> 12) & intlv_en;
355 
356 	for (node_id = 0; ; ) {
357 		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
358 			break;	/* intlv_sel field matches */
359 
360 		if (++node_id >= DRAM_RANGES)
361 			goto err_no_match;
362 	}
363 
364 	/* sanity test for sys_addr */
365 	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
366 		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
367 			   "range for node %d with node interleaving enabled.\n",
368 			   __func__, sys_addr, node_id);
369 		return NULL;
370 	}
371 
372 found:
373 	return edac_mc_find((int)node_id);
374 
375 err_no_match:
376 	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
377 		 (unsigned long)sys_addr);
378 
379 	return NULL;
380 }
381 
382 /*
383  * compute the CS base address of the @csrow on the DRAM controller @dct.
384  * For details see F2x[5C:40] in the processor's BKDG
385  */
386 static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
387 				 u64 *base, u64 *mask)
388 {
389 	u64 csbase, csmask, base_bits, mask_bits;
390 	u8 addr_shift;
391 
392 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
393 		csbase		= pvt->csels[dct].csbases[csrow];
394 		csmask		= pvt->csels[dct].csmasks[csrow];
395 		base_bits	= GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
396 		mask_bits	= GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
397 		addr_shift	= 4;
398 
399 	/*
400 	 * F16h and F15h, models 30h and later need two addr_shift values:
401 	 * 8 for high and 6 for low (cf. F16h BKDG).
402 	 */
403 	} else if (pvt->fam == 0x16 ||
404 		  (pvt->fam == 0x15 && pvt->model >= 0x30)) {
405 		csbase          = pvt->csels[dct].csbases[csrow];
406 		csmask          = pvt->csels[dct].csmasks[csrow >> 1];
407 
408 		*base  = (csbase & GENMASK_ULL(15,  5)) << 6;
409 		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;
410 
411 		*mask = ~0ULL;
412 		/* poke holes for the csmask */
413 		*mask &= ~((GENMASK_ULL(15, 5)  << 6) |
414 			   (GENMASK_ULL(30, 19) << 8));
415 
416 		*mask |= (csmask & GENMASK_ULL(15, 5))  << 6;
417 		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
418 
419 		return;
420 	} else {
421 		csbase		= pvt->csels[dct].csbases[csrow];
422 		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
423 		addr_shift	= 8;
424 
425 		if (pvt->fam == 0x15)
426 			base_bits = mask_bits =
427 				GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
428 		else
429 			base_bits = mask_bits =
430 				GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
431 	}
432 
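	/*
	 * e.g. for fam 0x10 (and K8 rev >= F): addr_shift is 8, so DCSB bits
	 * [28:19] become base address bits [36:27] and DCSB bits [13:5]
	 * become bits [21:13]; the mask computed below has "don't care"
	 * holes only in the positions covered by the DCSM fields.
	 */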
433 	*base  = (csbase & base_bits) << addr_shift;
434 
435 	*mask  = ~0ULL;
436 	/* poke holes for the csmask */
437 	*mask &= ~(mask_bits << addr_shift);
438 	/* OR them in */
439 	*mask |= (csmask & mask_bits) << addr_shift;
440 }
441 
442 #define for_each_chip_select(i, dct, pvt) \
443 	for (i = 0; i < pvt->csels[dct].b_cnt; i++)
444 
445 #define chip_select_base(i, dct, pvt) \
446 	pvt->csels[dct].csbases[i]
447 
448 #define for_each_chip_select_mask(i, dct, pvt) \
449 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
450 
451 /*
452  * @input_addr is an InputAddr associated with the node given by mci. Return the
453  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
454  */
455 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
456 {
457 	struct amd64_pvt *pvt;
458 	int csrow;
459 	u64 base, mask;
460 
461 	pvt = mci->pvt_info;
462 
463 	for_each_chip_select(csrow, 0, pvt) {
464 		if (!csrow_enabled(csrow, 0, pvt))
465 			continue;
466 
467 		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
468 
469 		mask = ~mask;
470 
471 		if ((input_addr & mask) == (base & mask)) {
472 			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
473 				 (unsigned long)input_addr, csrow,
474 				 pvt->mc_node_id);
475 
476 			return csrow;
477 		}
478 	}
479 	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
480 		 (unsigned long)input_addr, pvt->mc_node_id);
481 
482 	return -1;
483 }
484 
485 /*
486  * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
487  * for the node represented by mci. Info is passed back in *hole_base,
488  * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
489  * info is invalid. Info may be invalid for either of the following reasons:
490  *
491  * - The revision of the node is not E or greater.  In this case, the DRAM Hole
492  *   Address Register does not exist.
493  *
494  * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
495  *   indicating that its contents are not valid.
496  *
497  * The values passed back in *hole_base, *hole_offset, and *hole_size are
498  * complete 32-bit values despite the fact that the bitfields in the DHAR
499  * only represent bits 31-24 of the base and offset values.
500  */
501 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
502 			     u64 *hole_offset, u64 *hole_size)
503 {
504 	struct amd64_pvt *pvt = mci->pvt_info;
505 
506 	/* only revE and later have the DRAM Hole Address Register */
507 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
508 		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
509 			 pvt->ext_model, pvt->mc_node_id);
510 		return 1;
511 	}
512 
513 	/* valid for Fam10h and above */
514 	if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
515 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
516 		return 1;
517 	}
518 
519 	if (!dhar_valid(pvt)) {
520 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
521 			 pvt->mc_node_id);
522 		return 1;
523 	}
524 
525 	/* This node has Memory Hoisting */
526 
527 	/* +------------------+--------------------+--------------------+-----
528 	 * | memory           | DRAM hole          | relocated          |
529 	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
530 	 * |                  |                    | DRAM hole          |
531 	 * |                  |                    | [0x100000000,      |
532 	 * |                  |                    |  (0x100000000+     |
533 	 * |                  |                    |   (0xffffffff-x))] |
534 	 * +------------------+--------------------+--------------------+-----
535 	 *
536 	 * Above is a diagram of physical memory showing the DRAM hole and the
537 	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
538 	 * starts at address x (the base address) and extends through address
539 	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
540 	 * addresses in the hole so that they start at 0x100000000.
541 	 */
542 
543 	*hole_base = dhar_base(pvt);
544 	*hole_size = (1ULL << 32) - *hole_base;
545 
546 	*hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
547 					: k8_dhar_offset(pvt);
548 
549 	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
550 		 pvt->mc_node_id, (unsigned long)*hole_base,
551 		 (unsigned long)*hole_offset, (unsigned long)*hole_size);
552 
553 	return 0;
554 }
555 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
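/*
 * Illustrative example: with a hole base of 0xc0000000, *hole_size is
 * 0x40000000 and the relocated hole addresses occupy
 * [0x100000000, 0x13fffffff]; *hole_offset is the amount that
 * sys_addr_to_dram_addr() subtracts from such a SysAddr to reach the
 * underlying DRAM address.
 */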
556 
557 /*
558  * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
559  * assumed that sys_addr maps to the node given by mci.
560  *
561  * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
562  * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
563  * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
564  * then it is also involved in translating a SysAddr to a DramAddr. Sections
565  * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
566  * These parts of the documentation are unclear. I interpret them as follows:
567  *
568  * When node n receives a SysAddr, it processes the SysAddr as follows:
569  *
570  * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
571  *    Limit registers for node n. If the SysAddr is not within the range
572  *    specified by the base and limit values, then node n ignores the Sysaddr
573  *    (since it does not map to node n). Otherwise continue to step 2 below.
574  *
575  * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
576  *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
577  *    the range of relocated addresses (starting at 0x100000000) from the DRAM
578  *    hole. If not, skip to step 3 below. Else get the value of the
579  *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
580  *    offset defined by this value from the SysAddr.
581  *
582  * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
583  *    Base register for node n. To obtain the DramAddr, subtract the base
584  *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
585  */
586 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
587 {
588 	struct amd64_pvt *pvt = mci->pvt_info;
589 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
590 	int ret;
591 
592 	dram_base = get_dram_base(pvt, pvt->mc_node_id);
593 
594 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
595 				      &hole_size);
596 	if (!ret) {
597 		if ((sys_addr >= (1ULL << 32)) &&
598 		    (sys_addr < ((1ULL << 32) + hole_size))) {
599 			/* use DHAR to translate SysAddr to DramAddr */
600 			dram_addr = sys_addr - hole_offset;
601 
602 			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
603 				 (unsigned long)sys_addr,
604 				 (unsigned long)dram_addr);
605 
606 			return dram_addr;
607 		}
608 	}
609 
610 	/*
611 	 * Translate the SysAddr to a DramAddr as shown near the start of
612 	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
613 	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
614 	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
615 	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
616 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
617 	 * Programmer's Manual Volume 1 Application Programming.
618 	 */
619 	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
620 
621 	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
622 		 (unsigned long)sys_addr, (unsigned long)dram_addr);
623 	return dram_addr;
624 }
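/*
 * Worked example (assuming hole_base 0xc0000000, hole_offset 0x40000000 and
 * a DRAM base of 0): SysAddr 0x100001000 lies in the relocated hole range,
 * so DramAddr = 0x100001000 - 0x40000000 = 0xc0001000; a SysAddr below 4GB
 * simply has the DRAM base subtracted.
 */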
625 
626 /*
627  * @intlv_en is the value of the IntlvEn field from a DRAM Base register
628  * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
629  * for node interleaving.
630  */
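/*
 * e.g. IntlvEn 0x1 -> 1 bit (2-node interleave), 0x3 -> 2 bits (4 nodes),
 * 0x7 -> 3 bits (8 nodes); the remaining encodings map to 0 bits.
 */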
631 static int num_node_interleave_bits(unsigned intlv_en)
632 {
633 	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
634 	int n;
635 
636 	BUG_ON(intlv_en > 7);
637 	n = intlv_shift_table[intlv_en];
638 	return n;
639 }
640 
641 /* Translate the DramAddr given by @dram_addr to an InputAddr. */
642 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
643 {
644 	struct amd64_pvt *pvt;
645 	int intlv_shift;
646 	u64 input_addr;
647 
648 	pvt = mci->pvt_info;
649 
650 	/*
651 	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
652 	 * concerning translating a DramAddr to an InputAddr.
653 	 */
654 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
655 	input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
656 		      (dram_addr & 0xfff);
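	/*
	 * e.g. with a 2-node interleave (intlv_shift == 1), a DramAddr of
	 * 0x12345678 yields ((0x12345678 >> 1) & ~0xfff) + 0x678 =
	 * 0x091a2678.
	 */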
657 
658 	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
659 		 intlv_shift, (unsigned long)dram_addr,
660 		 (unsigned long)input_addr);
661 
662 	return input_addr;
663 }
664 
665 /*
666  * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
667  * assumed that @sys_addr maps to the node given by mci.
668  */
669 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
670 {
671 	u64 input_addr;
672 
673 	input_addr =
674 	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
675 
676 	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
677 		 (unsigned long)sys_addr, (unsigned long)input_addr);
678 
679 	return input_addr;
680 }
681 
682 /* Map the Error address to a PAGE and PAGE OFFSET. */
683 static inline void error_address_to_page_and_offset(u64 error_address,
684 						    struct err_info *err)
685 {
686 	err->page = (u32) (error_address >> PAGE_SHIFT);
687 	err->offset = ((u32) error_address) & ~PAGE_MASK;
688 }
689 
690 /*
691  * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
692  * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
693  * of a node that detected an ECC memory error.  mci represents the node that
694  * the error address maps to (possibly different from the node that detected
695  * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
696  * error.
697  */
698 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
699 {
700 	int csrow;
701 
702 	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
703 
704 	if (csrow == -1)
705 		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
706 				  "address 0x%lx\n", (unsigned long)sys_addr);
707 	return csrow;
708 }
709 
710 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
711 
712 /*
713  * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
714  * are ECC capable.
715  */
716 static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
717 {
718 	unsigned long edac_cap = EDAC_FLAG_NONE;
719 	u8 bit;
720 
721 	if (pvt->umc) {
722 		u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
723 
724 		for (i = 0; i < NUM_UMCS; i++) {
725 			if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
726 				continue;
727 
728 			umc_en_mask |= BIT(i);
729 
730 			/* UMC Configuration bit 12 (DimmEccEn) */
731 			if (pvt->umc[i].umc_cfg & BIT(12))
732 				dimm_ecc_en_mask |= BIT(i);
733 		}
734 
735 		if (umc_en_mask == dimm_ecc_en_mask)
736 			edac_cap = EDAC_FLAG_SECDED;
737 	} else {
738 		bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
739 			? 19
740 			: 17;
741 
742 		if (pvt->dclr0 & BIT(bit))
743 			edac_cap = EDAC_FLAG_SECDED;
744 	}
745 
746 	return edac_cap;
747 }
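/*
 * Note: on UMC-based systems EDAC_FLAG_SECDED is reported only when every
 * enabled UMC also has DimmEccEn set; a single enabled channel without DIMM
 * ECC leaves the node at EDAC_FLAG_NONE.
 */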
748 
749 static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
750 
751 static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
752 {
753 	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
754 
755 	if (pvt->dram_type == MEM_LRDDR3) {
756 		u32 dcsm = pvt->csels[chan].csmasks[0];
757 		/*
758 		 * It's assumed all LRDIMMs in a DCT are going to be of
759 		 * same 'type' until proven otherwise. So, use a cs
760 		 * the same 'type' until proven otherwise. So, use a cs
761 		 */
762 		edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
763 	}
764 
765 	edac_dbg(1, "All DIMMs support ECC:%s\n",
766 		    (dclr & BIT(19)) ? "yes" : "no");
767 
768 
769 	edac_dbg(1, "  PAR/ERR parity: %s\n",
770 		 (dclr & BIT(8)) ?  "enabled" : "disabled");
771 
772 	if (pvt->fam == 0x10)
773 		edac_dbg(1, "  DCT 128bit mode width: %s\n",
774 			 (dclr & BIT(11)) ?  "128b" : "64b");
775 
776 	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
777 		 (dclr & BIT(12)) ?  "yes" : "no",
778 		 (dclr & BIT(13)) ?  "yes" : "no",
779 		 (dclr & BIT(14)) ?  "yes" : "no",
780 		 (dclr & BIT(15)) ?  "yes" : "no");
781 }
782 
783 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
784 {
785 	int dimm, size0, size1, cs0, cs1;
786 
787 	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
788 
789 	for (dimm = 0; dimm < 4; dimm++) {
790 		size0 = 0;
791 		cs0 = dimm * 2;
792 
793 		if (csrow_enabled(cs0, ctrl, pvt))
794 			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
795 
796 		size1 = 0;
797 		cs1 = dimm * 2 + 1;
798 
799 		if (csrow_enabled(cs1, ctrl, pvt))
800 			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
801 
802 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
803 				cs0,	size0,
804 				cs1,	size1);
805 	}
806 }
807 
808 static void __dump_misc_regs_df(struct amd64_pvt *pvt)
809 {
810 	struct amd64_umc *umc;
811 	u32 i, tmp, umc_base;
812 
813 	for (i = 0; i < NUM_UMCS; i++) {
814 		umc_base = get_umc_base(i);
815 		umc = &pvt->umc[i];
816 
817 		edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg);
818 		edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
819 		edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
820 		edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
821 
822 		amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp);
823 		edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp);
824 
825 		amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp);
826 		edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp);
827 		edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
828 
829 		edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
830 				i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
831 				    (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
832 		edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
833 				i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
834 		edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
835 				i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
836 		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
837 				i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
838 
839 		if (pvt->dram_type == MEM_LRDDR4) {
840 			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
841 			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
842 					i, 1 << ((tmp >> 4) & 0x3));
843 		}
844 
845 		debug_display_dimm_sizes_df(pvt, i);
846 	}
847 
848 	edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
849 		 pvt->dhar, dhar_base(pvt));
850 }
851 
852 /* Display and decode various NB registers for debug purposes. */
853 static void __dump_misc_regs(struct amd64_pvt *pvt)
854 {
855 	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
856 
857 	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
858 		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
859 
860 	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
861 		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
862 		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
863 
864 	debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
865 
866 	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
867 
868 	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
869 		 pvt->dhar, dhar_base(pvt),
870 		 (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
871 				   : f10_dhar_offset(pvt));
872 
873 	debug_display_dimm_sizes(pvt, 0);
874 
875 	/* everything below this point is Fam10h and above */
876 	if (pvt->fam == 0xf)
877 		return;
878 
879 	debug_display_dimm_sizes(pvt, 1);
880 
881 	/* Only if NOT ganged does dclr1 have valid info */
882 	if (!dct_ganging_enabled(pvt))
883 		debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
884 }
885 
886 /* Display and decode various NB registers for debug purposes. */
887 static void dump_misc_regs(struct amd64_pvt *pvt)
888 {
889 	if (pvt->umc)
890 		__dump_misc_regs_df(pvt);
891 	else
892 		__dump_misc_regs(pvt);
893 
894 	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
895 
896 	amd64_info("using %s syndromes.\n",
897 			((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
898 }
899 
900 /*
901  * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
902  */
903 static void prep_chip_selects(struct amd64_pvt *pvt)
904 {
905 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
906 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
907 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
908 	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
909 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
910 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
911 	} else {
912 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
913 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
914 	}
915 }
916 
917 /*
918  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
919  */
920 static void read_dct_base_mask(struct amd64_pvt *pvt)
921 {
922 	int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;
923 
924 	prep_chip_selects(pvt);
925 
926 	if (pvt->umc) {
927 		base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
928 		base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
929 		mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
930 		mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
931 	} else {
932 		base_reg0 = DCSB0;
933 		base_reg1 = DCSB1;
934 		mask_reg0 = DCSM0;
935 		mask_reg1 = DCSM1;
936 	}
937 
938 	for_each_chip_select(cs, 0, pvt) {
939 		int reg0   = base_reg0 + (cs * 4);
940 		int reg1   = base_reg1 + (cs * 4);
941 		u32 *base0 = &pvt->csels[0].csbases[cs];
942 		u32 *base1 = &pvt->csels[1].csbases[cs];
943 
944 		if (pvt->umc) {
945 			if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
946 				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: 0x%x\n",
947 					 cs, *base0, reg0);
948 
949 			if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
950 				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: 0x%x\n",
951 					 cs, *base1, reg1);
952 		} else {
953 			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
954 				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
955 					 cs, *base0, reg0);
956 
957 			if (pvt->fam == 0xf)
958 				continue;
959 
960 			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
961 				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
962 					 cs, *base1, (pvt->fam == 0x10) ? reg1
963 								: reg0);
964 		}
965 	}
966 
967 	for_each_chip_select_mask(cs, 0, pvt) {
968 		int reg0   = mask_reg0 + (cs * 4);
969 		int reg1   = mask_reg1 + (cs * 4);
970 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
971 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
972 
973 		if (pvt->umc) {
974 			if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
975 				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: 0x%x\n",
976 					 cs, *mask0, reg0);
977 
978 			if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
979 				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: 0x%x\n",
980 					 cs, *mask1, reg1);
981 		} else {
982 			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
983 				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
984 					 cs, *mask0, reg0);
985 
986 			if (pvt->fam == 0xf)
987 				continue;
988 
989 			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
990 				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
991 					 cs, *mask1, (pvt->fam == 0x10) ? reg1
992 								: reg0);
993 		}
994 	}
995 }
996 
997 static void determine_memory_type(struct amd64_pvt *pvt)
998 {
999 	u32 dram_ctrl, dcsm;
1000 
1001 	switch (pvt->fam) {
1002 	case 0xf:
1003 		if (pvt->ext_model >= K8_REV_F)
1004 			goto ddr3;
1005 
1006 		pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
1007 		return;
1008 
1009 	case 0x10:
1010 		if (pvt->dchr0 & DDR3_MODE)
1011 			goto ddr3;
1012 
1013 		pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
1014 		return;
1015 
1016 	case 0x15:
1017 		if (pvt->model < 0x60)
1018 			goto ddr3;
1019 
1020 		/*
1021 		 * Model 0x60h needs special handling:
1022 		 *
1023 		 * We use a Chip Select value of '0' to obtain dcsm.
1024 		 * Theoretically, it is possible to populate LRDIMMs of different
1025 		 * 'Rank' value on a DCT. But this is not the common case. So,
1026 		 * it's reasonable to assume all DIMMs are going to be of the same
1027 		 * 'type' until proven otherwise.
1028 		 */
1029 		amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
1030 		dcsm = pvt->csels[0].csmasks[0];
1031 
1032 		if (((dram_ctrl >> 8) & 0x7) == 0x2)
1033 			pvt->dram_type = MEM_DDR4;
1034 		else if (pvt->dclr0 & BIT(16))
1035 			pvt->dram_type = MEM_DDR3;
1036 		else if (dcsm & 0x3)
1037 			pvt->dram_type = MEM_LRDDR3;
1038 		else
1039 			pvt->dram_type = MEM_RDDR3;
1040 
1041 		return;
1042 
1043 	case 0x16:
1044 		goto ddr3;
1045 
1046 	case 0x17:
1047 		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
1048 			pvt->dram_type = MEM_LRDDR4;
1049 		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
1050 			pvt->dram_type = MEM_RDDR4;
1051 		else
1052 			pvt->dram_type = MEM_DDR4;
1053 		return;
1054 
1055 	default:
1056 		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
1057 		pvt->dram_type = MEM_EMPTY;
1058 	}
1059 	return;
1060 
1061 ddr3:
1062 	pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
1063 }
1064 
1065 /* Get the number of DCT channels the memory controller is using. */
1066 static int k8_early_channel_count(struct amd64_pvt *pvt)
1067 {
1068 	int flag;
1069 
1070 	if (pvt->ext_model >= K8_REV_F)
1071 		/* RevF (NPT) and later */
1072 		flag = pvt->dclr0 & WIDTH_128;
1073 	else
1074 		/* RevE and earlier */
1075 		flag = pvt->dclr0 & REVE_WIDTH_128;
1076 
1077 	/* not used */
1078 	pvt->dclr1 = 0;
1079 
1080 	return (flag) ? 2 : 1;
1081 }
1082 
1083 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
1084 static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
1085 {
1086 	u16 mce_nid = amd_get_nb_id(m->extcpu);
1087 	struct mem_ctl_info *mci;
1088 	u8 start_bit = 1;
1089 	u8 end_bit   = 47;
1090 	u64 addr;
1091 
1092 	mci = edac_mc_find(mce_nid);
1093 	if (!mci)
1094 		return 0;
1095 
1096 	pvt = mci->pvt_info;
1097 
1098 	if (pvt->fam == 0xf) {
1099 		start_bit = 3;
1100 		end_bit   = 39;
1101 	}
1102 
1103 	addr = m->addr & GENMASK_ULL(end_bit, start_bit);
1104 
1105 	/*
1106 	 * Erratum 637 workaround
1107 	 */
1108 	if (pvt->fam == 0x15) {
1109 		u64 cc6_base, tmp_addr;
1110 		u32 tmp;
1111 		u8 intlv_en;
1112 
1113 		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
1114 			return addr;
1115 
1116 
1117 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
1118 		intlv_en = tmp >> 21 & 0x7;
1119 
1120 		/* add [47:27] + 3 trailing bits */
1121 		cc6_base  = (tmp & GENMASK_ULL(20, 0)) << 3;
1122 
1123 		/* reverse and add DramIntlvEn */
1124 		cc6_base |= intlv_en ^ 0x7;
1125 
1126 		/* pin at [47:24] */
1127 		cc6_base <<= 24;
1128 
1129 		if (!intlv_en)
1130 			return cc6_base | (addr & GENMASK_ULL(23, 0));
1131 
1132 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
1133 
1134 							/* faster log2 */
1135 		tmp_addr  = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
1136 
1137 		/* OR DramIntlvSel into bits [14:12] */
1138 		tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
1139 
1140 		/* add remaining [11:0] bits from original MC4_ADDR */
1141 		tmp_addr |= addr & GENMASK_ULL(11, 0);
1142 
1143 		return cc6_base | tmp_addr;
1144 	}
1145 
1146 	return addr;
1147 }
1148 
1149 static struct pci_dev *pci_get_related_function(unsigned int vendor,
1150 						unsigned int device,
1151 						struct pci_dev *related)
1152 {
1153 	struct pci_dev *dev = NULL;
1154 
1155 	while ((dev = pci_get_device(vendor, device, dev))) {
1156 		if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
1157 		    (dev->bus->number == related->bus->number) &&
1158 		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1159 			break;
1160 	}
1161 
1162 	return dev;
1163 }
1164 
1165 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
1166 {
1167 	struct amd_northbridge *nb;
1168 	struct pci_dev *f1 = NULL;
1169 	unsigned int pci_func;
1170 	int off = range << 3;
1171 	u32 llim;
1172 
1173 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
1174 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
1175 
1176 	if (pvt->fam == 0xf)
1177 		return;
1178 
1179 	if (!dram_rw(pvt, range))
1180 		return;
1181 
1182 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
1183 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
1184 
1185 	/* F15h: factor in CC6 save area by reading dst node's limit reg */
1186 	if (pvt->fam != 0x15)
1187 		return;
1188 
1189 	nb = node_to_amd_nb(dram_dst_node(pvt, range));
1190 	if (WARN_ON(!nb))
1191 		return;
1192 
1193 	if (pvt->model == 0x60)
1194 		pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
1195 	else if (pvt->model == 0x30)
1196 		pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
1197 	else
1198 		pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1;
1199 
1200 	f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
1201 	if (WARN_ON(!f1))
1202 		return;
1203 
1204 	amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1205 
1206 	pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
1207 
1208 				    /* {[39:27],111b} */
1209 	pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1210 
1211 	pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
1212 
1213 				    /* [47:40] */
1214 	pvt->ranges[range].lim.hi |= llim >> 13;
1215 
1216 	pci_dev_put(f1);
1217 }
1218 
1219 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1220 				    struct err_info *err)
1221 {
1222 	struct amd64_pvt *pvt = mci->pvt_info;
1223 
1224 	error_address_to_page_and_offset(sys_addr, err);
1225 
1226 	/*
1227 	 * Find out which node the error address belongs to. This may be
1228 	 * different from the node that detected the error.
1229 	 */
1230 	err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
1231 	if (!err->src_mci) {
1232 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1233 			     (unsigned long)sys_addr);
1234 		err->err_code = ERR_NODE;
1235 		return;
1236 	}
1237 
1238 	/* Now map the sys_addr to a CSROW */
1239 	err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
1240 	if (err->csrow < 0) {
1241 		err->err_code = ERR_CSROW;
1242 		return;
1243 	}
1244 
1245 	/* CHIPKILL enabled */
1246 	if (pvt->nbcfg & NBCFG_CHIPKILL) {
1247 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1248 		if (err->channel < 0) {
1249 			/*
1250 			 * Syndrome didn't map, so we don't know which of the
1251 			 * 2 DIMMs is in error. So we need to ID 'both' of them
1252 			 * as suspect.
1253 			 */
1254 			amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
1255 				      "possible error reporting race\n",
1256 				      err->syndrome);
1257 			err->err_code = ERR_CHANNEL;
1258 			return;
1259 		}
1260 	} else {
1261 		/*
1262 		 * non-chipkill ecc mode
1263 		 *
1264 		 * The k8 documentation is unclear about how to determine the
1265 		 * channel number when using non-chipkill memory.  This method
1266 		 * was obtained from email communication with someone at AMD.
1267 		 * (Wish the email was placed in this comment - norsk)
1268 		 */
1269 		err->channel = ((sys_addr & BIT(3)) != 0);
1270 	}
1271 }
1272 
1273 static int ddr2_cs_size(unsigned i, bool dct_width)
1274 {
1275 	unsigned shift = 0;
1276 
1277 	if (i <= 2)
1278 		shift = i;
1279 	else if (!(i & 0x1))
1280 		shift = i >> 1;
1281 	else
1282 		shift = (i + 1) >> 1;
1283 
1284 	return 128 << (shift + !!dct_width);
1285 }
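/*
 * e.g. ddr2_cs_size(5, false) gives 128 << 3 = 1024MB; a 128-bit wide DCT
 * (dct_width == true) doubles that to 2048MB.
 */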
1286 
1287 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1288 				  unsigned cs_mode, int cs_mask_nr)
1289 {
1290 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1291 
1292 	if (pvt->ext_model >= K8_REV_F) {
1293 		WARN_ON(cs_mode > 11);
1294 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1295 	}
1296 	else if (pvt->ext_model >= K8_REV_D) {
1297 		unsigned diff;
1298 		WARN_ON(cs_mode > 10);
1299 
1300 		/*
1301 		 * the below calculation, besides trying to win an obfuscated C
1302 		 * contest, maps cs_mode values to DIMM chip select sizes. The
1303 		 * mappings are:
1304 		 *
1305 		 * cs_mode	CS size (MB)
1306 		 * =======	============
1307 		 * 0		32
1308 		 * 1		64
1309 		 * 2		128
1310 		 * 3		128
1311 		 * 4		256
1312 		 * 5		512
1313 		 * 6		256
1314 		 * 7		512
1315 		 * 8		1024
1316 		 * 9		1024
1317 		 * 10		2048
1318 		 *
1319 		 * Basically, it calculates a value with which to shift the
1320 		 * smallest CS size of 32MB.
1321 		 *
1322 		 * ddr[23]_cs_size have a similar purpose.
1323 		 */
1324 		diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1325 
1326 		return 32 << (cs_mode - diff);
1327 	}
1328 	else {
1329 		WARN_ON(cs_mode > 6);
1330 		return 32 << cs_mode;
1331 	}
1332 }
1333 
1334 /*
1335  * Get the number of DCT channels in use.
1336  *
1337  * Return:
1338  *	number of Memory Channels in operation
1339  * Pass back:
1340  *	contents of the DCL0_LOW register
1341  */
1342 static int f1x_early_channel_count(struct amd64_pvt *pvt)
1343 {
1344 	int i, j, channels = 0;
1345 
1346 	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1347 	if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
1348 		return 2;
1349 
1350 	/*
1351 	 * Need to check if in unganged mode: in such a case, there are 2 channels,
1352 	 * but they are not in 128 bit mode and thus the above 'dclr0' status
1353 	 * bit will be OFF.
1354 	 *
1355 	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1356 	 * their CSEnable bit on. If so, then SINGLE DIMM case.
1357 	 */
1358 	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1359 
1360 	/*
1361 	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1362 	 * is more than just one DIMM present in unganged mode. Need to check
1363 	 * both controllers since DIMMs can be placed in either one.
1364 	 */
1365 	for (i = 0; i < 2; i++) {
1366 		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1367 
1368 		for (j = 0; j < 4; j++) {
1369 			if (DBAM_DIMM(j, dbam) > 0) {
1370 				channels++;
1371 				break;
1372 			}
1373 		}
1374 	}
1375 
1376 	if (channels > 2)
1377 		channels = 2;
1378 
1379 	amd64_info("MCT channel count: %d\n", channels);
1380 
1381 	return channels;
1382 }
1383 
1384 static int f17_early_channel_count(struct amd64_pvt *pvt)
1385 {
1386 	int i, channels = 0;
1387 
1388 	/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
1389 	for (i = 0; i < NUM_UMCS; i++)
1390 		channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
1391 
1392 	amd64_info("MCT channel count: %d\n", channels);
1393 
1394 	return channels;
1395 }
1396 
1397 static int ddr3_cs_size(unsigned i, bool dct_width)
1398 {
1399 	unsigned shift = 0;
1400 	int cs_size = 0;
1401 
1402 	if (i == 0 || i == 3 || i == 4)
1403 		cs_size = -1;
1404 	else if (i <= 2)
1405 		shift = i;
1406 	else if (i == 12)
1407 		shift = 7;
1408 	else if (!(i & 0x1))
1409 		shift = i >> 1;
1410 	else
1411 		shift = (i + 1) >> 1;
1412 
1413 	if (cs_size != -1)
1414 		cs_size = (128 * (1 << !!dct_width)) << shift;
1415 
1416 	return cs_size;
1417 }
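/*
 * e.g. ddr3_cs_size(5, false) gives 128 << 3 = 1024MB; i values 0, 3 and 4
 * are reserved encodings and return -1.
 */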
1418 
1419 static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
1420 {
1421 	unsigned shift = 0;
1422 	int cs_size = 0;
1423 
1424 	if (i < 4 || i == 6)
1425 		cs_size = -1;
1426 	else if (i == 12)
1427 		shift = 7;
1428 	else if (!(i & 0x1))
1429 		shift = i >> 1;
1430 	else
1431 		shift = (i + 1) >> 1;
1432 
1433 	if (cs_size != -1)
1434 		cs_size = rank_multiply * (128 << shift);
1435 
1436 	return cs_size;
1437 }
1438 
1439 static int ddr4_cs_size(unsigned i)
1440 {
1441 	int cs_size = 0;
1442 
1443 	if (i == 0)
1444 		cs_size = -1;
1445 	else if (i == 1)
1446 		cs_size = 1024;
1447 	else
1448 		/* Min cs_size = 1G */
1449 		cs_size = 1024 * (1 << (i >> 1));
1450 
1451 	return cs_size;
1452 }
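/* e.g. ddr4_cs_size(1) = 1024MB and ddr4_cs_size(4) = 4096MB; 0 is reserved. */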
1453 
1454 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1455 				   unsigned cs_mode, int cs_mask_nr)
1456 {
1457 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1458 
1459 	WARN_ON(cs_mode > 11);
1460 
1461 	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1462 		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1463 	else
1464 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1465 }
1466 
1467 /*
1468  * F15h supports only 64bit DCT interfaces
1469  */
1470 static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1471 				   unsigned cs_mode, int cs_mask_nr)
1472 {
1473 	WARN_ON(cs_mode > 12);
1474 
1475 	return ddr3_cs_size(cs_mode, false);
1476 }
1477 
1478 /* F15h M60h supports DDR4 mapping as well.. */
1479 static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1480 					unsigned cs_mode, int cs_mask_nr)
1481 {
1482 	int cs_size;
1483 	u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr];
1484 
1485 	WARN_ON(cs_mode > 12);
1486 
1487 	if (pvt->dram_type == MEM_DDR4) {
1488 		if (cs_mode > 9)
1489 			return -1;
1490 
1491 		cs_size = ddr4_cs_size(cs_mode);
1492 	} else if (pvt->dram_type == MEM_LRDDR3) {
1493 		unsigned rank_multiply = dcsm & 0xf;
1494 
1495 		if (rank_multiply == 3)
1496 			rank_multiply = 4;
1497 		cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply);
1498 	} else {
1499 		/* Minimum cs size is 512MB for F15h M60h */
1500 		if (cs_mode == 0x1)
1501 			return -1;
1502 
1503 		cs_size = ddr3_cs_size(cs_mode, false);
1504 	}
1505 
1506 	return cs_size;
1507 }
1508 
1509 /*
1510  * F16h and F15h model 30h have only limited cs_modes.
1511  */
1512 static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1513 				unsigned cs_mode, int cs_mask_nr)
1514 {
1515 	WARN_ON(cs_mode > 12);
1516 
1517 	if (cs_mode == 6 || cs_mode == 8 ||
1518 	    cs_mode == 9 || cs_mode == 12)
1519 		return -1;
1520 	else
1521 		return ddr3_cs_size(cs_mode, false);
1522 }
1523 
1524 static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc,
1525 				    unsigned int cs_mode, int csrow_nr)
1526 {
1527 	u32 base_addr = pvt->csels[umc].csbases[csrow_nr];
1528 
1529 	/*  Each mask is used for every two base addresses. */
1530 	u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1];
1531 
1532 	/*  Register [31:1] = Address [39:9]. Size is in kBs here. */
1533 	u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;
1534 
1535 	edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask);
1536 
1537 	/* Return size in MBs. */
1538 	return size >> 10;
1539 }
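/*
 * Illustration of the arithmetic above (hypothetical register values):
 * base_addr 0x0 and addr_mask 0x01ffffff give ((0xffffff + 1) >> 1) =
 * 0x800000 kB, i.e. 8192MB after the final shift.
 */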
1540 
1541 static void read_dram_ctl_register(struct amd64_pvt *pvt)
1542 {
1543 
1544 	if (pvt->fam == 0xf)
1545 		return;
1546 
1547 	if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1548 		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1549 			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1550 
1551 		edac_dbg(0, "  DCTs operate in %s mode\n",
1552 			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1553 
1554 		if (!dct_ganging_enabled(pvt))
1555 			edac_dbg(0, "  Address range split per DCT: %s\n",
1556 				 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1557 
1558 		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1559 			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1560 			 (dct_memory_cleared(pvt) ? "yes" : "no"));
1561 
1562 		edac_dbg(0, "  channel interleave: %s, "
1563 			 "interleave bits selector: 0x%x\n",
1564 			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1565 			 dct_sel_interleave_addr(pvt));
1566 	}
1567 
1568 	amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi);
1569 }
1570 
1571 /*
1572  * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
1573  * 2.10.12 Memory Interleaving Modes).
1574  */
1575 static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1576 				     u8 intlv_en, int num_dcts_intlv,
1577 				     u32 dct_sel)
1578 {
1579 	u8 channel = 0;
1580 	u8 select;
1581 
1582 	if (!(intlv_en))
1583 		return (u8)(dct_sel);
1584 
1585 	if (num_dcts_intlv == 2) {
1586 		select = (sys_addr >> 8) & 0x3;
1587 		channel = select ? 0x3 : 0;
1588 	} else if (num_dcts_intlv == 4) {
1589 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1590 		switch (intlv_addr) {
1591 		case 0x4:
1592 			channel = (sys_addr >> 8) & 0x3;
1593 			break;
1594 		case 0x5:
1595 			channel = (sys_addr >> 9) & 0x3;
1596 			break;
1597 		}
1598 	}
1599 	return channel;
1600 }
1601 
1602 /*
1603  * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1604  * Interleaving Modes.
1605  */
1606 static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1607 				bool hi_range_sel, u8 intlv_en)
1608 {
1609 	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1610 
1611 	if (dct_ganging_enabled(pvt))
1612 		return 0;
1613 
1614 	if (hi_range_sel)
1615 		return dct_sel_high;
1616 
1617 	/*
1618 	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1619 	 */
1620 	if (dct_interleave_enabled(pvt)) {
1621 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1622 
1623 		/* return DCT select function: 0=DCT0, 1=DCT1 */
1624 		if (!intlv_addr)
1625 			return sys_addr >> 6 & 1;
1626 
1627 		if (intlv_addr & 0x2) {
1628 			u8 shift = intlv_addr & 0x1 ? 9 : 6;
1629 			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1;
1630 
1631 			return ((sys_addr >> shift) & 1) ^ temp;
1632 		}
1633 
1634 		if (intlv_addr & 0x4) {
1635 			u8 shift = intlv_addr & 0x1 ? 9 : 8;
1636 
1637 			return (sys_addr >> shift) & 1;
1638 		}
1639 
1640 		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1641 	}
1642 
1643 	if (dct_high_range_enabled(pvt))
1644 		return ~dct_sel_high & 1;
1645 
1646 	return 0;
1647 }
1648 
1649 /* Convert the sys_addr to the normalized DCT address */
1650 static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
1651 				 u64 sys_addr, bool hi_rng,
1652 				 u32 dct_sel_base_addr)
1653 {
1654 	u64 chan_off;
1655 	u64 dram_base		= get_dram_base(pvt, range);
1656 	u64 hole_off		= f10_dhar_offset(pvt);
1657 	u64 dct_sel_base_off	= (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1658 
1659 	if (hi_rng) {
1660 		/*
1661 		 * if
1662 		 * base address of high range is below 4Gb
1663 		 * (bits [47:27] at [31:11])
1664 		 * DRAM address space on this DCT is hoisted above 4Gb	&&
1665 		 * sys_addr > 4Gb
1666 		 *
1667 		 *	remove hole offset from sys_addr
1668 		 * else
1669 		 *	remove high range offset from sys_addr
1670 		 */
1671 		if ((!(dct_sel_base_addr >> 16) ||
1672 		     dct_sel_base_addr < dhar_base(pvt)) &&
1673 		    dhar_valid(pvt) &&
1674 		    (sys_addr >= BIT_64(32)))
1675 			chan_off = hole_off;
1676 		else
1677 			chan_off = dct_sel_base_off;
1678 	} else {
1679 		/*
1680 		 * if
1681 		 * we have a valid hole		&&
1682 		 * sys_addr > 4Gb
1683 		 *
1684 		 *	remove hole
1685 		 * else
1686 		 *	remove dram base to normalize to DCT address
1687 		 */
1688 		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1689 			chan_off = hole_off;
1690 		else
1691 			chan_off = dram_base;
1692 	}
1693 
1694 	return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23));
1695 }
1696 
1697 /*
1698  * checks if the csrow passed in is marked as SPARED, if so returns the new
1699  * spare row
1700  */
1701 static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1702 {
1703 	int tmp_cs;
1704 
1705 	if (online_spare_swap_done(pvt, dct) &&
1706 	    csrow == online_spare_bad_dramcs(pvt, dct)) {
1707 
1708 		for_each_chip_select(tmp_cs, dct, pvt) {
1709 			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1710 				csrow = tmp_cs;
1711 				break;
1712 			}
1713 		}
1714 	}
1715 	return csrow;
1716 }
1717 
1718 /*
1719  * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1720  * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1721  *
1722  * Return:
1723  *	-EINVAL:  NOT FOUND
1724  *	0..csrow = Chip-Select Row
1725  */
1726 static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
1727 {
1728 	struct mem_ctl_info *mci;
1729 	struct amd64_pvt *pvt;
1730 	u64 cs_base, cs_mask;
1731 	int cs_found = -EINVAL;
1732 	int csrow;
1733 
1734 	mci = edac_mc_find(nid);
1735 	if (!mci)
1736 		return cs_found;
1737 
1738 	pvt = mci->pvt_info;
1739 
1740 	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1741 
1742 	for_each_chip_select(csrow, dct, pvt) {
1743 		if (!csrow_enabled(csrow, dct, pvt))
1744 			continue;
1745 
1746 		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1747 
1748 		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1749 			 csrow, cs_base, cs_mask);
1750 
1751 		cs_mask = ~cs_mask;
1752 
1753 		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1754 			 (in_addr & cs_mask), (cs_base & cs_mask));
1755 
1756 		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1757 			if (pvt->fam == 0x15 && pvt->model >= 0x30) {
1758 				cs_found =  csrow;
1759 				break;
1760 			}
1761 			cs_found = f10_process_possible_spare(pvt, dct, csrow);
1762 
1763 			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1764 			break;
1765 		}
1766 	}
1767 	return cs_found;
1768 }
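
/*
 * The compare above is a plain base/mask match: cs_mask is inverted so that
 * only the significant address bits take part, and any in_addr agreeing with
 * cs_base on those bits selects this csrow (subject to the online-spare remap
 * on pre-F15h-M30h parts).
 */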
1769 
1770 /*
1771  * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
1772  * swapped with a region located at the bottom of memory so that the GPU can use
1773  * the interleaved region and thus two channels.
1774  */
1775 static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1776 {
1777 	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1778 
1779 	if (pvt->fam == 0x10) {
1780 		/* only revC3 and revE have that feature */
1781 		if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3))
1782 			return sys_addr;
1783 	}
1784 
1785 	amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, &swap_reg);
1786 
1787 	if (!(swap_reg & 0x1))
1788 		return sys_addr;
1789 
1790 	swap_base	= (swap_reg >> 3) & 0x7f;
1791 	swap_limit	= (swap_reg >> 11) & 0x7f;
1792 	rgn_size	= (swap_reg >> 20) & 0x7f;
1793 	tmp_addr	= sys_addr >> 27;
1794 
1795 	if (!(sys_addr >> 34) &&
1796 	    (((tmp_addr >= swap_base) &&
1797 	     (tmp_addr <= swap_limit)) ||
1798 	     (tmp_addr < rgn_size)))
1799 		return sys_addr ^ (u64)swap_base << 27;
1800 
1801 	return sys_addr;
1802 }
1803 
1804 /* For a given @dram_range, check if @sys_addr falls within it. */
1805 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1806 				  u64 sys_addr, int *chan_sel)
1807 {
1808 	int cs_found = -EINVAL;
1809 	u64 chan_addr;
1810 	u32 dct_sel_base;
1811 	u8 channel;
1812 	bool high_range = false;
1813 
1814 	u8 node_id    = dram_dst_node(pvt, range);
1815 	u8 intlv_en   = dram_intlv_en(pvt, range);
1816 	u32 intlv_sel = dram_intlv_sel(pvt, range);
1817 
1818 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1819 		 range, sys_addr, get_dram_limit(pvt, range));
1820 
1821 	if (dhar_valid(pvt) &&
1822 	    dhar_base(pvt) <= sys_addr &&
1823 	    sys_addr < BIT_64(32)) {
1824 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1825 			    sys_addr);
1826 		return -EINVAL;
1827 	}
1828 
1829 	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1830 		return -EINVAL;
1831 
1832 	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1833 
1834 	dct_sel_base = dct_sel_baseaddr(pvt);
1835 
1836 	/*
1837 	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1838 	 * select between DCT0 and DCT1.
1839 	 */
1840 	if (dct_high_range_enabled(pvt) &&
1841 	   !dct_ganging_enabled(pvt) &&
1842 	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1843 		high_range = true;
1844 
1845 	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1846 
1847 	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1848 					  high_range, dct_sel_base);
1849 
1850 	/* Remove node interleaving, see F1x120 */
1851 	if (intlv_en)
1852 		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1853 			    (chan_addr & 0xfff);
1854 
1855 	/* remove channel interleave */
1856 	if (dct_interleave_enabled(pvt) &&
1857 	   !dct_high_range_enabled(pvt) &&
1858 	   !dct_ganging_enabled(pvt)) {
1859 
1860 		if (dct_sel_interleave_addr(pvt) != 1) {
1861 			if (dct_sel_interleave_addr(pvt) == 0x3)
1862 				/* hash 9 */
1863 				chan_addr = ((chan_addr >> 10) << 9) |
1864 					     (chan_addr & 0x1ff);
1865 			else
1866 				/* A[6] or hash 6 */
1867 				chan_addr = ((chan_addr >> 7) << 6) |
1868 					     (chan_addr & 0x3f);
1869 		} else
1870 			/* A[12] */
1871 			chan_addr = ((chan_addr >> 13) << 12) |
1872 				     (chan_addr & 0xfff);
1873 	}
1874 
1875 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1876 
1877 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
1878 
1879 	if (cs_found >= 0)
1880 		*chan_sel = channel;
1881 
1882 	return cs_found;
1883 }
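
/*
 * In short, the lookup above proceeds as: reject addresses in the DRAM hole
 * or with a non-matching node-interleave selector, undo the framebuffer
 * region swap, pick the DCT (channel), normalize sys_addr to a DCT address,
 * squeeze out the node- and channel-interleave bits and finally match the
 * result against that DCT's chip selects.
 */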
1884 
1885 static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1886 					u64 sys_addr, int *chan_sel)
1887 {
1888 	int cs_found = -EINVAL;
1889 	int num_dcts_intlv = 0;
1890 	u64 chan_addr, chan_offset;
1891 	u64 dct_base, dct_limit;
1892 	u32 dct_cont_base_reg, dct_cont_limit_reg, tmp;
1893 	u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en;
1894 
1895 	u64 dhar_offset		= f10_dhar_offset(pvt);
1896 	u8 intlv_addr		= dct_sel_interleave_addr(pvt);
1897 	u8 node_id		= dram_dst_node(pvt, range);
1898 	u8 intlv_en		= dram_intlv_en(pvt, range);
1899 
1900 	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg);
1901 	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg);
1902 
1903 	dct_offset_en		= (u8) ((dct_cont_base_reg >> 3) & BIT(0));
1904 	dct_sel			= (u8) ((dct_cont_base_reg >> 4) & 0x7);
1905 
1906 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1907 		 range, sys_addr, get_dram_limit(pvt, range));
1908 
1909 	if (!(get_dram_base(pvt, range)  <= sys_addr) &&
1910 	    !(get_dram_limit(pvt, range) >= sys_addr))
1911 		return -EINVAL;
1912 
1913 	if (dhar_valid(pvt) &&
1914 	    dhar_base(pvt) <= sys_addr &&
1915 	    sys_addr < BIT_64(32)) {
1916 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1917 			    sys_addr);
1918 		return -EINVAL;
1919 	}
1920 
1921 	/* Verify sys_addr is within DCT Range. */
1922 	dct_base = (u64) dct_sel_baseaddr(pvt);
1923 	dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF;
1924 
1925 	if (!(dct_cont_base_reg & BIT(0)) &&
1926 	    !(dct_base <= (sys_addr >> 27) &&
1927 	      dct_limit >= (sys_addr >> 27)))
1928 		return -EINVAL;
1929 
1930 	/* Verify number of dct's that participate in channel interleaving. */
1931 	num_dcts_intlv = (int) hweight8(intlv_en);
1932 
1933 	if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4))
1934 		return -EINVAL;
1935 
1936 	if (pvt->model >= 0x60)
1937 		channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en);
1938 	else
1939 		channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
1940 						     num_dcts_intlv, dct_sel);
1941 
1942 	/* Verify we stay within the MAX number of channels allowed */
1943 	if (channel > 3)
1944 		return -EINVAL;
1945 
1946 	leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0));
1947 
1948 	/* Get normalized DCT addr */
1949 	if (leg_mmio_hole && (sys_addr >= BIT_64(32)))
1950 		chan_offset = dhar_offset;
1951 	else
1952 		chan_offset = dct_base << 27;
1953 
1954 	chan_addr = sys_addr - chan_offset;
1955 
1956 	/* remove channel interleave */
1957 	if (num_dcts_intlv == 2) {
1958 		if (intlv_addr == 0x4)
1959 			chan_addr = ((chan_addr >> 9) << 8) |
1960 						(chan_addr & 0xff);
1961 		else if (intlv_addr == 0x5)
1962 			chan_addr = ((chan_addr >> 10) << 9) |
1963 						(chan_addr & 0x1ff);
1964 		else
1965 			return -EINVAL;
1966 
1967 	} else if (num_dcts_intlv == 4) {
1968 		if (intlv_addr == 0x4)
1969 			chan_addr = ((chan_addr >> 10) << 8) |
1970 							(chan_addr & 0xff);
1971 		else if (intlv_addr == 0x5)
1972 			chan_addr = ((chan_addr >> 11) << 9) |
1973 							(chan_addr & 0x1ff);
1974 		else
1975 			return -EINVAL;
1976 	}
1977 
1978 	if (dct_offset_en) {
1979 		amd64_read_pci_cfg(pvt->F1,
1980 				   DRAM_CONT_HIGH_OFF + (int) channel * 4,
1981 				   &tmp);
1982 		chan_addr +=  (u64) ((tmp >> 11) & 0xfff) << 27;
1983 	}
1984 
1985 	f15h_select_dct(pvt, channel);
1986 
1987 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1988 
1989 	/*
1990 	 * Find Chip select:
1991 	 * if channel = 3, then alias it to 1. This is because, in F15 M30h,
1992 	 * there is support for 4 DCT's, but only 2 are currently functional.
1993 	 * They are DCT0 and DCT3. But we have read all registers of DCT3 into
1994 	 * pvt->csels[1]. So we need to use '1' here to get correct info.
1995 	 * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications.
1996 	 */
1997 	alias_channel =  (channel == 3) ? 1 : channel;
1998 
1999 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel);
2000 
2001 	if (cs_found >= 0)
2002 		*chan_sel = alias_channel;
2003 
2004 	return cs_found;
2005 }
2006 
2007 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt,
2008 					u64 sys_addr,
2009 					int *chan_sel)
2010 {
2011 	int cs_found = -EINVAL;
2012 	unsigned range;
2013 
2014 	for (range = 0; range < DRAM_RANGES; range++) {
2015 		if (!dram_rw(pvt, range))
2016 			continue;
2017 
2018 		if (pvt->fam == 0x15 && pvt->model >= 0x30)
2019 			cs_found = f15_m30h_match_to_this_node(pvt, range,
2020 							       sys_addr,
2021 							       chan_sel);
2022 
2023 		else if ((get_dram_base(pvt, range)  <= sys_addr) &&
2024 			 (get_dram_limit(pvt, range) >= sys_addr)) {
2025 			cs_found = f1x_match_to_this_node(pvt, range,
2026 							  sys_addr, chan_sel);
2027 			if (cs_found >= 0)
2028 				break;
2029 		}
2030 	}
2031 	return cs_found;
2032 }
2033 
2034 /*
2035  * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
2036  * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
2037  *
2038  * The @sys_addr is usually an error address received from the hardware
2039  * (MCX_ADDR).
2040  */
2041 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
2042 				     struct err_info *err)
2043 {
2044 	struct amd64_pvt *pvt = mci->pvt_info;
2045 
2046 	error_address_to_page_and_offset(sys_addr, err);
2047 
2048 	err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
2049 	if (err->csrow < 0) {
2050 		err->err_code = ERR_CSROW;
2051 		return;
2052 	}
2053 
2054 	/*
2055 	 * We need the syndromes for channel detection only when we're
2056 	 * ganged. Otherwise @chan should already contain the channel at
2057 	 * this point.
2058 	 */
2059 	if (dct_ganging_enabled(pvt))
2060 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
2061 }
2062 
2063 /*
2064  * debug routine to display the memory sizes of all logical DIMMs and their
2065  * CSROWs
2066  */
2067 static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
2068 {
2069 	int dimm, size0, size1;
2070 	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
2071 	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
2072 
2073 	if (pvt->fam == 0xf) {
2074 		/* K8 families < revF not supported yet */
2075 		if (pvt->ext_model < K8_REV_F)
2076 			return;
2077 		else
2078 			WARN_ON(ctrl != 0);
2079 	}
2080 
2081 	if (pvt->fam == 0x10) {
2082 		dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
2083 							   : pvt->dbam0;
2084 		dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
2085 				 pvt->csels[1].csbases :
2086 				 pvt->csels[0].csbases;
2087 	} else if (ctrl) {
2088 		dbam = pvt->dbam0;
2089 		dcsb = pvt->csels[1].csbases;
2090 	}
2091 	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
2092 		 ctrl, dbam);
2093 
2094 	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
2095 
2096 	/* Dump memory sizes for DIMM and its CSROWs */
2097 	for (dimm = 0; dimm < 4; dimm++) {
2098 
2099 		size0 = 0;
2100 		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
2101 			/*
2102 			 * For F15h M60h, we need a multiplier for the LRDIMM cs_size
2103 			 * calculation. We pass the dimm value to the dbam_to_cs
2104 			 * mapper so we can find the multiplier from the
2105 			 * corresponding DCSM.
2106 			 */
2107 			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
2108 						     DBAM_DIMM(dimm, dbam),
2109 						     dimm);
2110 
2111 		size1 = 0;
2112 		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
2113 			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
2114 						     DBAM_DIMM(dimm, dbam),
2115 						     dimm);
2116 
2117 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
2118 				dimm * 2,     size0,
2119 				dimm * 2 + 1, size1);
2120 	}
2121 }
2122 
2123 static struct amd64_family_type family_types[] = {
2124 	[K8_CPUS] = {
2125 		.ctl_name = "K8",
2126 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
2127 		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
2128 		.ops = {
2129 			.early_channel_count	= k8_early_channel_count,
2130 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
2131 			.dbam_to_cs		= k8_dbam_to_chip_select,
2132 		}
2133 	},
2134 	[F10_CPUS] = {
2135 		.ctl_name = "F10h",
2136 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
2137 		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
2138 		.ops = {
2139 			.early_channel_count	= f1x_early_channel_count,
2140 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2141 			.dbam_to_cs		= f10_dbam_to_chip_select,
2142 		}
2143 	},
2144 	[F15_CPUS] = {
2145 		.ctl_name = "F15h",
2146 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
2147 		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
2148 		.ops = {
2149 			.early_channel_count	= f1x_early_channel_count,
2150 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2151 			.dbam_to_cs		= f15_dbam_to_chip_select,
2152 		}
2153 	},
2154 	[F15_M30H_CPUS] = {
2155 		.ctl_name = "F15h_M30h",
2156 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
2157 		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
2158 		.ops = {
2159 			.early_channel_count	= f1x_early_channel_count,
2160 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2161 			.dbam_to_cs		= f16_dbam_to_chip_select,
2162 		}
2163 	},
2164 	[F15_M60H_CPUS] = {
2165 		.ctl_name = "F15h_M60h",
2166 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
2167 		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
2168 		.ops = {
2169 			.early_channel_count	= f1x_early_channel_count,
2170 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2171 			.dbam_to_cs		= f15_m60h_dbam_to_chip_select,
2172 		}
2173 	},
2174 	[F16_CPUS] = {
2175 		.ctl_name = "F16h",
2176 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
2177 		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
2178 		.ops = {
2179 			.early_channel_count	= f1x_early_channel_count,
2180 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2181 			.dbam_to_cs		= f16_dbam_to_chip_select,
2182 		}
2183 	},
2184 	[F16_M30H_CPUS] = {
2185 		.ctl_name = "F16h_M30h",
2186 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
2187 		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
2188 		.ops = {
2189 			.early_channel_count	= f1x_early_channel_count,
2190 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
2191 			.dbam_to_cs		= f16_dbam_to_chip_select,
2192 		}
2193 	},
2194 	[F17_CPUS] = {
2195 		.ctl_name = "F17h",
2196 		.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
2197 		.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
2198 		.ops = {
2199 			.early_channel_count	= f17_early_channel_count,
2200 			.dbam_to_cs		= f17_base_addr_to_cs_size,
2201 		}
2202 	},
2203 	[F17_M10H_CPUS] = {
2204 		.ctl_name = "F17h_M10h",
2205 		.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
2206 		.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
2207 		.ops = {
2208 			.early_channel_count	= f17_early_channel_count,
2209 			.dbam_to_cs		= f17_base_addr_to_cs_size,
2210 		}
2211 	},
2212 };
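
/*
 * Note on the F17h entries above: families with UMCs populate f0_id/f6_id
 * rather than f1_id/f2_id; init_one_instance() picks whichever pair applies
 * before calling reserve_mc_sibling_devs().
 */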
2213 
2214 /*
2215  * These are tables of eigenvectors (one per line) which can be used for the
2216  * construction of the syndrome tables. The modified syndrome search algorithm
2217  * uses those to find the symbol in error and thus the DIMM.
2218  *
2219  * Algorithm courtesy of Ross LaFetra from AMD.
2220  */
2221 static const u16 x4_vectors[] = {
2222 	0x2f57, 0x1afe, 0x66cc, 0xdd88,
2223 	0x11eb, 0x3396, 0x7f4c, 0xeac8,
2224 	0x0001, 0x0002, 0x0004, 0x0008,
2225 	0x1013, 0x3032, 0x4044, 0x8088,
2226 	0x106b, 0x30d6, 0x70fc, 0xe0a8,
2227 	0x4857, 0xc4fe, 0x13cc, 0x3288,
2228 	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
2229 	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
2230 	0x15c1, 0x2a42, 0x89ac, 0x4758,
2231 	0x2b03, 0x1602, 0x4f0c, 0xca08,
2232 	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
2233 	0x8ba7, 0x465e, 0x244c, 0x1cc8,
2234 	0x2b87, 0x164e, 0x642c, 0xdc18,
2235 	0x40b9, 0x80de, 0x1094, 0x20e8,
2236 	0x27db, 0x1eb6, 0x9dac, 0x7b58,
2237 	0x11c1, 0x2242, 0x84ac, 0x4c58,
2238 	0x1be5, 0x2d7a, 0x5e34, 0xa718,
2239 	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
2240 	0x4c97, 0xc87e, 0x11fc, 0x33a8,
2241 	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
2242 	0x16b3, 0x3d62, 0x4f34, 0x8518,
2243 	0x1e2f, 0x391a, 0x5cac, 0xf858,
2244 	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
2245 	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
2246 	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
2247 	0x4397, 0xc27e, 0x17fc, 0x3ea8,
2248 	0x1617, 0x3d3e, 0x6464, 0xb8b8,
2249 	0x23ff, 0x12aa, 0xab6c, 0x56d8,
2250 	0x2dfb, 0x1ba6, 0x913c, 0x7328,
2251 	0x185d, 0x2ca6, 0x7914, 0x9e28,
2252 	0x171b, 0x3e36, 0x7d7c, 0xebe8,
2253 	0x4199, 0x82ee, 0x19f4, 0x2e58,
2254 	0x4807, 0xc40e, 0x130c, 0x3208,
2255 	0x1905, 0x2e0a, 0x5804, 0xac08,
2256 	0x213f, 0x132a, 0xadfc, 0x5ba8,
2257 	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
2258 };
2259 
2260 static const u16 x8_vectors[] = {
2261 	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
2262 	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
2263 	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
2264 	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
2265 	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
2266 	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
2267 	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
2268 	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
2269 	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
2270 	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
2271 	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
2272 	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
2273 	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
2274 	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
2275 	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
2276 	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
2277 	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
2278 	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
2279 	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
2280 };
2281 
2282 static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
2283 			   unsigned v_dim)
2284 {
2285 	unsigned int i, err_sym;
2286 
2287 	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
2288 		u16 s = syndrome;
2289 		unsigned v_idx =  err_sym * v_dim;
2290 		unsigned v_end = (err_sym + 1) * v_dim;
2291 
2292 		/* walk over all 16 bits of the syndrome */
2293 		for (i = 1; i < (1U << 16); i <<= 1) {
2294 
2295 			/* if bit is set in that eigenvector... */
2296 			if (v_idx < v_end && vectors[v_idx] & i) {
2297 				u16 ev_comp = vectors[v_idx++];
2298 
2299 				/* ... and bit set in the modified syndrome, */
2300 				if (s & i) {
2301 					/* remove it. */
2302 					s ^= ev_comp;
2303 
2304 					if (!s)
2305 						return err_sym;
2306 				}
2307 
2308 			} else if (s & i)
2309 				/* can't get to zero, move to next symbol */
2310 				break;
2311 		}
2312 	}
2313 
2314 	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
2315 	return -1;
2316 }
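
/*
 * The search above treats the 16-bit syndrome as a vector over GF(2): for
 * each candidate error symbol it walks the syndrome bits from LSB to MSB and
 * tries to cancel them by XOR-ing in that symbol's v_dim eigenvectors. If the
 * syndrome reaches zero, that symbol index is returned; if a set bit cannot
 * be matched by the current symbol's vectors, the next symbol is tried.
 * Callers pass v_dim == pvt->ecc_sym_sz (4 or 8), see
 * get_channel_from_ecc_syndrome() below.
 */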
2317 
2318 static int map_err_sym_to_channel(int err_sym, int sym_size)
2319 {
2320 	if (sym_size == 4)
2321 		switch (err_sym) {
2322 		case 0x20:
2323 		case 0x21:
2324 			return 0;
2325 			break;
2326 		case 0x22:
2327 		case 0x23:
2328 			return 1;
2329 			break;
2330 		default:
2331 			return err_sym >> 4;
2332 			break;
2333 		}
2334 	/* x8 symbols */
2335 	else
2336 		switch (err_sym) {
2337 		/* imaginary bits not in a DIMM */
2338 		case 0x10:
2339 			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
2340 					  err_sym);
2341 			return -1;
2342 			break;
2343 
2344 		case 0x11:
2345 			return 0;
2346 			break;
2347 		case 0x12:
2348 			return 1;
2349 			break;
2350 		default:
2351 			return err_sym >> 3;
2352 			break;
2353 		}
2354 	return -1;
2355 }
2356 
2357 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
2358 {
2359 	struct amd64_pvt *pvt = mci->pvt_info;
2360 	int err_sym = -1;
2361 
2362 	if (pvt->ecc_sym_sz == 8)
2363 		err_sym = decode_syndrome(syndrome, x8_vectors,
2364 					  ARRAY_SIZE(x8_vectors),
2365 					  pvt->ecc_sym_sz);
2366 	else if (pvt->ecc_sym_sz == 4)
2367 		err_sym = decode_syndrome(syndrome, x4_vectors,
2368 					  ARRAY_SIZE(x4_vectors),
2369 					  pvt->ecc_sym_sz);
2370 	else {
2371 		amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
2372 		return err_sym;
2373 	}
2374 
2375 	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
2376 }
2377 
2378 static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
2379 			    u8 ecc_type)
2380 {
2381 	enum hw_event_mc_err_type err_type;
2382 	const char *string;
2383 
2384 	if (ecc_type == 2)
2385 		err_type = HW_EVENT_ERR_CORRECTED;
2386 	else if (ecc_type == 1)
2387 		err_type = HW_EVENT_ERR_UNCORRECTED;
2388 	else if (ecc_type == 3)
2389 		err_type = HW_EVENT_ERR_DEFERRED;
2390 	else {
2391 		WARN(1, "Something is rotten in the state of Denmark.\n");
2392 		return;
2393 	}
2394 
2395 	switch (err->err_code) {
2396 	case DECODE_OK:
2397 		string = "";
2398 		break;
2399 	case ERR_NODE:
2400 		string = "Failed to map error addr to a node";
2401 		break;
2402 	case ERR_CSROW:
2403 		string = "Failed to map error addr to a csrow";
2404 		break;
2405 	case ERR_CHANNEL:
2406 		string = "Unknown syndrome - possible error reporting race";
2407 		break;
2408 	case ERR_SYND:
2409 		string = "MCA_SYND not valid - unknown syndrome and csrow";
2410 		break;
2411 	case ERR_NORM_ADDR:
2412 		string = "Cannot decode normalized address";
2413 		break;
2414 	default:
2415 		string = "WTF error";
2416 		break;
2417 	}
2418 
2419 	edac_mc_handle_error(err_type, mci, 1,
2420 			     err->page, err->offset, err->syndrome,
2421 			     err->csrow, err->channel, -1,
2422 			     string, "");
2423 }
2424 
2425 static inline void decode_bus_error(int node_id, struct mce *m)
2426 {
2427 	struct mem_ctl_info *mci;
2428 	struct amd64_pvt *pvt;
2429 	u8 ecc_type = (m->status >> 45) & 0x3;
2430 	u8 xec = XEC(m->status, 0x1f);
2431 	u16 ec = EC(m->status);
2432 	u64 sys_addr;
2433 	struct err_info err;
2434 
2435 	mci = edac_mc_find(node_id);
2436 	if (!mci)
2437 		return;
2438 
2439 	pvt = mci->pvt_info;
2440 
2441 	/* Bail out early if this was an 'observed' error */
2442 	if (PP(ec) == NBSL_PP_OBS)
2443 		return;
2444 
2445 	/* Do only ECC errors */
2446 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
2447 		return;
2448 
2449 	memset(&err, 0, sizeof(err));
2450 
2451 	sys_addr = get_error_address(pvt, m);
2452 
2453 	if (ecc_type == 2)
2454 		err.syndrome = extract_syndrome(m->status);
2455 
2456 	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
2457 
2458 	__log_ecc_error(mci, &err, ecc_type);
2459 }
2460 
2461 /*
2462  * To find the UMC channel represented by this bank we need to match on its
2463  * instance_id. The instance_id of a bank is held in the lower 32 bits of its
2464  * IPID.
2465  */
2466 static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
2467 {
2468 	u32 umc_instance_id[] = {0x50f00, 0x150f00};
2469 	u32 instance_id = m->ipid & GENMASK(31, 0);
2470 	int i, channel = -1;
2471 
2472 	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
2473 		if (umc_instance_id[i] == instance_id)
2474 			channel = i;
2475 
2476 	return channel;
2477 }
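
/*
 * With the table above, an MCA bank whose IPID[31:0] equals 0x50f00 is
 * reported as UMC channel 0 and 0x150f00 as channel 1; any other instance id
 * yields -1, which decode_umc_error() turns into ERR_CHANNEL.
 */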
2478 
2479 static void decode_umc_error(int node_id, struct mce *m)
2480 {
2481 	u8 ecc_type = (m->status >> 45) & 0x3;
2482 	struct mem_ctl_info *mci;
2483 	struct amd64_pvt *pvt;
2484 	struct err_info err;
2485 	u64 sys_addr;
2486 
2487 	mci = edac_mc_find(node_id);
2488 	if (!mci)
2489 		return;
2490 
2491 	pvt = mci->pvt_info;
2492 
2493 	memset(&err, 0, sizeof(err));
2494 
2495 	if (m->status & MCI_STATUS_DEFERRED)
2496 		ecc_type = 3;
2497 
2498 	err.channel = find_umc_channel(pvt, m);
2499 	if (err.channel < 0) {
2500 		err.err_code = ERR_CHANNEL;
2501 		goto log_error;
2502 	}
2503 
2504 	if (!(m->status & MCI_STATUS_SYNDV)) {
2505 		err.err_code = ERR_SYND;
2506 		goto log_error;
2507 	}
2508 
2509 	if (ecc_type == 2) {
2510 		u8 length = (m->synd >> 18) & 0x3f;
2511 
2512 		if (length)
2513 			err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
2514 		else
2515 			err.err_code = ERR_CHANNEL;
2516 	}
2517 
2518 	err.csrow = m->synd & 0x7;
2519 
2520 	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
2521 		err.err_code = ERR_NORM_ADDR;
2522 		goto log_error;
2523 	}
2524 
2525 	error_address_to_page_and_offset(sys_addr, &err);
2526 
2527 log_error:
2528 	__log_ecc_error(mci, &err, ecc_type);
2529 }
2530 
2531 /*
2532  * Use pvt->F3 which contains the F3 CPU PCI device to get the related
2533  * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
2534  * Reserve F0 and F6 on systems with a UMC.
2535  */
2536 static int
2537 reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
2538 {
2539 	if (pvt->umc) {
2540 		pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
2541 		if (!pvt->F0) {
2542 			amd64_err("F0 not found, device 0x%x (broken BIOS?)\n", pci_id1);
2543 			return -ENODEV;
2544 		}
2545 
2546 		pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
2547 		if (!pvt->F6) {
2548 			pci_dev_put(pvt->F0);
2549 			pvt->F0 = NULL;
2550 
2551 			amd64_err("F6 not found: device 0x%x (broken BIOS?)\n", pci_id2);
2552 			return -ENODEV;
2553 		}
2554 
2555 		edac_dbg(1, "F0: %s\n", pci_name(pvt->F0));
2556 		edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2557 		edac_dbg(1, "F6: %s\n", pci_name(pvt->F6));
2558 
2559 		return 0;
2560 	}
2561 
2562 	/* Reserve the ADDRESS MAP Device */
2563 	pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
2564 	if (!pvt->F1) {
2565 		amd64_err("F1 not found: device 0x%x (broken BIOS?)\n", pci_id1);
2566 		return -ENODEV;
2567 	}
2568 
2569 	/* Reserve the DCT Device */
2570 	pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
2571 	if (!pvt->F2) {
2572 		pci_dev_put(pvt->F1);
2573 		pvt->F1 = NULL;
2574 
2575 		amd64_err("F2 not found: device 0x%x (broken BIOS?)\n", pci_id2);
2576 		return -ENODEV;
2577 	}
2578 
2579 	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
2580 	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
2581 	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2582 
2583 	return 0;
2584 }
2585 
2586 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2587 {
2588 	if (pvt->umc) {
2589 		pci_dev_put(pvt->F0);
2590 		pci_dev_put(pvt->F6);
2591 	} else {
2592 		pci_dev_put(pvt->F1);
2593 		pci_dev_put(pvt->F2);
2594 	}
2595 }
2596 
2597 static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
2598 {
2599 	pvt->ecc_sym_sz = 4;
2600 
2601 	if (pvt->umc) {
2602 		u8 i;
2603 
2604 		for (i = 0; i < NUM_UMCS; i++) {
2605 			/* Check enabled channels only: */
2606 			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
2607 			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
2608 				pvt->ecc_sym_sz = 8;
2609 				break;
2610 			}
2611 		}
2612 
2613 		return;
2614 	}
2615 
2616 	if (pvt->fam >= 0x10) {
2617 		u32 tmp;
2618 
2619 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2620 		/* F16h has only DCT0, so no need to read dbam1. */
2621 		if (pvt->fam != 0x16)
2622 			amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
2623 
2624 		/* F10h, revD and later can do x8 ECC too. */
2625 		if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
2626 			pvt->ecc_sym_sz = 8;
2627 	}
2628 }
2629 
2630 /*
2631  * Retrieve the hardware registers of the memory controller.
2632  */
2633 static void __read_mc_regs_df(struct amd64_pvt *pvt)
2634 {
2635 	u8 nid = pvt->mc_node_id;
2636 	struct amd64_umc *umc;
2637 	u32 i, umc_base;
2638 
2639 	/* Read registers from each UMC */
2640 	for (i = 0; i < NUM_UMCS; i++) {
2641 
2642 		umc_base = get_umc_base(i);
2643 		umc = &pvt->umc[i];
2644 
2645 		amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
2646 		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
2647 		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
2648 		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
2649 		amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
2650 	}
2651 }
2652 
2653 /*
2654  * Retrieve the hardware registers of the memory controller (this includes the
2655  * 'Address Map' and 'Misc' device regs)
2656  */
2657 static void read_mc_regs(struct amd64_pvt *pvt)
2658 {
2659 	unsigned int range;
2660 	u64 msr_val;
2661 
2662 	/*
2663 	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2664 	 * those are Read-As-Zero.
2665 	 */
2666 	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2667 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
2668 
2669 	/* Check first whether TOP_MEM2 is enabled: */
2670 	rdmsrl(MSR_K8_SYSCFG, msr_val);
2671 	if (msr_val & BIT(21)) {
2672 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2673 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2674 	} else {
2675 		edac_dbg(0, "  TOP_MEM2 disabled\n");
2676 	}
2677 
2678 	if (pvt->umc) {
2679 		__read_mc_regs_df(pvt);
2680 		amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
2681 
2682 		goto skip;
2683 	}
2684 
2685 	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
2686 
2687 	read_dram_ctl_register(pvt);
2688 
2689 	for (range = 0; range < DRAM_RANGES; range++) {
2690 		u8 rw;
2691 
2692 		/* read settings for this DRAM range */
2693 		read_dram_base_limit_regs(pvt, range);
2694 
2695 		rw = dram_rw(pvt, range);
2696 		if (!rw)
2697 			continue;
2698 
2699 		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2700 			 range,
2701 			 get_dram_base(pvt, range),
2702 			 get_dram_limit(pvt, range));
2703 
2704 		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2705 			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2706 			 (rw & 0x1) ? "R" : "-",
2707 			 (rw & 0x2) ? "W" : "-",
2708 			 dram_intlv_sel(pvt, range),
2709 			 dram_dst_node(pvt, range));
2710 	}
2711 
2712 	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2713 	amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);
2714 
2715 	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2716 
2717 	amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0);
2718 	amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0);
2719 
2720 	if (!dct_ganging_enabled(pvt)) {
2721 		amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
2722 		amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
2723 	}
2724 
2725 skip:
2726 	read_dct_base_mask(pvt);
2727 
2728 	determine_memory_type(pvt);
2729 	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
2730 
2731 	determine_ecc_sym_sz(pvt);
2732 
2733 	dump_misc_regs(pvt);
2734 }
2735 
2736 /*
2737  * NOTE: CPU Revision Dependent code
2738  *
2739  * Input:
2740  *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2741  *	k8 private pointer to -->
2742  *			DRAM Bank Address mapping register
2743  *			node_id
2744  *			DCL register where dual_channel_active is
2745  *
2746  * The DBAM register consists of 4 sets of 4 bits each definitions:
2747  *
2748  * Bits:	CSROWs
2749  * 0-3		CSROWs 0 and 1
2750  * 4-7		CSROWs 2 and 3
2751  * 8-11		CSROWs 4 and 5
2752  * 12-15	CSROWs 6 and 7
2753  *
2754  * Values range from: 0 to 15
2755  * The meaning of the values depends on CPU revision and dual-channel state,
2756  * see the relevant BKDG for more info.
2757  *
2758  * The memory controller provides for a total of only 8 CSROWs in its current
2759  * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2760  * single channel or two (2) DIMMs in dual channel mode.
2761  *
2762  * The following code logic collapses the various tables for CSROW based on CPU
2763  * revision.
2764  *
2765  * Returns:
2766  *	The number of PAGE_SIZE pages on the specified CSROW number it
2767  *	encompasses
2768  *
2769  */
2770 static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
2771 {
2772 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2773 	int csrow_nr = csrow_nr_orig;
2774 	u32 cs_mode, nr_pages;
2775 
2776 	if (!pvt->umc)
2777 		csrow_nr >>= 1;
2778 
2779 	cs_mode = DBAM_DIMM(csrow_nr, dbam);
2780 
2781 	nr_pages   = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
2782 	nr_pages <<= 20 - PAGE_SHIFT;
2783 
2784 	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
2785 		    csrow_nr_orig, dct,  cs_mode);
2786 	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
2787 
2788 	return nr_pages;
2789 }
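
/*
 * Small worked example for the conversion above, assuming the usual
 * DBAM_DIMM() definition (take the 4-bit nibble belonging to this csrow
 * pair): with dbam == 0x00000320 and csrow_nr_orig == 2 (csrow_nr == 1 after
 * the >> 1), cs_mode is 2. If dbam_to_cs() then reports 2048 (MB), the
 * "<<= 20 - PAGE_SHIFT" step yields 2048 * 256 = 524288 pages with 4K pages
 * (PAGE_SHIFT == 12).
 */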
2790 
2791 /*
2792  * Initialize the array of csrow attribute instances, based on the values
2793  * from pci config hardware registers.
2794  */
2795 static int init_csrows(struct mem_ctl_info *mci)
2796 {
2797 	struct amd64_pvt *pvt = mci->pvt_info;
2798 	enum edac_type edac_mode = EDAC_NONE;
2799 	struct csrow_info *csrow;
2800 	struct dimm_info *dimm;
2801 	int i, j, empty = 1;
2802 	int nr_pages = 0;
2803 	u32 val;
2804 
2805 	if (!pvt->umc) {
2806 		amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
2807 
2808 		pvt->nbcfg = val;
2809 
2810 		edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2811 			 pvt->mc_node_id, val,
2812 			 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
2813 	}
2814 
2815 	/*
2816 	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
2817 	 */
2818 	for_each_chip_select(i, 0, pvt) {
2819 		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
2820 		bool row_dct1 = false;
2821 
2822 		if (pvt->fam != 0xf)
2823 			row_dct1 = !!csrow_enabled(i, 1, pvt);
2824 
2825 		if (!row_dct0 && !row_dct1)
2826 			continue;
2827 
2828 		csrow = mci->csrows[i];
2829 		empty = 0;
2830 
2831 		edac_dbg(1, "MC node: %d, csrow: %d\n",
2832 			    pvt->mc_node_id, i);
2833 
2834 		if (row_dct0) {
2835 			nr_pages = get_csrow_nr_pages(pvt, 0, i);
2836 			csrow->channels[0]->dimm->nr_pages = nr_pages;
2837 		}
2838 
2839 		/* K8 has only one DCT */
2840 		if (pvt->fam != 0xf && row_dct1) {
2841 			int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
2842 
2843 			csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
2844 			nr_pages += row_dct1_pages;
2845 		}
2846 
2847 		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
2848 
2849 		/* Determine DIMM ECC mode: */
2850 		if (pvt->umc) {
2851 			if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
2852 				edac_mode = EDAC_S4ECD4ED;
2853 			else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
2854 				edac_mode = EDAC_SECDED;
2855 
2856 		} else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
2857 			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
2858 					? EDAC_S4ECD4ED
2859 					: EDAC_SECDED;
2860 		}
2861 
2862 		for (j = 0; j < pvt->channel_count; j++) {
2863 			dimm = csrow->channels[j]->dimm;
2864 			dimm->mtype = pvt->dram_type;
2865 			dimm->edac_mode = edac_mode;
2866 			dimm->grain = 64;
2867 		}
2868 	}
2869 
2870 	return empty;
2871 }
2872 
2873 /* get all cores on this DCT */
2874 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
2875 {
2876 	int cpu;
2877 
2878 	for_each_online_cpu(cpu)
2879 		if (amd_get_nb_id(cpu) == nid)
2880 			cpumask_set_cpu(cpu, mask);
2881 }
2882 
2883 /* check MCG_CTL on all the cpus on this node */
2884 static bool nb_mce_bank_enabled_on_node(u16 nid)
2885 {
2886 	cpumask_var_t mask;
2887 	int cpu, nbe;
2888 	bool ret = false;
2889 
2890 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2891 		amd64_warn("%s: Error allocating mask\n", __func__);
2892 		return false;
2893 	}
2894 
2895 	get_cpus_on_this_dct_cpumask(mask, nid);
2896 
2897 	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2898 
2899 	for_each_cpu(cpu, mask) {
2900 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2901 		nbe = reg->l & MSR_MCGCTL_NBE;
2902 
2903 		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2904 			 cpu, reg->q,
2905 			 (nbe ? "enabled" : "disabled"));
2906 
2907 		if (!nbe)
2908 			goto out;
2909 	}
2910 	ret = true;
2911 
2912 out:
2913 	free_cpumask_var(mask);
2914 	return ret;
2915 }
2916 
2917 static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
2918 {
2919 	cpumask_var_t cmask;
2920 	int cpu;
2921 
2922 	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2923 		amd64_warn("%s: error allocating mask\n", __func__);
2924 		return -ENOMEM;
2925 	}
2926 
2927 	get_cpus_on_this_dct_cpumask(cmask, nid);
2928 
2929 	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2930 
2931 	for_each_cpu(cpu, cmask) {
2932 
2933 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2934 
2935 		if (on) {
2936 			if (reg->l & MSR_MCGCTL_NBE)
2937 				s->flags.nb_mce_enable = 1;
2938 
2939 			reg->l |= MSR_MCGCTL_NBE;
2940 		} else {
2941 			/*
2942 			 * Turn off NB MCE reporting only when it was off before
2943 			 */
2944 			if (!s->flags.nb_mce_enable)
2945 				reg->l &= ~MSR_MCGCTL_NBE;
2946 		}
2947 	}
2948 	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2949 
2950 	free_cpumask_var(cmask);
2951 
2952 	return 0;
2953 }
2954 
2955 static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2956 				       struct pci_dev *F3)
2957 {
2958 	bool ret = true;
2959 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2960 
2961 	if (toggle_ecc_err_reporting(s, nid, ON)) {
2962 		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2963 		return false;
2964 	}
2965 
2966 	amd64_read_pci_cfg(F3, NBCTL, &value);
2967 
2968 	s->old_nbctl   = value & mask;
2969 	s->nbctl_valid = true;
2970 
2971 	value |= mask;
2972 	amd64_write_pci_cfg(F3, NBCTL, value);
2973 
2974 	amd64_read_pci_cfg(F3, NBCFG, &value);
2975 
2976 	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2977 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2978 
2979 	if (!(value & NBCFG_ECC_ENABLE)) {
2980 		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2981 
2982 		s->flags.nb_ecc_prev = 0;
2983 
2984 		/* Attempt to turn on DRAM ECC Enable */
2985 		value |= NBCFG_ECC_ENABLE;
2986 		amd64_write_pci_cfg(F3, NBCFG, value);
2987 
2988 		amd64_read_pci_cfg(F3, NBCFG, &value);
2989 
2990 		if (!(value & NBCFG_ECC_ENABLE)) {
2991 			amd64_warn("Hardware rejected DRAM ECC enable, "
2992 				   "check memory DIMM configuration.\n");
2993 			ret = false;
2994 		} else {
2995 			amd64_info("Hardware accepted DRAM ECC Enable\n");
2996 		}
2997 	} else {
2998 		s->flags.nb_ecc_prev = 1;
2999 	}
3000 
3001 	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
3002 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
3003 
3004 	return ret;
3005 }
3006 
3007 static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
3008 					struct pci_dev *F3)
3009 {
3010 	u32 value, mask = 0x3;		/* UECC/CECC enable */
3011 
3012 	if (!s->nbctl_valid)
3013 		return;
3014 
3015 	amd64_read_pci_cfg(F3, NBCTL, &value);
3016 	value &= ~mask;
3017 	value |= s->old_nbctl;
3018 
3019 	amd64_write_pci_cfg(F3, NBCTL, value);
3020 
3021 	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
3022 	if (!s->flags.nb_ecc_prev) {
3023 		amd64_read_pci_cfg(F3, NBCFG, &value);
3024 		value &= ~NBCFG_ECC_ENABLE;
3025 		amd64_write_pci_cfg(F3, NBCFG, value);
3026 	}
3027 
3028 	/* restore the NB Enable MCGCTL bit */
3029 	if (toggle_ecc_err_reporting(s, nid, OFF))
3030 		amd64_warn("Error restoring NB MCGCTL settings!\n");
3031 }
3032 
3033 /*
3034  * EDAC requires that the BIOS have ECC enabled before
3035  * taking over the processing of ECC errors. A command line
3036  * option can be used to force-enable hardware ECC later in
3037  * enable_ecc_error_reporting().
3038  */
3039 static const char *ecc_msg =
3040 	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
3041 	" Either enable ECC checking or force module loading by setting "
3042 	"'ecc_enable_override'.\n"
3043 	" (Note that use of the override may cause unknown side effects.)\n";
3044 
3045 static bool ecc_enabled(struct pci_dev *F3, u16 nid)
3046 {
3047 	bool nb_mce_en = false;
3048 	u8 ecc_en = 0, i;
3049 	u32 value;
3050 
3051 	if (boot_cpu_data.x86 >= 0x17) {
3052 		u8 umc_en_mask = 0, ecc_en_mask = 0;
3053 
3054 		for (i = 0; i < NUM_UMCS; i++) {
3055 			u32 base = get_umc_base(i);
3056 
3057 			/* Only check enabled UMCs. */
3058 			if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
3059 				continue;
3060 
3061 			if (!(value & UMC_SDP_INIT))
3062 				continue;
3063 
3064 			umc_en_mask |= BIT(i);
3065 
3066 			if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
3067 				continue;
3068 
3069 			if (value & UMC_ECC_ENABLED)
3070 				ecc_en_mask |= BIT(i);
3071 		}
3072 
3073 		/* Check whether at least one UMC is enabled: */
3074 		if (umc_en_mask)
3075 			ecc_en = umc_en_mask == ecc_en_mask;
3076 		else
3077 			edac_dbg(0, "Node %d: No enabled UMCs.\n", nid);
3078 
3079 		/* Assume UMC MCA banks are enabled. */
3080 		nb_mce_en = true;
3081 	} else {
3082 		amd64_read_pci_cfg(F3, NBCFG, &value);
3083 
3084 		ecc_en = !!(value & NBCFG_ECC_ENABLE);
3085 
3086 		nb_mce_en = nb_mce_bank_enabled_on_node(nid);
3087 		if (!nb_mce_en)
3088 			edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
3089 				     MSR_IA32_MCG_CTL, nid);
3090 	}
3091 
3092 	amd64_info("Node %d: DRAM ECC %s.\n",
3093 		   nid, (ecc_en ? "enabled" : "disabled"));
3094 
3095 	if (!ecc_en || !nb_mce_en) {
3096 		amd64_info("%s", ecc_msg);
3097 		return false;
3098 	}
3099 	return true;
3100 }
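
/*
 * In the UMC branch above, ECC only counts as enabled when every UMC that
 * completed SDP init also reports UMC_ECC_ENABLED (umc_en_mask == ecc_en_mask);
 * a single non-ECC channel keeps this node's instance from being set up
 * unless ecc_enable_override is used.
 */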
3101 
3102 static inline void
3103 f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
3104 {
3105 	u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
3106 
3107 	for (i = 0; i < NUM_UMCS; i++) {
3108 		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
3109 			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
3110 			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
3111 
3112 			dev_x4  &= !!(pvt->umc[i].dimm_cfg & BIT(6));
3113 			dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7));
3114 		}
3115 	}
3116 
3117 	/* Set chipkill only if ECC is enabled: */
3118 	if (ecc_en) {
3119 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
3120 
3121 		if (!cpk_en)
3122 			return;
3123 
3124 		if (dev_x4)
3125 			mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
3126 		else if (dev_x16)
3127 			mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED;
3128 		else
3129 			mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED;
3130 	}
3131 }
3132 
3133 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
3134 				 struct amd64_family_type *fam)
3135 {
3136 	struct amd64_pvt *pvt = mci->pvt_info;
3137 
3138 	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
3139 	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
3140 
3141 	if (pvt->umc) {
3142 		f17h_determine_edac_ctl_cap(mci, pvt);
3143 	} else {
3144 		if (pvt->nbcap & NBCAP_SECDED)
3145 			mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
3146 
3147 		if (pvt->nbcap & NBCAP_CHIPKILL)
3148 			mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
3149 	}
3150 
3151 	mci->edac_cap		= determine_edac_cap(pvt);
3152 	mci->mod_name		= EDAC_MOD_STR;
3153 	mci->ctl_name		= fam->ctl_name;
3154 	mci->dev_name		= pci_name(pvt->F3);
3155 	mci->ctl_page_to_phys	= NULL;
3156 
3157 	/* memory scrubber interface */
3158 	mci->set_sdram_scrub_rate = set_scrub_rate;
3159 	mci->get_sdram_scrub_rate = get_scrub_rate;
3160 }
3161 
3162 /*
3163  * returns a pointer to the family descriptor on success, NULL otherwise.
3164  */
3165 static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
3166 {
3167 	struct amd64_family_type *fam_type = NULL;
3168 
3169 	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
3170 	pvt->stepping	= boot_cpu_data.x86_stepping;
3171 	pvt->model	= boot_cpu_data.x86_model;
3172 	pvt->fam	= boot_cpu_data.x86;
3173 
3174 	switch (pvt->fam) {
3175 	case 0xf:
3176 		fam_type	= &family_types[K8_CPUS];
3177 		pvt->ops	= &family_types[K8_CPUS].ops;
3178 		break;
3179 
3180 	case 0x10:
3181 		fam_type	= &family_types[F10_CPUS];
3182 		pvt->ops	= &family_types[F10_CPUS].ops;
3183 		break;
3184 
3185 	case 0x15:
3186 		if (pvt->model == 0x30) {
3187 			fam_type = &family_types[F15_M30H_CPUS];
3188 			pvt->ops = &family_types[F15_M30H_CPUS].ops;
3189 			break;
3190 		} else if (pvt->model == 0x60) {
3191 			fam_type = &family_types[F15_M60H_CPUS];
3192 			pvt->ops = &family_types[F15_M60H_CPUS].ops;
3193 			break;
3194 		}
3195 
3196 		fam_type	= &family_types[F15_CPUS];
3197 		pvt->ops	= &family_types[F15_CPUS].ops;
3198 		break;
3199 
3200 	case 0x16:
3201 		if (pvt->model == 0x30) {
3202 			fam_type = &family_types[F16_M30H_CPUS];
3203 			pvt->ops = &family_types[F16_M30H_CPUS].ops;
3204 			break;
3205 		}
3206 		fam_type	= &family_types[F16_CPUS];
3207 		pvt->ops	= &family_types[F16_CPUS].ops;
3208 		break;
3209 
3210 	case 0x17:
3211 		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
3212 			fam_type = &family_types[F17_M10H_CPUS];
3213 			pvt->ops = &family_types[F17_M10H_CPUS].ops;
3214 			break;
3215 		}
3216 		fam_type	= &family_types[F17_CPUS];
3217 		pvt->ops	= &family_types[F17_CPUS].ops;
3218 		break;
3219 
3220 	default:
3221 		amd64_err("Unsupported family!\n");
3222 		return NULL;
3223 	}
3224 
3225 	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
3226 		     (pvt->fam == 0xf ?
3227 				(pvt->ext_model >= K8_REV_F  ? "revF or later "
3228 							     : "revE or earlier ")
3229 				 : ""), pvt->mc_node_id);
3230 	return fam_type;
3231 }
3232 
3233 static const struct attribute_group *amd64_edac_attr_groups[] = {
3234 #ifdef CONFIG_EDAC_DEBUG
3235 	&amd64_edac_dbg_group,
3236 #endif
3237 #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
3238 	&amd64_edac_inj_group,
3239 #endif
3240 	NULL
3241 };
3242 
3243 static int init_one_instance(unsigned int nid)
3244 {
3245 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3246 	struct amd64_family_type *fam_type = NULL;
3247 	struct mem_ctl_info *mci = NULL;
3248 	struct edac_mc_layer layers[2];
3249 	struct amd64_pvt *pvt = NULL;
3250 	u16 pci_id1, pci_id2;
3251 	int err = 0, ret;
3252 
3253 	ret = -ENOMEM;
3254 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
3255 	if (!pvt)
3256 		goto err_ret;
3257 
3258 	pvt->mc_node_id	= nid;
3259 	pvt->F3 = F3;
3260 
3261 	ret = -EINVAL;
3262 	fam_type = per_family_init(pvt);
3263 	if (!fam_type)
3264 		goto err_free;
3265 
3266 	if (pvt->fam >= 0x17) {
3267 		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
3268 		if (!pvt->umc) {
3269 			ret = -ENOMEM;
3270 			goto err_free;
3271 		}
3272 
3273 		pci_id1 = fam_type->f0_id;
3274 		pci_id2 = fam_type->f6_id;
3275 	} else {
3276 		pci_id1 = fam_type->f1_id;
3277 		pci_id2 = fam_type->f2_id;
3278 	}
3279 
3280 	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
3281 	if (err)
3282 		goto err_post_init;
3283 
3284 	read_mc_regs(pvt);
3285 
3286 	/*
3287 	 * We need to determine how many memory channels there are. Then use
3288 	 * that information for calculating the size of the dynamic instance
3289 	 * tables in the 'mci' structure.
3290 	 */
3291 	ret = -EINVAL;
3292 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
3293 	if (pvt->channel_count < 0)
3294 		goto err_siblings;
3295 
3296 	ret = -ENOMEM;
3297 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
3298 	layers[0].size = pvt->csels[0].b_cnt;
3299 	layers[0].is_virt_csrow = true;
3300 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
3301 
3302 	/*
3303 	 * Always allocate two channels since we can have setups with DIMMs on
3304 	 * only one channel. Also, this simplifies handling later for the price
3305 	 * of a couple of KBs tops.
3306 	 */
3307 	layers[1].size = 2;
3308 	layers[1].is_virt_csrow = false;
3309 
3310 	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
3311 	if (!mci)
3312 		goto err_siblings;
3313 
3314 	mci->pvt_info = pvt;
3315 	mci->pdev = &pvt->F3->dev;
3316 
3317 	setup_mci_misc_attrs(mci, fam_type);
3318 
3319 	if (init_csrows(mci))
3320 		mci->edac_cap = EDAC_FLAG_NONE;
3321 
3322 	ret = -ENODEV;
3323 	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
3324 		edac_dbg(1, "failed edac_mc_add_mc()\n");
3325 		goto err_add_mc;
3326 	}
3327 
3328 	return 0;
3329 
3330 err_add_mc:
3331 	edac_mc_free(mci);
3332 
3333 err_siblings:
3334 	free_mc_sibling_devs(pvt);
3335 
3336 err_post_init:
3337 	if (pvt->fam >= 0x17)
3338 		kfree(pvt->umc);
3339 
3340 err_free:
3341 	kfree(pvt);
3342 
3343 err_ret:
3344 	return ret;
3345 }
3346 
3347 static int probe_one_instance(unsigned int nid)
3348 {
3349 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3350 	struct ecc_settings *s;
3351 	int ret;
3352 
3353 	ret = -ENOMEM;
3354 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
3355 	if (!s)
3356 		goto err_out;
3357 
3358 	ecc_stngs[nid] = s;
3359 
3360 	if (!ecc_enabled(F3, nid)) {
3361 		ret = 0;
3362 
3363 		if (!ecc_enable_override)
3364 			goto err_enable;
3365 
3366 		if (boot_cpu_data.x86 >= 0x17) {
3367 			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.\n");
3368 			goto err_enable;
3369 		} else
3370 			amd64_warn("Forcing ECC on!\n");
3371 
3372 		if (!enable_ecc_error_reporting(s, nid, F3))
3373 			goto err_enable;
3374 	}
3375 
3376 	ret = init_one_instance(nid);
3377 	if (ret < 0) {
3378 		amd64_err("Error probing instance: %d\n", nid);
3379 
3380 		if (boot_cpu_data.x86 < 0x17)
3381 			restore_ecc_error_reporting(s, nid, F3);
3382 
3383 		goto err_enable;
3384 	}
3385 
3386 	return ret;
3387 
3388 err_enable:
3389 	kfree(s);
3390 	ecc_stngs[nid] = NULL;
3391 
3392 err_out:
3393 	return ret;
3394 }
3395 
3396 static void remove_one_instance(unsigned int nid)
3397 {
3398 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3399 	struct ecc_settings *s = ecc_stngs[nid];
3400 	struct mem_ctl_info *mci;
3401 	struct amd64_pvt *pvt;
3402 
3403 	mci = find_mci_by_dev(&F3->dev);
3404 	WARN_ON(!mci);
3405 
3406 	/* Remove from EDAC CORE tracking list */
3407 	mci = edac_mc_del_mc(&F3->dev);
3408 	if (!mci)
3409 		return;
3410 
3411 	pvt = mci->pvt_info;
3412 
3413 	restore_ecc_error_reporting(s, nid, F3);
3414 
3415 	free_mc_sibling_devs(pvt);
3416 
3417 	kfree(ecc_stngs[nid]);
3418 	ecc_stngs[nid] = NULL;
3419 
3420 	/* Free the EDAC CORE resources */
3421 	mci->pvt_info = NULL;
3422 
3423 	kfree(pvt);
3424 	edac_mc_free(mci);
3425 }
3426 
3427 static void setup_pci_device(void)
3428 {
3429 	struct mem_ctl_info *mci;
3430 	struct amd64_pvt *pvt;
3431 
3432 	if (pci_ctl)
3433 		return;
3434 
3435 	mci = edac_mc_find(0);
3436 	if (!mci)
3437 		return;
3438 
3439 	pvt = mci->pvt_info;
3440 	if (pvt->umc)
3441 		pci_ctl = edac_pci_create_generic_ctl(&pvt->F0->dev, EDAC_MOD_STR);
3442 	else
3443 		pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
3444 	if (!pci_ctl) {
3445 		pr_warn("%s(): Unable to create PCI control\n", __func__);
3446 		pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
3447 	}
3448 }
3449 
3450 static const struct x86_cpu_id amd64_cpuids[] = {
3451 	{ X86_VENDOR_AMD, 0xF,	X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
3452 	{ X86_VENDOR_AMD, 0x10, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
3453 	{ X86_VENDOR_AMD, 0x15, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
3454 	{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
3455 	{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
3456 	{ }
3457 };
3458 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
3459 
3460 static int __init amd64_edac_init(void)
3461 {
3462 	int err = -ENODEV;
3463 	int i;
3464 
3465 	if (!x86_match_cpu(amd64_cpuids))
3466 		return -ENODEV;
3467 
3468 	if (amd_cache_northbridges() < 0)
3469 		return -ENODEV;
3470 
3471 	opstate_init();
3472 
3473 	err = -ENOMEM;
3474 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
3475 	if (!ecc_stngs)
3476 		goto err_free;
3477 
3478 	msrs = msrs_alloc();
3479 	if (!msrs)
3480 		goto err_free;
3481 
3482 	for (i = 0; i < amd_nb_num(); i++) {
3483 		err = probe_one_instance(i);
3484 		if (err) {
3485 			/* unwind properly */
3486 			while (--i >= 0)
3487 				remove_one_instance(i);
3488 
3489 			goto err_pci;
3490 		}
3491 	}
3492 
3493 	if (!edac_has_mcs()) {
3494 		err = -ENODEV;
3495 		goto err_pci;
3496 	}
3497 
3498 	/* register stuff with EDAC MCE */
3499 	if (report_gart_errors)
3500 		amd_report_gart_errors(true);
3501 
3502 	if (boot_cpu_data.x86 >= 0x17)
3503 		amd_register_ecc_decoder(decode_umc_error);
3504 	else
3505 		amd_register_ecc_decoder(decode_bus_error);
3506 
3507 	setup_pci_device();
3508 
3509 #ifdef CONFIG_X86_32
3510 	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
3511 #endif
3512 
3513 	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
3514 
3515 	return 0;
3516 
3517 err_pci:
3518 	msrs_free(msrs);
3519 	msrs = NULL;
3520 
3521 err_free:
3522 	kfree(ecc_stngs);
3523 	ecc_stngs = NULL;
3524 
3525 	return err;
3526 }
3527 
3528 static void __exit amd64_edac_exit(void)
3529 {
3530 	int i;
3531 
3532 	if (pci_ctl)
3533 		edac_pci_release_generic_ctl(pci_ctl);
3534 
3535 	/* unregister from EDAC MCE */
3536 	amd_report_gart_errors(false);
3537 
3538 	if (boot_cpu_data.x86 >= 0x17)
3539 		amd_unregister_ecc_decoder(decode_umc_error);
3540 	else
3541 		amd_unregister_ecc_decoder(decode_bus_error);
3542 
3543 	for (i = 0; i < amd_nb_num(); i++)
3544 		remove_one_instance(i);
3545 
3546 	kfree(ecc_stngs);
3547 	ecc_stngs = NULL;
3548 
3549 	msrs_free(msrs);
3550 	msrs = NULL;
3551 }
3552 
3553 module_init(amd64_edac_init);
3554 module_exit(amd64_edac_exit);
3555 
3556 MODULE_LICENSE("GPL");
3557 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3558 		"Dave Peterson, Thayne Harbaugh");
3559 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3560 		EDAC_AMD64_VERSION);
3561 
3562 module_param(edac_op_state, int, 0444);
3563 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
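
/*
 * Typical usage (illustrative only; the module name and defaults may differ
 * between kernel builds):
 *
 *	modprobe amd64_edac_mod edac_op_state=1 ecc_enable_override=1
 *
 * loads the driver with NMI-based reporting and forces ECC on even if the
 * BIOS left it disabled (probe_one_instance() refuses the override on
 * family 0x17 and newer).
 */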
3564