#include <linux/module.h>
#include <linux/slab.h>

#include "mce_amd.h"

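/*
 * Per-family decode callbacks, selected in mce_amd_init(). xec_mask covers
 * the low four bits of the extended error code by default and is widened
 * for families that report more XEC bits.
 */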
static struct amd_decoder_ops *fam_ops;

static u8 xec_mask = 0xf;

static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct mce *m);

void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);

void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
	nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);

void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
{
	if (nb_bus_decoder) {
		WARN_ON(nb_bus_decoder != f);

		nb_bus_decoder = NULL;
	}
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);

/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */

/* transaction type */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };

/* cache level */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };

/* memory transaction type */
static const char * const rrrr_msgs[] = {
	"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

/* participating processor */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
static const char * const to_msgs[] = { "no timeout", "timed out" };

/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };

/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };

static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",		/* xec = 0xd */
	"Microcode Patch Buffer",		/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};

static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",		/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",			/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};

static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	"L3 data cache ECC error",		/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};

static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",
	"Retire status queue"
};

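/*
 * Per-family MC0 decoders. Each takes the primary error code and the
 * extended error code and returns true if it recognized (and printed)
 * the signature, false otherwise so the caller can flag corrupted info.
 */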
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = false;

	if (MEM_ERROR(ec)) {
		u8 ll = LL(ec);
		ret = true;

		if (ll == LL_L2)
			pr_cont("during L1 linefill from L2.\n");
		else if (ll == LL_L1)
			pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
		else
			ret = false;
	}
	return ret;
}

static bool f10h_mc0_mce(u16 ec, u8 xec)
{
	if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
		pr_cont("during data scrub.\n");
		return true;
	}
	return f12h_mc0_mce(ec, xec);
}

static bool k8_mc0_mce(u16 ec, u8 xec)
{
	if (BUS_ERROR(ec)) {
		pr_cont("during system linefill.\n");
		return true;
	}

	return f10h_mc0_mce(ec, xec);
}

static bool cat_mc0_mce(u16 ec, u8 xec)
{
	u8 r4 = R4(ec);
	bool ret = true;

	if (MEM_ERROR(ec)) {

		if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
			return false;

		switch (r4) {
		case R4_DRD:
		case R4_DWR:
			pr_cont("Data/Tag parity error due to %s.\n",
				(r4 == R4_DRD ? "load/hw prf" : "store"));
			break;
		case R4_EVICT:
			pr_cont("Copyback parity error on a tag miss.\n");
			break;
		case R4_SNOOP:
			pr_cont("Tag parity error during snoop.\n");
			break;
		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
			return false;

		pr_cont("System read data error on a ");

		switch (r4) {
		case R4_RD:
			pr_cont("TLB reload.\n");
			break;
		case R4_DWR:
			pr_cont("store.\n");
			break;
		case R4_DRD:
			pr_cont("load.\n");
			break;
		default:
			ret = false;
		}
	} else {
		ret = false;
	}

	return ret;
}

static bool f15h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (MEM_ERROR(ec)) {

		switch (xec) {
		case 0x0:
			pr_cont("Data Array access error.\n");
			break;

		case 0x1:
			pr_cont("UC error during a linefill from L2/NB.\n");
			break;

		case 0x2:
		case 0x11:
			pr_cont("STQ access error.\n");
			break;

		case 0x3:
			pr_cont("SCB access error.\n");
			break;

		case 0x10:
			pr_cont("Tag error.\n");
			break;

		case 0x12:
			pr_cont("LDQ access error.\n");
			break;

		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if (!xec)
			pr_cont("System Read Data Error.\n");
		else
			pr_cont(" Internal error condition type %d.\n", xec);
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x1f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;

	} else
		ret = false;

	return ret;
}

static void decode_mc0_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC0 Error: ");

	/* TLB error signatures are the same across families */
	if (TLB_ERROR(ec)) {
		if (TT(ec) == TT_DATA) {
			pr_cont("%s TLB %s.\n", LL_MSG(ec),
				((xec == 2) ? "locked miss"
					    : (xec ? "multimatch" : "parity")));
			return;
		}
	} else if (fam_ops->mc0_mce(ec, xec))
		;
	else
		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
}

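/*
 * Per-family MC1 decoders, same return convention as the MC0 helpers above.
 */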
static bool k8_mc1_mce(u16 ec, u8 xec)
{
	u8 ll = LL(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (ll == 0x2)
		pr_cont("during a linefill from L2.\n");
	else if (ll == 0x1) {
		switch (R4(ec)) {
		case R4_IRD:
			pr_cont("Parity error during data load.\n");
			break;

		case R4_EVICT:
			pr_cont("Copyback Parity/Victim error.\n");
			break;

		case R4_SNOOP:
			pr_cont("Tag Snoop error.\n");
			break;

		default:
			ret = false;
			break;
		}
	} else
		ret = false;

	return ret;
}

static bool cat_mc1_mce(u16 ec, u8 xec)
{
	u8 r4 = R4(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (TT(ec) != TT_INSTR)
		return false;

	if (r4 == R4_IRD)
		pr_cont("Data/tag array parity error for a tag hit.\n");
	else if (r4 == R4_SNOOP)
		pr_cont("Tag error during snoop/victimization.\n");
	else if (xec == 0x0)
		pr_cont("Tag parity error from victim castout.\n");
	else if (xec == 0x2)
		pr_cont("Microcode patch RAM parity error.\n");
	else
		ret = false;

	return ret;
}

static bool f15h_mc1_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x0 ... 0xa:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
		break;

	case 0xd:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
		break;

	case 0x10:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	case 0x11 ... 0x15:
		pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	default:
		ret = false;
	}
	return ret;
}

static void decode_mc1_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC1 Error: ");

	if (TLB_ERROR(ec))
		pr_cont("%s TLB %s.\n", LL_MSG(ec),
			(xec ? "multimatch" : "parity error"));
	else if (BUS_ERROR(ec)) {
		bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));

		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			goto wrong_mc1_mce;
	} else if (fam_ops->mc1_mce(ec, xec))
		;
	else
		goto wrong_mc1_mce;

	return;

wrong_mc1_mce:
	pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
}

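/*
 * Per-family MC2 decoders: families up through 0x14 share k8_mc2_mce(),
 * while 0x15 and 0x16 have dedicated handlers (see mce_amd_init()).
 */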
static bool k8_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (xec == 0x1)
		pr_cont(" in the write data buffers.\n");
	else if (xec == 0x3)
		pr_cont(" in the victim data buffers.\n");
	else if (xec == 0x2 && MEM_ERROR(ec))
		pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
	else if (xec == 0x0) {
		if (TLB_ERROR(ec))
			pr_cont(": %s error in a Page Descriptor Cache or "
				"Guest TLB.\n", TT_MSG(ec));
		else if (BUS_ERROR(ec))
			pr_cont(": %s/ECC error in data read from NB: %s.\n",
				R4_MSG(ec), PP_MSG(ec));
		else if (MEM_ERROR(ec)) {
			u8 r4 = R4(ec);

			if (r4 >= 0x7)
				pr_cont(": %s error during data copyback.\n",
					R4_MSG(ec));
			else if (r4 <= 0x1)
				pr_cont(": %s parity/ECC error during data "
					"access from L2.\n", R4_MSG(ec));
			else
				ret = false;
		} else
			ret = false;
	} else
		ret = false;

	return ret;
}

static bool f15h_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (TLB_ERROR(ec)) {
		if (xec == 0x0)
			pr_cont("Data parity TLB read error.\n");
		else if (xec == 0x1)
			pr_cont("Poison data provided for TLB fill.\n");
		else
			ret = false;
	} else if (BUS_ERROR(ec)) {
		if (xec > 2)
			ret = false;

		pr_cont("Error during attempted NB data read.\n");
	} else if (MEM_ERROR(ec)) {
		switch (xec) {
		case 0x4 ... 0xc:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
			break;

		case 0x10 ... 0x14:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
			break;

		default:
			ret = false;
		}
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;
	}

	return ret;
}

static bool f16h_mc2_mce(u16 ec, u8 xec)
{
	u8 r4 = R4(ec);

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x04 ... 0x05:
		pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
		break;

	case 0x09 ... 0x0b:
	case 0x0d ... 0x0f:
		pr_cont("ECC error in L2 tag (%s).\n",
			((r4 == R4_GEN) ? "BankReq" :
			((r4 == R4_SNOOP) ? "Prb" : "Fill")));
		break;

	case 0x10 ... 0x19:
	case 0x1b:
		pr_cont("ECC error in L2 data array (%s).\n",
			(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
			((r4 == R4_GEN) ? "Attr" :
			((r4 == R4_EVICT) ? "Vict" : "Fill"))));
		break;

	case 0x1c ... 0x1d:
	case 0x1f:
		pr_cont("Parity error in L2 attribute bits (%s).\n",
			((r4 == R4_RD) ? "Hit" :
			((r4 == R4_GEN) ? "Attr" : "Fill")));
		break;

	default:
		return false;
	}

	return true;
}

static void decode_mc2_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC2 Error: ");

	if (!fam_ops->mc2_mce(ec, xec))
		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
}

static void decode_mc3_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	if (boot_cpu_data.x86 >= 0x14) {
		pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
			 " please report on LKML.\n");
		return;
	}

	pr_emerg(HW_ERR "MC3 Error");

	if (xec == 0x0) {
		u8 r4 = R4(ec);

		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
			goto wrong_mc3_mce;

		pr_cont(" during %s.\n", R4_MSG(ec));
	} else
		goto wrong_mc3_mce;

	return;

wrong_mc3_mce:
	pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
}

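/*
 * MC4 (northbridge) errors: DRAM ECC errors (xec 0x0/0x8) are additionally
 * forwarded to the callback registered via amd_register_ecc_decoder(), if any.
 */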
static void decode_mc4_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	int node_id = amd_get_nb_id(m->extcpu);
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, 0x1f);
	u8 offset = 0;

	pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);

	switch (xec) {
	case 0x0 ... 0xe:

		/* special handling for DRAM ECCs */
		if (xec == 0x0 || xec == 0x8) {
			/* no ECCs on F11h */
			if (c->x86 == 0x11)
				goto wrong_mc4_mce;

			pr_cont("%s.\n", mc4_mce_desc[xec]);

			if (nb_bus_decoder)
				nb_bus_decoder(node_id, m);
			return;
		}
		break;

	case 0xf:
		if (TLB_ERROR(ec))
			pr_cont("GART Table Walk data error.\n");
		else if (BUS_ERROR(ec))
			pr_cont("DMA Exclusion Vector Table Walk error.\n");
		else
			goto wrong_mc4_mce;
		return;

	case 0x19:
		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
			pr_cont("Compute Unit Data Error.\n");
		else
			goto wrong_mc4_mce;
		return;

	case 0x1c ... 0x1f:
		offset = 13;
		break;

	default:
		goto wrong_mc4_mce;
	}

	pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
	return;

wrong_mc4_mce:
	pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
}

static void decode_mc5_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	if (c->x86 == 0xf || c->x86 == 0x11)
		goto wrong_mc5_mce;

	pr_emerg(HW_ERR "MC5 Error: ");

	if (INT_ERROR(ec)) {
		if (xec <= 0x1f) {
			pr_cont("Hardware Assert.\n");
			return;
		} else
			goto wrong_mc5_mce;
	}

	if (xec == 0x0 || xec == 0xc)
		pr_cont("%s.\n", mc5_mce_desc[xec]);
	else if (xec <= 0xd)
		pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
	else
		goto wrong_mc5_mce;

	return;

wrong_mc5_mce:
	pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
}

static void decode_mc6_mce(struct mce *m)
{
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC6 Error: ");

	switch (xec) {
	case 0x0:
		pr_cont("Hardware Assertion");
		break;

	case 0x1:
		pr_cont("Free List");
		break;

	case 0x2:
		pr_cont("Physical Register File");
		break;

	case 0x3:
		pr_cont("Retire Queue");
		break;

	case 0x4:
		pr_cont("Scheduler table");
		break;

	case 0x5:
		pr_cont("Status Register File");
		break;

	default:
		goto wrong_mc6_mce;
		break;
	}

	pr_cont(" parity error.\n");

	return;

wrong_mc6_mce:
	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}

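/*
 * Print the fields shared by all banks from the low 16 bits of MCi_STATUS:
 * cache level, transaction type, memory transaction and, for bus errors,
 * participating processor and timeout status.
 */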
static inline void amd_decode_err_code(u16 ec)
{
	if (INT_ERROR(ec)) {
		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
		return;
	}

	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));

	if (BUS_ERROR(ec))
		pr_cont(", mem/io: %s", II_MSG(ec));
	else
		pr_cont(", tx: %s", TT_MSG(ec));

	if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
		pr_cont(", mem-tx: %s", R4_MSG(ec));

		if (BUS_ERROR(ec))
			pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
	}

	pr_cont("\n");
}

/*
 * Filter out unwanted MCE signatures here.
 */
static bool amd_filter_mce(struct mce *m)
{
	u8 xec = (m->status >> 16) & 0x1f;

	/*
	 * NB GART TLB error reporting is disabled by default.
	 */
	if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
		return true;

	return false;
}

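/*
 * Map the severity bits (UC, PCC, RIPV, Deferred) to a human-readable
 * summary printed before the detailed per-bank decode.
 */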
static const char *decode_error_status(struct mce *m)
{
	if (m->status & MCI_STATUS_UC) {
		if (m->status & MCI_STATUS_PCC)
			return "System Fatal error.";
		if (m->mcgstatus & MCG_STATUS_RIPV)
			return "Uncorrected, software restartable error.";
		return "Uncorrected, software containable error.";
	}

	if (m->status & MCI_STATUS_DEFERRED)
		return "Deferred error.";

	return "Corrected error, no action required.";
}

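/*
 * Notifier callback on the MCE decode chain: print the severity summary and
 * the raw MCi_STATUS/MCi_ADDR contents, then dispatch to the per-bank
 * decoder and finally to amd_decode_err_code() for the common fields.
 */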
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = (struct mce *)data;
	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
	int ecc;

	if (amd_filter_mce(m))
		return NOTIFY_STOP;

	pr_emerg(HW_ERR "%s\n", decode_error_status(m));

	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
		m->extcpu,
		c->x86, c->x86_model, c->x86_mask,
		m->bank,
		((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
		((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
		((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
		((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
		((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));

	if (c->x86 == 0x15 || c->x86 == 0x16)
		pr_cont("|%s|%s",
			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
			((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));

	/* handle the two ECC status bits, CECC/UECC [46:45], together */
	ecc = (m->status >> 45) & 0x3;
	if (ecc)
		pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));

	pr_cont("]: 0x%016llx\n", m->status);

	if (m->status & MCI_STATUS_ADDRV)
		pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);

	if (!fam_ops)
		goto err_code;

	switch (m->bank) {
	case 0:
		decode_mc0_mce(m);
		break;

	case 1:
		decode_mc1_mce(m);
		break;

	case 2:
		decode_mc2_mce(m);
		break;

	case 3:
		decode_mc3_mce(m);
		break;

	case 4:
		decode_mc4_mce(m);
		break;

	case 5:
		decode_mc5_mce(m);
		break;

	case 6:
		decode_mc6_mce(m);
		break;

	default:
		break;
	}

err_code:
	amd_decode_err_code(m->status & 0xffff);

	return NOTIFY_STOP;
}
EXPORT_SYMBOL_GPL(amd_decode_mce);

static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
};

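/*
 * Select the per-family decode callbacks, adjust xec_mask where the family
 * uses a wider extended error code, and hook into the MCE decode chain.
 */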
static int __init mce_amd_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	if (c->x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
	if (!fam_ops)
		return -ENOMEM;

	switch (c->x86) {
	case 0xf:
		fam_ops->mc0_mce = k8_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x10:
		fam_ops->mc0_mce = f10h_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x11:
		fam_ops->mc0_mce = k8_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x12:
		fam_ops->mc0_mce = f12h_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x14:
		fam_ops->mc0_mce = cat_mc0_mce;
		fam_ops->mc1_mce = cat_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x15:
		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;

		fam_ops->mc0_mce = f15h_mc0_mce;
		fam_ops->mc1_mce = f15h_mc1_mce;
		fam_ops->mc2_mce = f15h_mc2_mce;
		break;

	case 0x16:
		xec_mask = 0x1f;
		fam_ops->mc0_mce = cat_mc0_mce;
		fam_ops->mc1_mce = cat_mc1_mce;
		fam_ops->mc2_mce = f16h_mc2_mce;
		break;

	default:
		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
		kfree(fam_ops);
		fam_ops = NULL;
	}

	pr_info("MCE: In-kernel MCE decoding enabled.\n");

	mce_register_decode_chain(&amd_mce_dec_nb);

	return 0;
}
early_initcall(mce_amd_init);

#ifdef MODULE
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif