1 /*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35
/*
 * Extra indentation appended to the caller's prefix when this file builds
 * the prefix for nested output.
 */
36 #define INDENT_SP " "
37
/*
 * Scratch buffer handed to the memory/DIMM location formatters by
 * cper_mem_err_unpack() and cper_print_mem().
 * NOTE(review): no locking is visible in this file, so concurrent callers
 * would share this buffer -- confirm that callers serialize.
 */
38 static char rcd_decode_str[CPER_REC_LEN];
39
40 /*
41 * CPER record ID need to be unique even after reboot, because record
42 * ID is used as index for ERST storage, while CPER records from
43 * multiple boot may co-exist in ERST.
44 */
cper_next_record_id(void)45 u64 cper_next_record_id(void)
46 {
47 static atomic64_t seq;
48
49 if (!atomic64_read(&seq))
50 atomic64_set(&seq, ((u64)get_seconds()) << 32);
51
52 return atomic64_inc_return(&seq);
53 }
54 EXPORT_SYMBOL_GPL(cper_next_record_id);
55
/* Severity names, indexed by severity value. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
62
cper_severity_str(unsigned int severity)63 const char *cper_severity_str(unsigned int severity)
64 {
65 return severity < ARRAY_SIZE(severity_strs) ?
66 severity_strs[severity] : "unknown";
67 }
68 EXPORT_SYMBOL_GPL(cper_severity_str);
69
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * joined with ", ".  When appending another string would make the
 * current line longer than 80 characters, the line is printed and a new
 * one is started; @pfx is printed at the beginning of each line.  NULL
 * entries in @strs are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	/* @bits cannot carry more flags than it has bit positions. */
	const unsigned int max_bits = 8 * sizeof(bits);
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * Clamp the walk to the width of @bits: shifting 1U by that width
	 * or more is undefined behaviour, and no such bit can be set anyway.
	 */
	if (strs_size > max_bits)
		strs_size = max_bits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if ", " plus @str would not fit. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
106
/* Processor type names, indexed by processor type value. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Processor ISA names, indexed by ISA value. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position. */
static const char * const proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Processor operation names, indexed by operation value. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
138
cper_print_proc_generic(const char * pfx,const struct cper_sec_proc_generic * proc)139 static void cper_print_proc_generic(const char *pfx,
140 const struct cper_sec_proc_generic *proc)
141 {
142 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
143 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
144 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
145 proc_type_strs[proc->proc_type] : "unknown");
146 if (proc->validation_bits & CPER_PROC_VALID_ISA)
147 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
148 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
149 proc_isa_strs[proc->proc_isa] : "unknown");
150 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
151 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
152 cper_print_bits(pfx, proc->proc_error_type,
153 proc_error_type_strs,
154 ARRAY_SIZE(proc_error_type_strs));
155 }
156 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
157 printk("%s""operation: %d, %s\n", pfx, proc->operation,
158 proc->operation < ARRAY_SIZE(proc_op_strs) ?
159 proc_op_strs[proc->operation] : "unknown");
160 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
161 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
162 cper_print_bits(pfx, proc->flags, proc_flag_strs,
163 ARRAY_SIZE(proc_flag_strs));
164 }
165 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
166 printk("%s""level: %d\n", pfx, proc->level);
167 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
168 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
169 if (proc->validation_bits & CPER_PROC_VALID_ID)
170 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
171 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
172 printk("%s""target_address: 0x%016llx\n",
173 pfx, proc->target_addr);
174 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
175 printk("%s""requestor_id: 0x%016llx\n",
176 pfx, proc->requestor_id);
177 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
178 printk("%s""responder_id: 0x%016llx\n",
179 pfx, proc->responder_id);
180 if (proc->validation_bits & CPER_PROC_VALID_IP)
181 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
182 }
183
/* Memory error type names, indexed by error type value. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};
202
cper_mem_err_type_str(unsigned int etype)203 const char *cper_mem_err_type_str(unsigned int etype)
204 {
205 return etype < ARRAY_SIZE(mem_err_type_strs) ?
206 mem_err_type_strs[etype] : "unknown";
207 }
208 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
209
cper_mem_err_location(struct cper_mem_err_compact * mem,char * msg)210 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
211 {
212 u32 len, n;
213
214 if (!msg)
215 return 0;
216
217 n = 0;
218 len = CPER_REC_LEN - 1;
219 if (mem->validation_bits & CPER_MEM_VALID_NODE)
220 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
221 if (mem->validation_bits & CPER_MEM_VALID_CARD)
222 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
223 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
224 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
225 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
226 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
227 if (mem->validation_bits & CPER_MEM_VALID_BANK)
228 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
229 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
230 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
231 if (mem->validation_bits & CPER_MEM_VALID_ROW)
232 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
233 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
234 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
235 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
236 n += scnprintf(msg + n, len - n, "bit_position: %d ",
237 mem->bit_pos);
238 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
239 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
240 mem->requestor_id);
241 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
242 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
243 mem->responder_id);
244 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
245 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
246 mem->target_id);
247
248 msg[n] = '\0';
249 return n;
250 }
251
cper_dimm_err_location(struct cper_mem_err_compact * mem,char * msg)252 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253 {
254 u32 len, n;
255 const char *bank = NULL, *device = NULL;
256
257 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
258 return 0;
259
260 n = 0;
261 len = CPER_REC_LEN - 1;
262 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
263 if (bank && device)
264 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
265 else
266 n = snprintf(msg, len,
267 "DIMM location: not present. DMI handle: 0x%.4x ",
268 mem->mem_dev_handle);
269
270 msg[n] = '\0';
271 return n;
272 }
273
cper_mem_err_pack(const struct cper_sec_mem_err * mem,struct cper_mem_err_compact * cmem)274 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
275 struct cper_mem_err_compact *cmem)
276 {
277 cmem->validation_bits = mem->validation_bits;
278 cmem->node = mem->node;
279 cmem->card = mem->card;
280 cmem->module = mem->module;
281 cmem->bank = mem->bank;
282 cmem->device = mem->device;
283 cmem->row = mem->row;
284 cmem->column = mem->column;
285 cmem->bit_pos = mem->bit_pos;
286 cmem->requestor_id = mem->requestor_id;
287 cmem->responder_id = mem->responder_id;
288 cmem->target_id = mem->target_id;
289 cmem->rank = mem->rank;
290 cmem->mem_array_handle = mem->mem_array_handle;
291 cmem->mem_dev_handle = mem->mem_dev_handle;
292 }
293
cper_mem_err_unpack(struct trace_seq * p,struct cper_mem_err_compact * cmem)294 const char *cper_mem_err_unpack(struct trace_seq *p,
295 struct cper_mem_err_compact *cmem)
296 {
297 const char *ret = p->buffer + p->len;
298
299 if (cper_mem_err_location(cmem, rcd_decode_str))
300 trace_seq_printf(p, "%s", rcd_decode_str);
301 if (cper_dimm_err_location(cmem, rcd_decode_str))
302 trace_seq_printf(p, "%s", rcd_decode_str);
303 trace_seq_putc(p, '\0');
304
305 return ret;
306 }
307
cper_print_mem(const char * pfx,const struct cper_sec_mem_err * mem)308 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
309 {
310 struct cper_mem_err_compact cmem;
311
312 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
313 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
314 if (mem->validation_bits & CPER_MEM_VALID_PA)
315 printk("%s""physical_address: 0x%016llx\n",
316 pfx, mem->physical_addr);
317 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
318 printk("%s""physical_address_mask: 0x%016llx\n",
319 pfx, mem->physical_addr_mask);
320 cper_mem_err_pack(mem, &cmem);
321 if (cper_mem_err_location(&cmem, rcd_decode_str))
322 printk("%s%s\n", pfx, rcd_decode_str);
323 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
324 u8 etype = mem->error_type;
325 printk("%s""error_type: %d, %s\n", pfx, etype,
326 cper_mem_err_type_str(etype));
327 }
328 if (cper_dimm_err_location(&cmem, rcd_decode_str))
329 printk("%s%s\n", pfx, rcd_decode_str);
330 }
331
/* PCIe port type names, indexed by port type value. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
345
cper_print_pcie(const char * pfx,const struct cper_sec_pcie * pcie,const struct acpi_hest_generic_data * gdata)346 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
347 const struct acpi_hest_generic_data *gdata)
348 {
349 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
350 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
351 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
352 pcie_port_type_strs[pcie->port_type] : "unknown");
353 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
354 printk("%s""version: %d.%d\n", pfx,
355 pcie->version.major, pcie->version.minor);
356 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
357 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
358 pcie->command, pcie->status);
359 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
360 const __u8 *p;
361 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
362 pcie->device_id.segment, pcie->device_id.bus,
363 pcie->device_id.device, pcie->device_id.function);
364 printk("%s""slot: %d\n", pfx,
365 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
366 printk("%s""secondary_bus: 0x%02x\n", pfx,
367 pcie->device_id.secondary_bus);
368 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
369 pcie->device_id.vendor_id, pcie->device_id.device_id);
370 p = pcie->device_id.class_code;
371 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
372 }
373 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
374 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
375 pcie->serial_number.lower, pcie->serial_number.upper);
376 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
377 printk(
378 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
379 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
380 }
381
cper_estatus_print_section(const char * pfx,const struct acpi_hest_generic_data * gdata,int sec_no)382 static void cper_estatus_print_section(
383 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
384 {
385 uuid_le *sec_type = (uuid_le *)gdata->section_type;
386 __u16 severity;
387 char newpfx[64];
388
389 severity = gdata->error_severity;
390 printk("%s""Error %d, type: %s\n", pfx, sec_no,
391 cper_severity_str(severity));
392 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
393 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
394 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
395 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
396
397 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
398 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
399 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
400 printk("%s""section_type: general processor error\n", newpfx);
401 if (gdata->error_data_length >= sizeof(*proc_err))
402 cper_print_proc_generic(newpfx, proc_err);
403 else
404 goto err_section_too_small;
405 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
406 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
407 printk("%s""section_type: memory error\n", newpfx);
408 if (gdata->error_data_length >= sizeof(*mem_err))
409 cper_print_mem(newpfx, mem_err);
410 else
411 goto err_section_too_small;
412 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
413 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
414 printk("%s""section_type: PCIe error\n", newpfx);
415 if (gdata->error_data_length >= sizeof(*pcie))
416 cper_print_pcie(newpfx, pcie, gdata);
417 else
418 goto err_section_too_small;
419 } else
420 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
421
422 return;
423
424 err_section_too_small:
425 pr_err(FW_WARN "error section length is too small\n");
426 }
427
cper_estatus_print(const char * pfx,const struct acpi_hest_generic_status * estatus)428 void cper_estatus_print(const char *pfx,
429 const struct acpi_hest_generic_status *estatus)
430 {
431 struct acpi_hest_generic_data *gdata;
432 unsigned int data_len, gedata_len;
433 int sec_no = 0;
434 char newpfx[64];
435 __u16 severity;
436
437 severity = estatus->error_severity;
438 if (severity == CPER_SEV_CORRECTED)
439 printk("%s%s\n", pfx,
440 "It has been corrected by h/w "
441 "and requires no further action");
442 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
443 data_len = estatus->data_length;
444 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
445 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
446 while (data_len >= sizeof(*gdata)) {
447 gedata_len = gdata->error_data_length;
448 cper_estatus_print_section(newpfx, gdata, sec_no);
449 data_len -= gedata_len + sizeof(*gdata);
450 gdata = (void *)(gdata + 1) + gedata_len;
451 sec_no++;
452 }
453 }
454 EXPORT_SYMBOL_GPL(cper_estatus_print);
455
cper_estatus_check_header(const struct acpi_hest_generic_status * estatus)456 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
457 {
458 if (estatus->data_length &&
459 estatus->data_length < sizeof(struct acpi_hest_generic_data))
460 return -EINVAL;
461 if (estatus->raw_data_length &&
462 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
463 return -EINVAL;
464
465 return 0;
466 }
467 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
468
cper_estatus_check(const struct acpi_hest_generic_status * estatus)469 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
470 {
471 struct acpi_hest_generic_data *gdata;
472 unsigned int data_len, gedata_len;
473 int rc;
474
475 rc = cper_estatus_check_header(estatus);
476 if (rc)
477 return rc;
478 data_len = estatus->data_length;
479 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
480 while (data_len >= sizeof(*gdata)) {
481 gedata_len = gdata->error_data_length;
482 if (gedata_len > data_len - sizeof(*gdata))
483 return -EINVAL;
484 data_len -= gedata_len + sizeof(*gdata);
485 gdata = (void *)(gdata + 1) + gedata_len;
486 }
487 if (data_len)
488 return -EINVAL;
489
490 return 0;
491 }
492 EXPORT_SYMBOL_GPL(cper_estatus_check);
493