1 /*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various APEI tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.3.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/acpi.h>
32 #include <linux/pci.h>
33 #include <linux/aer.h>
34
35 /*
36 * CPER record ID need to be unique even after reboot, because record
37 * ID is used as index for ERST storage, while CPER records from
38 * multiple boot may co-exist in ERST.
39 */
cper_next_record_id(void)40 u64 cper_next_record_id(void)
41 {
42 static atomic64_t seq;
43
44 if (!atomic64_read(&seq))
45 atomic64_set(&seq, ((u64)get_seconds()) << 32);
46
47 return atomic64_inc_return(&seq);
48 }
49 EXPORT_SYMBOL_GPL(cper_next_record_id);
50
/* Severity names, indexed by the numeric severity value. */
static const char *cper_severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
57
cper_severity_str(unsigned int severity)58 static const char *cper_severity_str(unsigned int severity)
59 {
60 return severity < ARRAY_SIZE(cper_severity_strs) ?
61 cper_severity_strs[severity] : "unknown";
62 }
63
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs;
 * NULL entries in @strs are skipped.  The strings are joined with ", ".
 * If a line would become longer than 80 characters, the output is
 * split over multiple lines, with @pfx printed at the beginning of
 * each line.
 *
 * Entries of @strs at an index at or beyond the number of bits in
 * @bits cannot correspond to any bit and are ignored.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	const unsigned int nr_bits = 8 * sizeof(bits);
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/* Shifting by the width of @bits or more is undefined behavior. */
	if (strs_size > nr_bits)
		strs_size = nr_bits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/*
		 * Flush the pending line if appending ", " plus this string
		 * would push it past 80 characters; this also keeps every
		 * append below within buf.
		 */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	/* Emit whatever is left in the line buffer. */
	if (len)
		printk("%s\n", buf);
}
100
/* Names for the processor_type field, indexed by its value. */
static const char *cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Names for the processor_isa field, indexed by its value. */
static const char *cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Names for the processor error_type bit mask, indexed by bit position. */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Names for the processor operation field, indexed by its value. */
static const char *cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Names for the processor flags bit mask, indexed by bit position. */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
132
cper_print_proc_generic(const char * pfx,const struct cper_sec_proc_generic * proc)133 static void cper_print_proc_generic(const char *pfx,
134 const struct cper_sec_proc_generic *proc)
135 {
136 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
137 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
138 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
139 cper_proc_type_strs[proc->proc_type] : "unknown");
140 if (proc->validation_bits & CPER_PROC_VALID_ISA)
141 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
142 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
143 cper_proc_isa_strs[proc->proc_isa] : "unknown");
144 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
145 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
146 cper_print_bits(pfx, proc->proc_error_type,
147 cper_proc_error_type_strs,
148 ARRAY_SIZE(cper_proc_error_type_strs));
149 }
150 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
151 printk("%s""operation: %d, %s\n", pfx, proc->operation,
152 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
153 cper_proc_op_strs[proc->operation] : "unknown");
154 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
155 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
156 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
157 ARRAY_SIZE(cper_proc_flag_strs));
158 }
159 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
160 printk("%s""level: %d\n", pfx, proc->level);
161 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
162 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
163 if (proc->validation_bits & CPER_PROC_VALID_ID)
164 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
165 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
166 printk("%s""target_address: 0x%016llx\n",
167 pfx, proc->target_addr);
168 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
169 printk("%s""requestor_id: 0x%016llx\n",
170 pfx, proc->requestor_id);
171 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
172 printk("%s""responder_id: 0x%016llx\n",
173 pfx, proc->responder_id);
174 if (proc->validation_bits & CPER_PROC_VALID_IP)
175 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
176 }
177
/* Names for the memory error_type field, indexed by its value. */
static const char *cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
195
cper_print_mem(const char * pfx,const struct cper_sec_mem_err * mem)196 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
197 {
198 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
199 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
200 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
201 printk("%s""physical_address: 0x%016llx\n",
202 pfx, mem->physical_addr);
203 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
204 printk("%s""physical_address_mask: 0x%016llx\n",
205 pfx, mem->physical_addr_mask);
206 if (mem->validation_bits & CPER_MEM_VALID_NODE)
207 printk("%s""node: %d\n", pfx, mem->node);
208 if (mem->validation_bits & CPER_MEM_VALID_CARD)
209 printk("%s""card: %d\n", pfx, mem->card);
210 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
211 printk("%s""module: %d\n", pfx, mem->module);
212 if (mem->validation_bits & CPER_MEM_VALID_BANK)
213 printk("%s""bank: %d\n", pfx, mem->bank);
214 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
215 printk("%s""device: %d\n", pfx, mem->device);
216 if (mem->validation_bits & CPER_MEM_VALID_ROW)
217 printk("%s""row: %d\n", pfx, mem->row);
218 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
219 printk("%s""column: %d\n", pfx, mem->column);
220 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
221 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
222 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
223 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
224 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
225 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
226 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
227 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
228 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
229 u8 etype = mem->error_type;
230 printk("%s""error_type: %d, %s\n", pfx, etype,
231 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
232 cper_mem_err_type_strs[etype] : "unknown");
233 }
234 }
235
/* Names for the PCIe port_type field, indexed by its value. */
static const char *cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
249
cper_print_pcie(const char * pfx,const struct cper_sec_pcie * pcie,const struct acpi_hest_generic_data * gdata)250 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
251 const struct acpi_hest_generic_data *gdata)
252 {
253 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
254 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
255 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
256 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
257 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
258 printk("%s""version: %d.%d\n", pfx,
259 pcie->version.major, pcie->version.minor);
260 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
261 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
262 pcie->command, pcie->status);
263 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
264 const __u8 *p;
265 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
266 pcie->device_id.segment, pcie->device_id.bus,
267 pcie->device_id.device, pcie->device_id.function);
268 printk("%s""slot: %d\n", pfx,
269 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
270 printk("%s""secondary_bus: 0x%02x\n", pfx,
271 pcie->device_id.secondary_bus);
272 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
273 pcie->device_id.vendor_id, pcie->device_id.device_id);
274 p = pcie->device_id.class_code;
275 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
276 }
277 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
278 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
279 pcie->serial_number.lower, pcie->serial_number.upper);
280 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
281 printk(
282 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
283 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
284 }
285
/* Names for the error section flags bit mask, indexed by bit position. */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
294
apei_estatus_print_section(const char * pfx,const struct acpi_hest_generic_data * gdata,int sec_no)295 static void apei_estatus_print_section(
296 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
297 {
298 uuid_le *sec_type = (uuid_le *)gdata->section_type;
299 __u16 severity;
300
301 severity = gdata->error_severity;
302 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
303 cper_severity_str(severity));
304 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
305 cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
306 ARRAY_SIZE(apei_estatus_section_flag_strs));
307 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
308 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
309 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
310 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
311
312 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
313 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
314 printk("%s""section_type: general processor error\n", pfx);
315 if (gdata->error_data_length >= sizeof(*proc_err))
316 cper_print_proc_generic(pfx, proc_err);
317 else
318 goto err_section_too_small;
319 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
320 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
321 printk("%s""section_type: memory error\n", pfx);
322 if (gdata->error_data_length >= sizeof(*mem_err))
323 cper_print_mem(pfx, mem_err);
324 else
325 goto err_section_too_small;
326 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
327 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
328 printk("%s""section_type: PCIe error\n", pfx);
329 if (gdata->error_data_length >= sizeof(*pcie))
330 cper_print_pcie(pfx, pcie, gdata);
331 else
332 goto err_section_too_small;
333 } else
334 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
335
336 return;
337
338 err_section_too_small:
339 pr_err(FW_WARN "error section length is too small\n");
340 }
341
apei_estatus_print(const char * pfx,const struct acpi_hest_generic_status * estatus)342 void apei_estatus_print(const char *pfx,
343 const struct acpi_hest_generic_status *estatus)
344 {
345 struct acpi_hest_generic_data *gdata;
346 unsigned int data_len, gedata_len;
347 int sec_no = 0;
348 __u16 severity;
349
350 printk("%s""APEI generic hardware error status\n", pfx);
351 severity = estatus->error_severity;
352 printk("%s""severity: %d, %s\n", pfx, severity,
353 cper_severity_str(severity));
354 data_len = estatus->data_length;
355 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
356 while (data_len > sizeof(*gdata)) {
357 gedata_len = gdata->error_data_length;
358 apei_estatus_print_section(pfx, gdata, sec_no);
359 data_len -= gedata_len + sizeof(*gdata);
360 gdata = (void *)(gdata + 1) + gedata_len;
361 sec_no++;
362 }
363 }
364 EXPORT_SYMBOL_GPL(apei_estatus_print);
365
apei_estatus_check_header(const struct acpi_hest_generic_status * estatus)366 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
367 {
368 if (estatus->data_length &&
369 estatus->data_length < sizeof(struct acpi_hest_generic_data))
370 return -EINVAL;
371 if (estatus->raw_data_length &&
372 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
373 return -EINVAL;
374
375 return 0;
376 }
377 EXPORT_SYMBOL_GPL(apei_estatus_check_header);
378
apei_estatus_check(const struct acpi_hest_generic_status * estatus)379 int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
380 {
381 struct acpi_hest_generic_data *gdata;
382 unsigned int data_len, gedata_len;
383 int rc;
384
385 rc = apei_estatus_check_header(estatus);
386 if (rc)
387 return rc;
388 data_len = estatus->data_length;
389 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
390 while (data_len >= sizeof(*gdata)) {
391 gedata_len = gdata->error_data_length;
392 if (gedata_len > data_len - sizeof(*gdata))
393 return -EINVAL;
394 data_len -= gedata_len + sizeof(*gdata);
395 gdata = (void *)(gdata + 1) + gedata_len;
396 }
397 if (data_len)
398 return -EINVAL;
399
400 return 0;
401 }
402 EXPORT_SYMBOL_GPL(apei_estatus_check);
403