• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * UEFI Common Platform Error Record (CPER) support
3  *
4  * Copyright (C) 2010, Intel Corp.
5  *	Author: Huang Ying <ying.huang@intel.com>
6  *
7  * CPER is the format used to describe platform hardware error by
8  * various APEI tables, such as ERST, BERT and HEST etc.
9  *
10  * For more information about CPER, please refer to Appendix N of UEFI
11  * Specification version 2.3.
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26 
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/acpi.h>
32 #include <linux/pci.h>
33 #include <linux/aer.h>
34 
35 /*
36  * CPER record ID need to be unique even after reboot, because record
37  * ID is used as index for ERST storage, while CPER records from
38  * multiple boot may co-exist in ERST.
39  */
cper_next_record_id(void)40 u64 cper_next_record_id(void)
41 {
42 	static atomic64_t seq;
43 
44 	if (!atomic64_read(&seq))
45 		atomic64_set(&seq, ((u64)get_seconds()) << 32);
46 
47 	return atomic64_inc_return(&seq);
48 }
49 EXPORT_SYMBOL_GPL(cper_next_record_id);
50 
/* Printable severity names, indexed by the numeric severity value. */
static const char *cper_severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
57 
cper_severity_str(unsigned int severity)58 static const char *cper_severity_str(unsigned int severity)
59 {
60 	return severity < ARRAY_SIZE(cper_severity_strs) ?
61 		cper_severity_strs[severity] : "unknown";
62 }
63 
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: number of entries in @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs;
 * NULL entries are skipped.  The strings are joined with ", ".  When
 * appending the next string would take the line past 80 characters,
 * the current line is printed and a new one is started, with @pfx
 * printed at the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * @bits has no positions beyond its own width, and shifting by
	 * that width or more is undefined, so stop there even if @strs
	 * has more entries.
	 */
	for (i = 0; i < strs_size && i < 8 * sizeof(bits); i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len never exceeds the buffer and
		 * sizeof(buf) - len cannot wrap around when an over-long
		 * prefix or string gets truncated.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
100 
/* Processor type names, indexed by the proc_type field. */
static const char *cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char *cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position in proc_error_type. */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char *cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position in flags. */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
132 
cper_print_proc_generic(const char * pfx,const struct cper_sec_proc_generic * proc)133 static void cper_print_proc_generic(const char *pfx,
134 				    const struct cper_sec_proc_generic *proc)
135 {
136 	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
137 		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
138 		       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
139 		       cper_proc_type_strs[proc->proc_type] : "unknown");
140 	if (proc->validation_bits & CPER_PROC_VALID_ISA)
141 		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
142 		       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
143 		       cper_proc_isa_strs[proc->proc_isa] : "unknown");
144 	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
145 		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
146 		cper_print_bits(pfx, proc->proc_error_type,
147 				cper_proc_error_type_strs,
148 				ARRAY_SIZE(cper_proc_error_type_strs));
149 	}
150 	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
151 		printk("%s""operation: %d, %s\n", pfx, proc->operation,
152 		       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
153 		       cper_proc_op_strs[proc->operation] : "unknown");
154 	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
155 		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
156 		cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
157 				ARRAY_SIZE(cper_proc_flag_strs));
158 	}
159 	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
160 		printk("%s""level: %d\n", pfx, proc->level);
161 	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
162 		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
163 	if (proc->validation_bits & CPER_PROC_VALID_ID)
164 		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
165 	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
166 		printk("%s""target_address: 0x%016llx\n",
167 		       pfx, proc->target_addr);
168 	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
169 		printk("%s""requestor_id: 0x%016llx\n",
170 		       pfx, proc->requestor_id);
171 	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
172 		printk("%s""responder_id: 0x%016llx\n",
173 		       pfx, proc->responder_id);
174 	if (proc->validation_bits & CPER_PROC_VALID_IP)
175 		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
176 }
177 
/* Memory error type names, indexed by the error_type field. */
static const char *cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
195 
cper_print_mem(const char * pfx,const struct cper_sec_mem_err * mem)196 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
197 {
198 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
199 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
200 	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
201 		printk("%s""physical_address: 0x%016llx\n",
202 		       pfx, mem->physical_addr);
203 	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
204 		printk("%s""physical_address_mask: 0x%016llx\n",
205 		       pfx, mem->physical_addr_mask);
206 	if (mem->validation_bits & CPER_MEM_VALID_NODE)
207 		printk("%s""node: %d\n", pfx, mem->node);
208 	if (mem->validation_bits & CPER_MEM_VALID_CARD)
209 		printk("%s""card: %d\n", pfx, mem->card);
210 	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
211 		printk("%s""module: %d\n", pfx, mem->module);
212 	if (mem->validation_bits & CPER_MEM_VALID_BANK)
213 		printk("%s""bank: %d\n", pfx, mem->bank);
214 	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
215 		printk("%s""device: %d\n", pfx, mem->device);
216 	if (mem->validation_bits & CPER_MEM_VALID_ROW)
217 		printk("%s""row: %d\n", pfx, mem->row);
218 	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
219 		printk("%s""column: %d\n", pfx, mem->column);
220 	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
221 		printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
222 	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
223 		printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
224 	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
225 		printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
226 	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
227 		printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
228 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
229 		u8 etype = mem->error_type;
230 		printk("%s""error_type: %d, %s\n", pfx, etype,
231 		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
232 		       cper_mem_err_type_strs[etype] : "unknown");
233 	}
234 }
235 
/* PCIe port type names, indexed by the port_type field. */
static const char *cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
249 
cper_print_pcie(const char * pfx,const struct cper_sec_pcie * pcie,const struct acpi_hest_generic_data * gdata)250 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
251 			    const struct acpi_hest_generic_data *gdata)
252 {
253 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
254 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
255 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
256 		       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
257 	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
258 		printk("%s""version: %d.%d\n", pfx,
259 		       pcie->version.major, pcie->version.minor);
260 	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
261 		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
262 		       pcie->command, pcie->status);
263 	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
264 		const __u8 *p;
265 		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
266 		       pcie->device_id.segment, pcie->device_id.bus,
267 		       pcie->device_id.device, pcie->device_id.function);
268 		printk("%s""slot: %d\n", pfx,
269 		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
270 		printk("%s""secondary_bus: 0x%02x\n", pfx,
271 		       pcie->device_id.secondary_bus);
272 		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
273 		       pcie->device_id.vendor_id, pcie->device_id.device_id);
274 		p = pcie->device_id.class_code;
275 		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
276 	}
277 	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
278 		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
279 		       pcie->serial_number.lower, pcie->serial_number.upper);
280 	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
281 		printk(
282 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
283 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
284 }
285 
/* Section flag names, indexed by bit position in the section's flags. */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
294 
apei_estatus_print_section(const char * pfx,const struct acpi_hest_generic_data * gdata,int sec_no)295 static void apei_estatus_print_section(
296 	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
297 {
298 	uuid_le *sec_type = (uuid_le *)gdata->section_type;
299 	__u16 severity;
300 
301 	severity = gdata->error_severity;
302 	printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
303 	       cper_severity_str(severity));
304 	printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
305 	cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
306 			ARRAY_SIZE(apei_estatus_section_flag_strs));
307 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
308 		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
309 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
310 		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
311 
312 	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
313 		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
314 		printk("%s""section_type: general processor error\n", pfx);
315 		if (gdata->error_data_length >= sizeof(*proc_err))
316 			cper_print_proc_generic(pfx, proc_err);
317 		else
318 			goto err_section_too_small;
319 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
320 		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
321 		printk("%s""section_type: memory error\n", pfx);
322 		if (gdata->error_data_length >= sizeof(*mem_err))
323 			cper_print_mem(pfx, mem_err);
324 		else
325 			goto err_section_too_small;
326 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
327 		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
328 		printk("%s""section_type: PCIe error\n", pfx);
329 		if (gdata->error_data_length >= sizeof(*pcie))
330 			cper_print_pcie(pfx, pcie, gdata);
331 		else
332 			goto err_section_too_small;
333 	} else
334 		printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
335 
336 	return;
337 
338 err_section_too_small:
339 	pr_err(FW_WARN "error section length is too small\n");
340 }
341 
apei_estatus_print(const char * pfx,const struct acpi_hest_generic_status * estatus)342 void apei_estatus_print(const char *pfx,
343 			const struct acpi_hest_generic_status *estatus)
344 {
345 	struct acpi_hest_generic_data *gdata;
346 	unsigned int data_len, gedata_len;
347 	int sec_no = 0;
348 	__u16 severity;
349 
350 	printk("%s""APEI generic hardware error status\n", pfx);
351 	severity = estatus->error_severity;
352 	printk("%s""severity: %d, %s\n", pfx, severity,
353 	       cper_severity_str(severity));
354 	data_len = estatus->data_length;
355 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
356 	while (data_len > sizeof(*gdata)) {
357 		gedata_len = gdata->error_data_length;
358 		apei_estatus_print_section(pfx, gdata, sec_no);
359 		data_len -= gedata_len + sizeof(*gdata);
360 		gdata = (void *)(gdata + 1) + gedata_len;
361 		sec_no++;
362 	}
363 }
364 EXPORT_SYMBOL_GPL(apei_estatus_print);
365 
apei_estatus_check_header(const struct acpi_hest_generic_status * estatus)366 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
367 {
368 	if (estatus->data_length &&
369 	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
370 		return -EINVAL;
371 	if (estatus->raw_data_length &&
372 	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
373 		return -EINVAL;
374 
375 	return 0;
376 }
377 EXPORT_SYMBOL_GPL(apei_estatus_check_header);
378 
apei_estatus_check(const struct acpi_hest_generic_status * estatus)379 int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
380 {
381 	struct acpi_hest_generic_data *gdata;
382 	unsigned int data_len, gedata_len;
383 	int rc;
384 
385 	rc = apei_estatus_check_header(estatus);
386 	if (rc)
387 		return rc;
388 	data_len = estatus->data_length;
389 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
390 	while (data_len >= sizeof(*gdata)) {
391 		gedata_len = gdata->error_data_length;
392 		if (gedata_len > data_len - sizeof(*gdata))
393 			return -EINVAL;
394 		data_len -= gedata_len + sizeof(*gdata);
395 		gdata = (void *)(gdata + 1) + gedata_len;
396 	}
397 	if (data_len)
398 		return -EINVAL;
399 
400 	return 0;
401 }
402 EXPORT_SYMBOL_GPL(apei_estatus_check);
403