// SPDX-License-Identifier: GPL-2.0
/*
 * PCIe AER software error injection support.
 *
 * Debugging PCIe AER code is quite difficult because it is hard to
 * trigger various real hardware errors. Software based error
 * injection can fake almost all kinds of errors with the help of a
 * user space helper tool aer-inject, which can be gotten from:
 *   https://www.kernel.org/pub/linux/utils/pci/aer-inject/
 *
 * Copyright 2009 Intel Corporation.
 *     Huang Ying <ying.huang@intel.com>
 */

#define dev_fmt(fmt) "aer_inject: " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/miscdevice.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/stddef.h>
#include <linux/device.h>

#include "portdrv.h"
#include "../pci.h"

/* Override the existing corrected and uncorrected error masks */
static bool aer_mask_override;
module_param(aer_mask_override, bool, 0);

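/*
 * Injection request record written by user space to the aer_inject misc
 * device (typically /dev/aer_inject).  "domain" is the last member so
 * that older, shorter writes that omit it are still accepted; see
 * aer_inject_write().
 */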
struct aer_error_inj {
	u8 bus;
	u8 dev;
	u8 fn;
	u32 uncor_status;
	u32 cor_status;
	u32 header_log0;
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 domain;
};

struct aer_error {
	struct list_head list;
	u32 domain;
	unsigned int bus;
	unsigned int devfn;
	int pos_cap_err;

	u32 uncor_status;
	u32 cor_status;
	u32 header_log0;
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 root_status;
	u32 source_id;
};

struct pci_bus_ops {
	struct list_head list;
	struct pci_bus *bus;
	struct pci_ops *ops;
};

static LIST_HEAD(einjected);

static LIST_HEAD(pci_bus_ops_list);

/* Protect einjected and pci_bus_ops_list */
static DEFINE_SPINLOCK(inject_lock);

static void aer_error_init(struct aer_error *err, u32 domain,
			   unsigned int bus, unsigned int devfn,
			   int pos_cap_err)
{
	INIT_LIST_HEAD(&err->list);
	err->domain = domain;
	err->bus = bus;
	err->devfn = devfn;
	err->pos_cap_err = pos_cap_err;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error(u32 domain, unsigned int bus,
					  unsigned int devfn)
{
	struct aer_error *err;

	list_for_each_entry(err, &einjected, list) {
		if (domain == err->domain &&
		    bus == err->bus &&
		    devfn == err->devfn)
			return err;
	}
	return NULL;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev)
{
	int domain = pci_domain_nr(dev->bus);
	if (domain < 0)
		return NULL;
	return __find_aer_error(domain, dev->bus->number, dev->devfn);
}

/* inject_lock must be held before calling */
static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
{
	struct pci_bus_ops *bus_ops;

	list_for_each_entry(bus_ops, &pci_bus_ops_list, list) {
		if (bus_ops->bus == bus)
			return bus_ops->ops;
	}
	return NULL;
}

static struct pci_bus_ops *pci_bus_ops_pop(void)
{
	unsigned long flags;
	struct pci_bus_ops *bus_ops;

	spin_lock_irqsave(&inject_lock, flags);
	bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
					   struct pci_bus_ops, list);
	if (bus_ops)
		list_del(&bus_ops->list);
	spin_unlock_irqrestore(&inject_lock, flags);
	return bus_ops;
}

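/*
 * Map a config space offset inside the simulated AER capability to the
 * backing field in struct aer_error.  *prw1cs is set for registers with
 * "write 1 to clear" semantics (the status registers), so the write
 * path knows not to overwrite the simulated value directly.
 */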
static u32 *find_pci_config_dword(struct aer_error *err, int where,
				  int *prw1cs)
{
	int rw1cs = 0;
	u32 *target = NULL;

	if (err->pos_cap_err == -1)
		return NULL;

	switch (where - err->pos_cap_err) {
	case PCI_ERR_UNCOR_STATUS:
		target = &err->uncor_status;
		rw1cs = 1;
		break;
	case PCI_ERR_COR_STATUS:
		target = &err->cor_status;
		rw1cs = 1;
		break;
	case PCI_ERR_HEADER_LOG:
		target = &err->header_log0;
		break;
	case PCI_ERR_HEADER_LOG+4:
		target = &err->header_log1;
		break;
	case PCI_ERR_HEADER_LOG+8:
		target = &err->header_log2;
		break;
	case PCI_ERR_HEADER_LOG+12:
		target = &err->header_log3;
		break;
	case PCI_ERR_ROOT_STATUS:
		target = &err->root_status;
		rw1cs = 1;
		break;
	case PCI_ERR_ROOT_ERR_SRC:
		target = &err->source_id;
		break;
	}
	if (prw1cs)
		*prw1cs = rw1cs;
	return target;
}

static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where,
			int size, u32 *val)
{
	struct pci_ops *ops, *my_ops;
	int rv;

	ops = __find_pci_bus_ops(bus);
	if (!ops)
		return -1;

	my_ops = bus->ops;
	bus->ops = ops;
	rv = ops->read(bus, devfn, where, size, val);
	bus->ops = my_ops;

	return rv;
}

static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where,
			 int size, u32 val)
{
	struct pci_ops *ops, *my_ops;
	int rv;

	ops = __find_pci_bus_ops(bus);
	if (!ops)
		return -1;

	my_ops = bus->ops;
	bus->ops = ops;
	rv = ops->write(bus, devfn, where, size, val);
	bus->ops = my_ops;

	return rv;
}

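/*
 * Config accessors installed on a hooked bus: dword accesses that hit a
 * simulated AER register are served from the matching struct aer_error,
 * while everything else falls through to the bus's original pci_ops via
 * aer_inj_read()/aer_inj_write().
 */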
static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
			       int where, int size, u32 *val)
{
	u32 *sim;
	struct aer_error *err;
	unsigned long flags;
	int domain;
	int rv;

	spin_lock_irqsave(&inject_lock, flags);
	if (size != sizeof(u32))
		goto out;
	domain = pci_domain_nr(bus);
	if (domain < 0)
		goto out;
	err = __find_aer_error(domain, bus->number, devfn);
	if (!err)
		goto out;

	sim = find_pci_config_dword(err, where, NULL);
	if (sim) {
		*val = *sim;
		spin_unlock_irqrestore(&inject_lock, flags);
		return 0;
	}
out:
	rv = aer_inj_read(bus, devfn, where, size, val);
	spin_unlock_irqrestore(&inject_lock, flags);
	return rv;
}

static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
				int where, int size, u32 val)
{
	u32 *sim;
	struct aer_error *err;
	unsigned long flags;
	int rw1cs;
	int domain;
	int rv;

	spin_lock_irqsave(&inject_lock, flags);
	if (size != sizeof(u32))
		goto out;
	domain = pci_domain_nr(bus);
	if (domain < 0)
		goto out;
	err = __find_aer_error(domain, bus->number, devfn);
	if (!err)
		goto out;

	sim = find_pci_config_dword(err, where, &rw1cs);
	if (sim) {
		if (rw1cs)
			*sim ^= val;
		else
			*sim = val;
		spin_unlock_irqrestore(&inject_lock, flags);
		return 0;
	}
out:
	rv = aer_inj_write(bus, devfn, where, size, val);
	spin_unlock_irqrestore(&inject_lock, flags);
	return rv;
}

static struct pci_ops aer_inj_pci_ops = {
	.read = aer_inj_read_config,
	.write = aer_inj_write_config,
};

static void pci_bus_ops_init(struct pci_bus_ops *bus_ops,
			     struct pci_bus *bus,
			     struct pci_ops *ops)
{
	INIT_LIST_HEAD(&bus_ops->list);
	bus_ops->bus = bus;
	bus_ops->ops = ops;
}

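/*
 * Replace the ops of @bus with aer_inj_pci_ops and remember the original
 * ops on pci_bus_ops_list so they can be restored (and the bus reference
 * dropped) in aer_inject_exit().
 */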
static int pci_bus_set_aer_ops(struct pci_bus *bus)
{
	struct pci_ops *ops;
	struct pci_bus_ops *bus_ops;
	unsigned long flags;

	bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL);
	if (!bus_ops)
		return -ENOMEM;
	ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
	spin_lock_irqsave(&inject_lock, flags);
	if (ops == &aer_inj_pci_ops)
		goto out;
	/*
	 * Take a reference on the PCI bus.  Otherwise, when we restore the
	 * original 'pci_ops' in 'aer_inject_exit', the 'pci_bus' may
	 * already have been freed.
	 */
	pci_bus_get(bus);

	pci_bus_ops_init(bus_ops, bus, ops);
	list_add(&bus_ops->list, &pci_bus_ops_list);
	bus_ops = NULL;
out:
	spin_unlock_irqrestore(&inject_lock, flags);
	kfree(bus_ops);
	return 0;
}

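/*
 * Core of the injector: look up the target device and its Root Port,
 * queue the requested status and header-log bits on the einjected list,
 * hook config space on both buses so the AER driver reads back the faked
 * registers, and finally trigger the Root Port's AER interrupt.
 */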
static int aer_inject(struct aer_error_inj *einj)
{
	struct aer_error *err, *rperr;
	struct aer_error *err_alloc = NULL, *rperr_alloc = NULL;
	struct pci_dev *dev, *rpdev;
	struct pcie_device *edev;
	struct device *device;
	unsigned long flags;
	unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
	int pos_cap_err, rp_pos_cap_err;
	u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
	int ret = 0;

	dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn);
	if (!dev)
		return -ENODEV;
	rpdev = pcie_find_root_port(dev);
	if (!rpdev) {
		pci_err(dev, "Root port not found\n");
		ret = -ENODEV;
		goto out_put;
	}

	pos_cap_err = dev->aer_cap;
	if (!pos_cap_err) {
		pci_err(dev, "Device doesn't support AER\n");
		ret = -EPROTONOSUPPORT;
		goto out_put;
	}
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever);
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask);
	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
			      &uncor_mask);

	rp_pos_cap_err = rpdev->aer_cap;
	if (!rp_pos_cap_err) {
		pci_err(rpdev, "Root port doesn't support AER\n");
		ret = -EPROTONOSUPPORT;
		goto out_put;
	}

	err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
	if (!err_alloc) {
		ret = -ENOMEM;
		goto out_put;
	}
	rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
	if (!rperr_alloc) {
		ret = -ENOMEM;
		goto out_put;
	}

	if (aer_mask_override) {
		cor_mask_orig = cor_mask;
		cor_mask &= !(einj->cor_status);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
				       cor_mask);

		uncor_mask_orig = uncor_mask;
		uncor_mask &= !(einj->uncor_status);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
				       uncor_mask);
	}

	spin_lock_irqsave(&inject_lock, flags);

	err = __find_aer_error_by_dev(dev);
	if (!err) {
		err = err_alloc;
		err_alloc = NULL;
		aer_error_init(err, einj->domain, einj->bus, devfn,
			       pos_cap_err);
		list_add(&err->list, &einjected);
	}
	err->uncor_status |= einj->uncor_status;
	err->cor_status |= einj->cor_status;
	err->header_log0 = einj->header_log0;
	err->header_log1 = einj->header_log1;
	err->header_log2 = einj->header_log2;
	err->header_log3 = einj->header_log3;

	if (!aer_mask_override && einj->cor_status &&
	    !(einj->cor_status & ~cor_mask)) {
		ret = -EINVAL;
		pci_warn(dev, "The correctable error(s) is masked by device\n");
		spin_unlock_irqrestore(&inject_lock, flags);
		goto out_put;
	}
	if (!aer_mask_override && einj->uncor_status &&
	    !(einj->uncor_status & ~uncor_mask)) {
		ret = -EINVAL;
		pci_warn(dev, "The uncorrectable error(s) is masked by device\n");
		spin_unlock_irqrestore(&inject_lock, flags);
		goto out_put;
	}

	rperr = __find_aer_error_by_dev(rpdev);
	if (!rperr) {
		rperr = rperr_alloc;
		rperr_alloc = NULL;
		aer_error_init(rperr, pci_domain_nr(rpdev->bus),
			       rpdev->bus->number, rpdev->devfn,
			       rp_pos_cap_err);
		list_add(&rperr->list, &einjected);
	}
	if (einj->cor_status) {
		if (rperr->root_status & PCI_ERR_ROOT_COR_RCV)
			rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
		else
			rperr->root_status |= PCI_ERR_ROOT_COR_RCV;
		rperr->source_id &= 0xffff0000;
		rperr->source_id |= (einj->bus << 8) | devfn;
	}
	if (einj->uncor_status) {
		if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)
			rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
		if (sever & einj->uncor_status) {
			rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV;
			if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV))
				rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL;
		} else
			rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
		rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV;
		rperr->source_id &= 0x0000ffff;
		rperr->source_id |= ((einj->bus << 8) | devfn) << 16;
	}
	spin_unlock_irqrestore(&inject_lock, flags);

	if (aer_mask_override) {
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
				       cor_mask_orig);
		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
				       uncor_mask_orig);
	}

	ret = pci_bus_set_aer_ops(dev->bus);
	if (ret)
		goto out_put;
	ret = pci_bus_set_aer_ops(rpdev->bus);
	if (ret)
		goto out_put;

	device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER);
	if (device) {
		edev = to_pcie_device(device);
		if (!get_service_data(edev)) {
			pci_warn(edev->port, "AER service is not initialized\n");
			ret = -EPROTONOSUPPORT;
			goto out_put;
		}
		pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n",
			 einj->cor_status, einj->uncor_status, pci_name(dev));
		ret = irq_inject_interrupt(edev->irq);
	} else {
		pci_err(rpdev, "AER device not found\n");
		ret = -ENODEV;
	}
out_put:
	kfree(err_alloc);
	kfree(rperr_alloc);
	pci_dev_put(dev);
	return ret;
}

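/*
 * Write handler for the aer_inject misc device.  User space passes one
 * struct aer_error_inj per write(); the trailing "domain" member may be
 * omitted by older callers.  A minimal sketch of a caller (illustrative
 * only -- the aer-inject tool referenced in the header comment is the
 * supported front end; device node path and error bit are assumptions):
 *
 *	struct aer_error_inj einj = {
 *		.domain = 0, .bus = 1, .dev = 0, .fn = 0,
 *		.cor_status = PCI_ERR_COR_RCVR,
 *	};
 *	int fd = open("/dev/aer_inject", O_WRONLY);
 *	if (fd >= 0 && write(fd, &einj, sizeof(einj)) < 0)
 *		perror("aer_inject");
 */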
static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct aer_error_inj einj;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (usize < offsetof(struct aer_error_inj, domain) ||
	    usize > sizeof(einj))
		return -EINVAL;

	memset(&einj, 0, sizeof(einj));
	if (copy_from_user(&einj, ubuf, usize))
		return -EFAULT;

	ret = aer_inject(&einj);
	return ret ? ret : usize;
}

static const struct file_operations aer_inject_fops = {
	.write = aer_inject_write,
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
};

static struct miscdevice aer_inject_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "aer_inject",
	.fops = &aer_inject_fops,
};

static int __init aer_inject_init(void)
{
	return misc_register(&aer_inject_device);
}

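/*
 * Undo everything on module unload: restore the original pci_ops on every
 * hooked bus (dropping the reference taken in pci_bus_set_aer_ops()) and
 * free any still-queued injected errors.
 */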
static void __exit aer_inject_exit(void)
{
	struct aer_error *err, *err_next;
	unsigned long flags;
	struct pci_bus_ops *bus_ops;

	misc_deregister(&aer_inject_device);

	while ((bus_ops = pci_bus_ops_pop())) {
		pci_bus_set_ops(bus_ops->bus, bus_ops->ops);
		pci_bus_put(bus_ops->bus);
		kfree(bus_ops);
	}

	spin_lock_irqsave(&inject_lock, flags);
	list_for_each_entry_safe(err, err_next, &einjected, list) {
		list_del(&err->list);
		kfree(err);
	}
	spin_unlock_irqrestore(&inject_lock, flags);
}

module_init(aer_inject_init);
module_exit(aer_inject_exit);

MODULE_DESCRIPTION("PCIe AER software error injector");
MODULE_LICENSE("GPL");