/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/slab.h>

#include "pci.h"

static int pci_msi_enable = 1;

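/*
 * The MSI-X Table Size field in the Message Control register is encoded
 * as N-1, so add one to get the number of table entries.
 */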
#define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)


/* Arch hooks */

int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	struct msi_chip *chip = dev->bus->msi;
	int err;

	if (!chip || !chip->setup_irq)
		return -EINVAL;

	err = chip->setup_irq(chip, dev, desc);
	if (err < 0)
		return err;

	irq_set_chip_data(desc->irq, chip);

	return 0;
}

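/*
 * Illustrative sketch (hypothetical, not part of this file): a platform
 * interrupt controller driver supplies the per-bus hooks used above by
 * filling in a struct msi_chip with the setup_irq/teardown_irq callbacks
 * this file invokes, e.g.:
 *
 *	static int my_msi_setup_irq(struct msi_chip *chip,
 *				    struct pci_dev *dev, struct msi_desc *desc)
 *	{
 *		// allocate a hardware vector, map it to a Linux irq,
 *		// then irq_set_msi_desc(virq, desc) and
 *		// write_msi_msg(virq, &msg) to program the device
 *		return 0;
 *	}
 *
 *	static struct msi_chip my_msi_chip = {
 *		.setup_irq	= my_msi_setup_irq,
 *		.teardown_irq	= my_msi_teardown_irq,
 *	};
 *
 * The my_* names are invented for illustration.
 */
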
void __weak arch_teardown_msi_irq(unsigned int irq)
{
	struct msi_chip *chip = irq_get_chip_data(irq);

	if (!chip || !chip->teardown_irq)
		return;

	chip->teardown_irq(chip, irq);
}

int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *entry;
	int ret;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	list_for_each_entry(entry, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, entry);
		if (ret < 0)
			return ret;
		if (ret > 0)
			return -ENOSPC;
	}

	return 0;
}

/*
 * We have a default implementation available as a separate non-weak
 * function, as it is used by the Xen x86 PCI code
 */
void default_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (entry->irq == 0)
			continue;
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
	}
}

void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	return default_teardown_msi_irqs(dev);
}

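/*
 * Hypothetical illustration (not part of this file): an architecture that
 * needs extra bookkeeping can override the weak hook and still fall back
 * to the common code, e.g.:
 *
 *	void arch_teardown_msi_irqs(struct pci_dev *dev)
 *	{
 *		my_arch_release_vectors(dev);		// arch-specific cleanup
 *		default_teardown_msi_irqs(dev);		// common teardown
 *	}
 *
 * my_arch_release_vectors() is an invented name for illustration.
 */
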
static void default_restore_msi_irq(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry;

	entry = NULL;
	if (dev->msix_enabled) {
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (irq == entry->irq)
				break;
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		__write_msi_msg(entry, &entry->msg);
}

void __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return default_restore_msi_irqs(dev);
}

static void msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
{
	u16 ctrl;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
}

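/*
 * msi_mask() returns a mask with 2^x low bits set, one bit per vector the
 * device may use; x is the log2 vector count taken from the Multiple
 * Message Capable field of the Message Control register.
 */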
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	return (1 << (1 << x)) - 1;
}

/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 mask_bits = desc->masked;

	if (!desc->msi_attrib.maskbit)
		return 0;

	mask_bits &= ~mask;
	mask_bits |= flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);

	return mask_bits;
}

__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	return default_msi_mask_irq(desc, mask, flag);
}

static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	desc->masked = arch_msi_mask_irq(desc, mask, flag);
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 mask_bits = desc->masked;
	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;
	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag)
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	writel(mask_bits, desc->mask_base + offset);

	return mask_bits;
}

__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	return default_msix_mask_irq(desc, flag);
}

static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	desc->masked = arch_msix_mask_irq(desc, flag);
}

static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi(data);

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
	} else {
		unsigned offset = data->irq - desc->irq;
		msi_mask_irq(desc, 1 << offset, flag << offset);
	}
}

void mask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 1);
}

void unmask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 0);
}

void default_restore_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		default_restore_msi_irq(dev, entry->irq);
	}
}

void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	BUG_ON(entry->dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__read_msi_msg(entry, msg);
}

void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	/* Assert that the cache is valid, assuming that
	 * valid messages are not all-zeroes. */
	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
		 entry->msg.data));

	*msg = entry->msg;
}

void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__get_cached_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);

void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	if (entry->dev->current_state != PCI_D0) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
	}
	entry->msg = *msg;
}

void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(write_msi_msg);

static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry, *tmp;
	struct attribute **msi_attrs;
	struct device_attribute *dev_attr;
	int count = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (!entry->irq)
			continue;
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			BUG_ON(irq_has_action(entry->irq + i));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		if (entry->msi_attrib.is_msix) {
			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}

		list_del(&entry->list);
		kfree(entry);
	}

	if (dev->msi_irq_groups) {
		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
		msi_attrs = dev->msi_irq_groups[0]->attrs;
		while (msi_attrs[count]) {
			dev_attr = container_of(msi_attrs[count],
						struct device_attribute, attr);
			kfree(dev_attr->attr.name);
			kfree(dev_attr);
			++count;
		}
		kfree(msi_attrs);
		kfree(dev->msi_irq_groups[0]);
		kfree(dev->msi_irq_groups);
		dev->msi_irq_groups = NULL;
	}
}

static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return NULL;

	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;

	return desc;
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	u16 control;
	struct msi_desc *entry;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 0);
	arch_restore_msi_irqs(dev);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
		     entry->masked);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->msix_enabled)
		return;
	BUG_ON(list_empty(&dev->msi_list));

	/* route the table */
	pci_intx_for_msi(dev, 0);
	msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	arch_restore_msi_irqs(dev);
	list_for_each_entry(entry, &dev->msi_list, list) {
		msix_mask_irq(entry, entry->masked);
	}

	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct msi_desc *entry;
	unsigned long irq;
	int retval;

	retval = kstrtoul(attr->attr.name, 10, &irq);
	if (retval)
		return retval;

	entry = irq_get_msi_desc(irq);
	if (entry)
		return sprintf(buf, "%s\n",
				entry->msi_attrib.is_msix ? "msix" : "msi");

	return -ENODEV;
}

static int populate_msi_sysfs(struct pci_dev *pdev)
{
	struct attribute **msi_attrs;
	struct attribute *msi_attr;
	struct device_attribute *msi_dev_attr;
	struct attribute_group *msi_irq_group;
	const struct attribute_group **msi_irq_groups;
	struct msi_desc *entry;
	int ret = -ENOMEM;
	int num_msi = 0;
	int count = 0;

	/* Determine how many msi entries we have */
	list_for_each_entry(entry, &pdev->msi_list, list) {
		++num_msi;
	}
	if (!num_msi)
		return 0;

	/* Dynamically create the MSI attributes for the PCI device */
	msi_attrs = kzalloc(sizeof(void *) * (num_msi + 1), GFP_KERNEL);
	if (!msi_attrs)
		return -ENOMEM;
	list_for_each_entry(entry, &pdev->msi_list, list) {
		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
		if (!msi_dev_attr)
			goto error_attrs;
		msi_attrs[count] = &msi_dev_attr->attr;

		sysfs_attr_init(&msi_dev_attr->attr);
		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
						    entry->irq);
		if (!msi_dev_attr->attr.name)
			goto error_attrs;
		msi_dev_attr->attr.mode = S_IRUGO;
		msi_dev_attr->show = msi_mode_show;
		++count;
	}

	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
	if (!msi_irq_group)
		goto error_attrs;
	msi_irq_group->name = "msi_irqs";
	msi_irq_group->attrs = msi_attrs;

	msi_irq_groups = kzalloc(sizeof(void *) * 2, GFP_KERNEL);
	if (!msi_irq_groups)
		goto error_irq_group;
	msi_irq_groups[0] = msi_irq_group;

	ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
	if (ret)
		goto error_irq_groups;
	pdev->msi_irq_groups = msi_irq_groups;

	return 0;

error_irq_groups:
	kfree(msi_irq_groups);
error_irq_group:
	kfree(msi_irq_group);
error_attrs:
	count = 0;
	msi_attr = msi_attrs[count];
	while (msi_attr) {
		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
		kfree(msi_attr->name);
		kfree(msi_dev_attr);
		++count;
		msi_attr = msi_attrs[count];
	}
	kfree(msi_attrs);
	return ret;
}

static struct msi_desc *msi_setup_entry(struct pci_dev *dev)
{
	u16 control;
	struct msi_desc *entry;

	/* MSI Entry Initialization */
	entry = alloc_msi_entry(dev);
	if (!entry)
		return NULL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);

	entry->msi_attrib.is_msix	= 0;
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
	entry->msi_attrib.entry_nr	= 0;
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;

	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);

	return entry;
}

static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		if (!dev->no_64bit_msi || !entry->msg.address_hi)
			continue;
		dev_err(&dev->dev, "Device has broken 64-bit MSI but arch tried to assign one above 4G\n");
		return -EIO;
	}
	return 0;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
{
	struct msi_desc *entry;
	int ret;
	unsigned mask;

	msi_set_enable(dev, 0);	/* Disable MSI during set up */

	entry = msi_setup_entry(dev);
	if (!entry)
		return -ENOMEM;

	/* All MSIs are unmasked by default; mask them all */
	mask = msi_mask(entry->msi_attrib.multi_cap);
	msi_mask_irq(entry, mask, mask);

	list_add_tail(&entry->list, &dev->msi_list);

	/* Configure MSI capability structure */
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	ret = msi_verify_entries(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	/* Set MSI enabled bits */
	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 1);
	dev->msi_enabled = 1;

	dev->irq = entry->irq;
	return 0;
}

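/*
 * The MSI-X Table Offset/BIR register packs a BAR Indicator Register
 * (which BAR the table lives in) into its low bits and the offset of
 * the table within that BAR into the remaining bits.
 */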
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec)
{
	struct msi_desc *entry;
	int i;

	for (i = 0; i < nvec; i++) {
		entry = alloc_msi_entry(dev);
		if (!entry) {
			if (!i)
				iounmap(base);
			else
				free_msi_irqs(dev);
			/* Not enough memory. Don't try again */
			return -ENOMEM;
		}

		entry->msi_attrib.is_msix	= 1;
		entry->msi_attrib.is_64		= 1;
		entry->msi_attrib.entry_nr	= entries[i].entry;
		entry->msi_attrib.default_irq	= dev->irq;
		entry->mask_base		= base;

		list_add_tail(&entry->list, &dev->msi_list);
	}

	return 0;
}

static void msix_program_entries(struct pci_dev *dev,
				 struct msix_entry *entries)
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
		irq_set_msi_desc(entry->irq, entry);
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs.  A return of zero indicates the
 * successful setup of the requested MSI-X entries with allocated irqs,
 * non-zero otherwise.
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	int ret;
	u16 control;
	void __iomem *base;

	/* Ensure MSI-X is disabled while it is set up */
	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	base = msix_map_region(dev, msix_table_size(control));
	if (!base)
		return -ENOMEM;

	ret = msix_setup_entries(dev, base, entries, nvec);
	if (ret)
		return ret;

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto out_avail;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		goto out_free;

	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);

	msix_program_entries(dev, entries);

	ret = populate_msi_sysfs(dev);
	if (ret)
		goto out_free;

	/* Set MSI-X enabled bits and unmask the function */
	pci_intx_for_msi(dev, 0);
	dev->msix_enabled = 1;

	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	return 0;

out_avail:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
		struct msi_desc *entry;
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

out_free:
	free_msi_irqs(dev);

	return ret;
}

/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 1, else return 0.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi || dev->current_state != PCI_D0)
		return 0;

	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set the NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller drivers
	 * or quirks for specific PCI bridges to set NO_MSI.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}
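
/*
 * Illustrative sketch (hypothetical, not part of this file): a host bridge
 * driver or quirk for a bridge that cannot forward MSI writes upstream
 * would mark the affected bus accordingly, e.g.:
 *
 *	bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 */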

/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * the Multiple Message Capable register. It returns a negative errno if the
 * device is not capable of sending MSI interrupts. Otherwise, the call
 * succeeds and returns a power of two, up to a maximum of 2^5 (32),
 * according to the MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	int ret;
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	return ret;
}
EXPORT_SYMBOL(pci_msi_vec_count);
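
/*
 * Hypothetical usage sketch (driver code, not part of this file): a driver
 * can size its request by the device's advertised capability, e.g.:
 *
 *	int nvec = pci_msi_vec_count(pdev);
 *	if (nvec < 0)
 *		return nvec;		// no MSI capability
 *	nvec = pci_enable_msi_range(pdev, 1, nvec);
 */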

void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;
	u32 mask;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);

	msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as its initial state */
	mask = msi_mask(desc->msi_attrib.multi_cap);
	/* Keep cached state to be restored */
	arch_msi_mask_irq(desc, mask, ~mask);

	/* Restore dev->irq to its default pin-assertion irq */
	dev->irq = desc->msi_attrib.default_irq;
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_vec_count - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 *
 * This function returns the number of the device's MSI-X table entries and
 * therefore the number of MSI-X vectors the device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
{
	u16 control;

	if (!dev->msix_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	return msix_table_size(control);
}
EXPORT_SYMBOL(pci_msix_vec_count);

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Setup the MSI-X capability structure of the device function with the
 * number of requested irqs when the driver asks for MSI-X mode to be
 * enabled on the device.  A return of zero indicates the successful
 * configuration of the MSI-X capability structure with newly allocated
 * MSI-X irqs.  A return of < 0 indicates a failure.  A return of > 0
 * indicates that the request exceeds the number of irqs or MSI-X vectors
 * available; the driver should use the returned value to re-send its
 * request.
 **/
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
	int nr_entries;
	int i, j;

	if (!pci_msi_supported(dev, nvec))
		return -EINVAL;

	if (!entries)
		return -EINVAL;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	if (nvec > nr_entries)
		return nr_entries;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	WARN_ON(!!dev->msix_enabled);

	/* Check whether driver already requested for MSI irq */
	if (dev->msi_enabled) {
		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	return msix_capability_init(dev, entries, nvec);
}
EXPORT_SYMBOL(pci_enable_msix);
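
/*
 * Hypothetical usage sketch (driver code, not part of this file): fill an
 * msix_entry array and retry with the available count on a positive return:
 *
 *	struct msix_entry entries[4];
 *	int i, rc, nvec = 4;
 *
 *	for (i = 0; i < nvec; i++)
 *		entries[i].entry = i;
 *	rc = pci_enable_msix(pdev, entries, nvec);
 *	if (rc > 0)
 *		rc = pci_enable_msix(pdev, entries, rc);  // retry with fewer
 *	if (rc)
 *		return rc;	// fall back to MSI or INTx
 */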

void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	/* Return the device with MSI-X masked as its initial state */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
		arch_msix_mask_irq(entry, 1);
	}

	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msix);

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);

void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);

	/* Disable the MSI hardware to avoid screaming interrupts
	 * during boot.  This is the power-on reset default, so
	 * usually this should be a noop.
	 */
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
		msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
}

/**
 * pci_enable_msi_range - configure device's MSI capability structure
 * @dev: device to configure
 * @minvec: minimal number of interrupts to configure
 * @maxvec: maximum number of interrupts to configure
 *
 * This function tries to allocate a maximum possible number of interrupts in a
 * range between @minvec and @maxvec. It returns a negative errno if an error
 * occurs. If it succeeds, it returns the actual number of interrupts allocated
 * and updates the @dev's irq member to the lowest new interrupt number;
 * the other interrupt numbers allocated to this device are consecutive.
 **/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec))
		return -EINVAL;

	WARN_ON(!!dev->msi_enabled);

	/* Check whether driver already requested MSI-X irqs */
	if (dev->msix_enabled) {
		dev_info(&dev->dev,
			 "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	else if (nvec < minvec)
		return -EINVAL;
	else if (nvec > maxvec)
		nvec = maxvec;

	do {
		rc = msi_capability_init(dev, nvec);
		if (rc < 0) {
			return rc;
		} else if (rc > 0) {
			if (rc < minvec)
				return -ENOSPC;
			nvec = rc;
		}
	} while (rc);

	return nvec;
}
EXPORT_SYMBOL(pci_enable_msi_range);
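
/*
 * Hypothetical usage sketch (driver code, not part of this file):
 *
 *	nvec = pci_enable_msi_range(pdev, 1, 8);
 *	if (nvec < 0)
 *		return nvec;	// fall back to legacy INTx
 *	// the device was granted nvec consecutive irqs starting at pdev->irq
 */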

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X irqs requested
 * @maxvec: maximum number of MSI-X irqs requested
 *
 * Setup the MSI-X capability structure of the device function with a
 * maximum possible number of interrupts in the range between @minvec and
 * @maxvec when the driver asks for MSI-X mode to be enabled on the device.
 * It returns a negative errno if an error occurs.  If it succeeds, it
 * returns the actual number of MSI-X interrupts allocated.
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
			       int minvec, int maxvec)
{
	int nvec = maxvec;
	int rc;

	if (maxvec < minvec)
		return -ERANGE;

	do {
		rc = pci_enable_msix(dev, entries, nvec);
		if (rc < 0) {
			return rc;
		} else if (rc > 0) {
			if (rc < minvec)
				return -ENOSPC;
			nvec = rc;
		}
	} while (rc);

	return nvec;
}
EXPORT_SYMBOL(pci_enable_msix_range);
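
/*
 * Hypothetical usage sketch (driver code, not part of this file): this
 * helper subsumes the manual retry loop a driver would otherwise write
 * around pci_enable_msix(), e.g.:
 *
 *	nvec = pci_enable_msix_range(pdev, entries, 2, 8);
 *	if (nvec < 0)
 *		return nvec;	// not even 2 vectors were available
 */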