1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * PCI Hotplug Driver for PowerPC PowerNV platform.
4 *
5 * Copyright Gavin Shan, IBM Corporation 2016.
6 * Copyright (C) 2025 Raptor Engineering, LLC
7 * Copyright (C) 2025 Raptor Computing Systems, LLC
8 */
9
10 #include <linux/bitfield.h>
11 #include <linux/libfdt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/delay.h>
15 #include <linux/pci_hotplug.h>
16 #include <linux/of_fdt.h>
17
18 #include <asm/opal.h>
19 #include <asm/pnv-pci.h>
20 #include <asm/ppc-pci.h>
21
22 #define DRIVER_VERSION "0.1"
23 #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
24 #define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver"
25
/*
 * Emit a warning on behalf of a slot. The message is attributed to the
 * slot's bridge device when one is set, otherwise to the slot's bus device.
 * Note that (sl)->bus must be valid whenever (sl)->pdev is NULL.
 */
#define SLOT_WARN(sl, x...) \
	((sl)->pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x))

/*
 * Hotplug event handed over from the interrupt handler to the per-slot
 * workqueue. It is allocated in pnv_php_interrupt() and freed by
 * pnv_php_event_handler().
 */
struct pnv_php_event {
	bool			added;		/* true: device added, false: removed */
	struct pnv_php_slot	*php_slot;	/* slot the event was raised for */
	struct work_struct	work;		/* work item queued on php_slot->wq */
};

/* Slots without a parent slot; slot lists are modified under pnv_php_lock */
static LIST_HEAD(pnv_php_slot_list);
static DEFINE_SPINLOCK(pnv_php_lock);

/* Forward declarations for functions used before they are defined */
static void pnv_php_register(struct device_node *dn);
static void pnv_php_unregister_one(struct device_node *dn);
static void pnv_php_unregister(struct device_node *dn);

static void pnv_php_enable_irq(struct pnv_php_slot *php_slot);
43
/*
 * Tear down the hotplug interrupt of a slot.
 *
 * @php_slot:       slot whose interrupt is released
 * @disable_device: also disable MSI/MSI-X and the bridge device itself
 * @disable_msi:    also disable MSI/MSI-X on the bridge
 *
 * When an interrupt is currently registered (php_slot->irq > 0) the hotplug
 * event sources are masked in the Slot Control register before the handler
 * is freed.
 */
static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
				bool disable_device, bool disable_msi)
{
	struct pci_dev *bridge = php_slot->pdev;
	u16 sltctl;

	if (php_slot->irq > 0) {
		/* Stop the bridge from raising hotplug events first */
		pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &sltctl);
		sltctl &= ~(PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_PDCE |
			    PCI_EXP_SLTCTL_DLLSCE);
		pcie_capability_write_word(bridge, PCI_EXP_SLTCTL, sltctl);

		free_irq(php_slot->irq, php_slot);
		php_slot->irq = 0;
	}

	/* Nothing more to do unless the caller asked for a deeper teardown */
	if (!disable_device && !disable_msi)
		return;

	if (bridge->msix_enabled)
		pci_disable_msix(bridge);
	else if (bridge->msi_enabled)
		pci_disable_msi(bridge);

	if (disable_device)
		pci_disable_device(bridge);
}
71
pnv_php_free_slot(struct kref * kref)72 static void pnv_php_free_slot(struct kref *kref)
73 {
74 struct pnv_php_slot *php_slot = container_of(kref,
75 struct pnv_php_slot, kref);
76
77 WARN_ON(!list_empty(&php_slot->children));
78 pnv_php_disable_irq(php_slot, false, false);
79 destroy_workqueue(php_slot->wq);
80 kfree(php_slot->name);
81 kfree(php_slot);
82 }
83
pnv_php_put_slot(struct pnv_php_slot * php_slot)84 static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
85 {
86
87 if (!php_slot)
88 return;
89
90 kref_put(&php_slot->kref, pnv_php_free_slot);
91 }
92
pnv_php_match(struct device_node * dn,struct pnv_php_slot * php_slot)93 static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
94 struct pnv_php_slot *php_slot)
95 {
96 struct pnv_php_slot *target, *tmp;
97
98 if (php_slot->dn == dn) {
99 kref_get(&php_slot->kref);
100 return php_slot;
101 }
102
103 list_for_each_entry(tmp, &php_slot->children, link) {
104 target = pnv_php_match(dn, tmp);
105 if (target)
106 return target;
107 }
108
109 return NULL;
110 }
111
pnv_php_find_slot(struct device_node * dn)112 struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
113 {
114 struct pnv_php_slot *php_slot, *tmp;
115 unsigned long flags;
116
117 spin_lock_irqsave(&pnv_php_lock, flags);
118 list_for_each_entry(tmp, &pnv_php_slot_list, link) {
119 php_slot = pnv_php_match(dn, tmp);
120 if (php_slot) {
121 spin_unlock_irqrestore(&pnv_php_lock, flags);
122 return php_slot;
123 }
124 }
125 spin_unlock_irqrestore(&pnv_php_lock, flags);
126
127 return NULL;
128 }
129 EXPORT_SYMBOL_GPL(pnv_php_find_slot);
130
131 /*
132 * Remove pdn for all children of the indicated device node.
133 * The function should remove pdn in a depth-first manner.
134 */
pnv_php_rmv_pdns(struct device_node * dn)135 static void pnv_php_rmv_pdns(struct device_node *dn)
136 {
137 struct device_node *child;
138
139 for_each_child_of_node(dn, child) {
140 pnv_php_rmv_pdns(child);
141
142 pci_remove_device_node_info(child);
143 }
144 }
145
/*
 * Detach all child nodes of the indicated device node. The
 * function handles device nodes in a depth-first manner.
 *
 * We should not invoke of_node_release() as the memory for
 * an individual device node is part of a large memory block. The
 * large block is allocated from memblock (system bootup) or
 * kmalloc() when unflattening the device tree by OF changeset.
 * We cannot free the large block allocated from memblock. For
 * the latter case, it should be released at once.
 */
static void pnv_php_detach_device_nodes(struct device_node *parent)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		/* Detach everything below @dn before @dn itself */
		pnv_php_detach_device_nodes(dn);

		/*
		 * NOTE(review): this put is in addition to the reference the
		 * iterator manages; presumably it balances a reference held
		 * since the node was attached -- confirm.
		 */
		of_node_put(dn);
		of_detach_node(dn);
	}
}
168
pnv_php_rmv_devtree(struct pnv_php_slot * php_slot)169 static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
170 {
171 pnv_php_rmv_pdns(php_slot->dn);
172
173 /*
174 * Decrease the refcount if the device nodes were created
175 * through OF changeset before detaching them.
176 */
177 if (php_slot->fdt)
178 of_changeset_destroy(&php_slot->ocs);
179 pnv_php_detach_device_nodes(php_slot->dn);
180
181 if (php_slot->fdt) {
182 kfree(php_slot->dt);
183 kfree(php_slot->fdt);
184 php_slot->dt = NULL;
185 php_slot->dn->child = NULL;
186 php_slot->fdt = NULL;
187 }
188 }
189
190 /*
191 * As the nodes in OF changeset are applied in reverse order, we
192 * need revert the nodes in advance so that we have correct node
193 * order after the changeset is applied.
194 */
pnv_php_reverse_nodes(struct device_node * parent)195 static void pnv_php_reverse_nodes(struct device_node *parent)
196 {
197 struct device_node *child, *next;
198
199 /* In-depth first */
200 for_each_child_of_node(parent, child)
201 pnv_php_reverse_nodes(child);
202
203 /* Reverse the nodes in the child list */
204 child = parent->child;
205 parent->child = NULL;
206 while (child) {
207 next = child->sibling;
208
209 child->sibling = parent->child;
210 parent->child = child;
211 child = next;
212 }
213 }
214
pnv_php_populate_changeset(struct of_changeset * ocs,struct device_node * dn)215 static int pnv_php_populate_changeset(struct of_changeset *ocs,
216 struct device_node *dn)
217 {
218 struct device_node *child;
219 int ret = 0;
220
221 for_each_child_of_node(dn, child) {
222 ret = of_changeset_attach_node(ocs, child);
223 if (ret) {
224 of_node_put(child);
225 break;
226 }
227
228 ret = pnv_php_populate_changeset(ocs, child);
229 if (ret) {
230 of_node_put(child);
231 break;
232 }
233 }
234
235 return ret;
236 }
237
pnv_php_add_one_pdn(struct device_node * dn,void * data)238 static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
239 {
240 struct pci_controller *hose = (struct pci_controller *)data;
241 struct pci_dn *pdn;
242
243 pdn = pci_add_device_node_info(hose, dn);
244 if (!pdn)
245 return ERR_PTR(-ENOMEM);
246
247 return NULL;
248 }
249
pnv_php_add_pdns(struct pnv_php_slot * slot)250 static void pnv_php_add_pdns(struct pnv_php_slot *slot)
251 {
252 struct pci_controller *hose = pci_bus_to_host(slot->bus);
253
254 pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose);
255 }
256
/*
 * Fetch the device sub-tree behind a slot from firmware and attach it below
 * the slot's device node.
 *
 * On success the FDT blob and the unflattened tree are remembered in
 * php_slot->fdt and php_slot->dt so that pnv_php_rmv_devtree() can release
 * them later.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when the blob
 * cannot be unflattened, or the error reported by the firmware call or the
 * OF changeset functions.
 */
static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
{
	void *fdt, *fdt1, *dt;
	int ret;

	/* We don't know the FDT blob size. We try to get it through
	 * maximal memory chunk and then copy it to another chunk that
	 * fits the real size.
	 */
	fdt1 = kzalloc(0x10000, GFP_KERNEL);
	if (!fdt1) {
		ret = -ENOMEM;
		goto out;
	}

	ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
		goto free_fdt1;
	}

	/* Copy the blob into a buffer of the size stated in its header */
	fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL);
	if (!fdt) {
		ret = -ENOMEM;
		goto free_fdt1;
	}

	/* Unflatten device tree blob */
	dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
	if (!dt) {
		ret = -EINVAL;
		SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
		goto free_fdt;
	}

	/* Initialize and apply the changeset */
	of_changeset_init(&php_slot->ocs);
	pnv_php_reverse_nodes(php_slot->dn);
	ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
	if (ret) {
		/* Undo the reversal done above before bailing out */
		pnv_php_reverse_nodes(php_slot->dn);
		SLOT_WARN(php_slot, "Error %d populating changeset\n",
			  ret);
		goto free_dt;
	}

	/* The child link is cleared here; the changeset attaches the nodes */
	php_slot->dn->child = NULL;
	ret = of_changeset_apply(&php_slot->ocs);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
		goto destroy_changeset;
	}

	/* Add device node firmware data */
	pnv_php_add_pdns(php_slot);
	php_slot->fdt = fdt;
	php_slot->dt  = dt;
	/* The oversized scratch buffer is no longer needed */
	kfree(fdt1);
	goto out;

	/* Error unwinding, in reverse order of acquisition */
destroy_changeset:
	of_changeset_destroy(&php_slot->ocs);
free_dt:
	kfree(dt);
	php_slot->dn->child = NULL;
free_fdt:
	kfree(fdt);
free_fdt1:
	kfree(fdt1);
out:
	return ret;
}
329
to_pnv_php_slot(struct hotplug_slot * slot)330 static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
331 {
332 return container_of(slot, struct pnv_php_slot, slot);
333 }
334
/*
 * Ask firmware to change the power state of a slot and update the device
 * tree below the slot accordingly.
 *
 * @slot:  hotplug slot to operate on
 * @state: requested OPAL_PCI_SLOT_* state
 *
 * A positive return from pnv_pci_set_power_state() means a completion
 * message was received in @msg; it is validated against the slot's phandle
 * and the requested state. For a power-off/offline request the sub-tree is
 * removed, for any other request it is (re)built from firmware.
 *
 * Returns the result of the last step: -ENOMSG for an unexpected message,
 * -ENODEV when firmware reports failure, the negative error of the firmware
 * call, or, for power-on, the result of pnv_php_add_devtree().
 */
int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
				 uint8_t state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	u64 rsp_phandle, rsp_state, rsp_status;
	struct opal_msg msg;
	bool powering_down;
	int ret;

	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
	if (ret < 0)
		goto error;

	if (ret > 0) {
		rsp_phandle = be64_to_cpu(msg.params[1]);
		rsp_state = be64_to_cpu(msg.params[2]);
		rsp_status = be64_to_cpu(msg.params[3]);

		/* The message must be about this slot and this request */
		if (rsp_phandle != php_slot->dn->phandle ||
		    rsp_state != state) {
			SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
				  rsp_phandle, rsp_state, rsp_status);
			return -ENOMSG;
		}

		if (rsp_status != OPAL_SUCCESS) {
			ret = -ENODEV;
			goto error;
		}
	}

	powering_down = state == OPAL_PCI_SLOT_POWER_OFF ||
			state == OPAL_PCI_SLOT_OFFLINE;
	if (!powering_down)
		ret = pnv_php_add_devtree(php_slot);
	else
		pnv_php_rmv_devtree(php_slot);

	return ret;

error:
	SLOT_WARN(php_slot, "Error %d powering %s\n",
		  ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
	return ret;
}
EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
373
/*
 * hotplug_slot_ops.get_power_status: report the power state of a slot.
 *
 * @slot:  hotplug slot to query
 * @state: receives the OPAL_PCI_SLOT_POWER_* state
 *
 * The state is retrieved from firmware. If that fails, a warning is logged
 * and the state falls back to "on". *state is therefore always written, so
 * callers that pass an uninitialised buffer never read an undefined value.
 *
 * Always returns 0.
 */
static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
	int ret;

	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d getting power status\n",
			  ret);
		/* Don't trust whatever a failed call may have left behind */
		power_state = OPAL_PCI_SLOT_POWER_ON;
	}

	*state = power_state;

	return 0;
}
395
pcie_check_link_active(struct pci_dev * pdev)396 static int pcie_check_link_active(struct pci_dev *pdev)
397 {
398 u16 lnk_status;
399 int ret;
400
401 ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
402 if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
403 return -ENODEV;
404
405 ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
406
407 return ret;
408 }
409
/*
 * hotplug_slot_ops.get_adapter_status: report whether an adapter is present.
 *
 * @slot:  hotplug slot to query
 * @state: receives OPAL_PCI_SLOT_PRESENT or OPAL_PCI_SLOT_EMPTY; it is only
 *         written on success
 *
 * The presence state is retrieved from firmware. For a downstream port that
 * firmware reports as empty, the Link Active bit is consulted as well (see
 * below). Slots without a bridge device (php_slot->pdev == NULL) skip that
 * extra check instead of dereferencing a NULL pci_dev.
 *
 * Returns 0 on success or the negative error from firmware.
 */
static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
	int ret;

	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
	if (ret < 0) {
		SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
		return ret;
	}

	/*
	 * Similar to pciehp_hpc, check whether the Link Active
	 * bit is set to account for broken downstream bridges
	 * that don't properly assert Presence Detect State, as
	 * was observed on the Microsemi Switchtec PM8533 PFX
	 * [11f8:8533].
	 */
	if (presence == OPAL_PCI_SLOT_EMPTY && bridge &&
	    pci_pcie_type(bridge) == PCI_EXP_TYPE_DOWNSTREAM &&
	    pcie_check_link_active(bridge) > 0)
		presence = OPAL_PCI_SLOT_PRESENT;

	*state = presence;

	return 0;
}
443
/*
 * hotplug_slot_ops.get_attention_status: return the attention state last
 * stored by pnv_php_set_attention_state(). Always returns 0.
 */
static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
{
	*state = to_pnv_php_slot(slot)->attention_state;

	return 0;
}
451
/*
 * hotplug_slot_ops.set_attention_status: remember the requested attention
 * state and, when the slot has a bridge device, drive the Attention
 * Indicator Control field of its Slot Control register (on for a non-zero
 * @state, off otherwise). Always returns 0.
 */
static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	u16 indicator;

	php_slot->attention_state = state;

	if (bridge) {
		indicator = state ? PCI_EXP_SLTCTL_ATTN_IND_ON :
				    PCI_EXP_SLTCTL_ATTN_IND_OFF;
		pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL,
						   PCI_EXP_SLTCTL_AIC,
						   indicator);
	}

	return 0;
}
473
pnv_php_activate_slot(struct pnv_php_slot * php_slot,struct hotplug_slot * slot)474 static int pnv_php_activate_slot(struct pnv_php_slot *php_slot,
475 struct hotplug_slot *slot)
476 {
477 int ret, i;
478
479 /*
480 * Issue initial slot activation command to firmware
481 *
482 * Firmware will power slot on, attempt to train the link, and
483 * discover any downstream devices. If this process fails, firmware
484 * will return an error code and an invalid device tree. Failure
485 * can be caused for multiple reasons, including a faulty
486 * downstream device, poor connection to the downstream device, or
487 * a previously latched PHB fence. On failure, issue fundamental
488 * reset up to three times before aborting.
489 */
490 ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
491 if (ret) {
492 SLOT_WARN(
493 php_slot,
494 "PCI slot activation failed with error code %d, possible frozen PHB",
495 ret);
496 SLOT_WARN(
497 php_slot,
498 "Attempting complete PHB reset before retrying slot activation\n");
499 for (i = 0; i < 3; i++) {
500 /*
501 * Slot activation failed, PHB may be fenced from a
502 * prior device failure.
503 *
504 * Use the OPAL fundamental reset call to both try a
505 * device reset and clear any potentially active PHB
506 * fence / freeze.
507 */
508 SLOT_WARN(php_slot, "Try %d...\n", i + 1);
509 pci_set_pcie_reset_state(php_slot->pdev,
510 pcie_warm_reset);
511 msleep(250);
512 pci_set_pcie_reset_state(php_slot->pdev,
513 pcie_deassert_reset);
514
515 ret = pnv_php_set_slot_power_state(
516 slot, OPAL_PCI_SLOT_POWER_ON);
517 if (!ret)
518 break;
519 }
520
521 if (i >= 3)
522 SLOT_WARN(php_slot,
523 "Failed to bring slot online, aborting!\n");
524 }
525
526 return ret;
527 }
528
/*
 * Bring a registered slot into the populated state.
 *
 * @php_slot: slot to enable
 * @rescan:   when true and an adapter is present, scan the slot's bus for
 *            devices and register child hotpluggable slots
 *
 * Slots that are not in the registered state are left alone. The first call
 * for a slot (power_state_check still false) only records that the check
 * happened and may return early without changing the slot state.
 *
 * Returns 0 on success (including the early-return cases) or a negative
 * error from the presence query or the slot activation.
 */
static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
{
	struct hotplug_slot *slot = &php_slot->slot;
	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
	int ret;

	/* Check if the slot has been configured */
	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
		return 0;

	/* Retrieve slot presence status */
	ret = pnv_php_get_adapter_state(slot, &presence);
	if (ret)
		return ret;

	/*
	 * Proceed if there is nothing behind the slot. However,
	 * we should leave the slot in registered state at the
	 * beginning. Otherwise, the PCI devices inserted afterwards
	 * won't be probed and populated.
	 */
	if (presence == OPAL_PCI_SLOT_EMPTY) {
		if (!php_slot->power_state_check) {
			php_slot->power_state_check = true;

			return 0;
		}

		goto scan;
	}

	/*
	 * If the power supply to the slot is off, we can't detect
	 * adapter presence state. That means we have to turn the
	 * slot on before going to probe slot's presence state.
	 *
	 * On the first time, we don't change the power status to
	 * boost system boot with assumption that the firmware
	 * supplies consistent slot power status: empty slot always
	 * has its power off and non-empty slot has its power on.
	 */
	if (!php_slot->power_state_check) {
		php_slot->power_state_check = true;

		ret = pnv_php_get_power_state(slot, &power_status);
		if (ret)
			return ret;

		if (power_status != OPAL_PCI_SLOT_POWER_ON)
			return 0;
	}

	/* Check the power status. Scan the slot if it is already on */
	ret = pnv_php_get_power_state(slot, &power_status);
	if (ret)
		return ret;

	if (power_status == OPAL_PCI_SLOT_POWER_ON)
		goto scan;

	/* Power is off, turn it on and then scan the slot */
	ret = pnv_php_activate_slot(php_slot, slot);
	if (ret)
		return ret;

scan:
	if (presence == OPAL_PCI_SLOT_PRESENT) {
		if (rescan) {
			pci_lock_rescan_remove();
			pci_hp_add_devices(php_slot->bus);
			pci_unlock_rescan_remove();
		}

		/* Rescan for child hotpluggable slots */
		php_slot->state = PNV_PHP_STATE_POPULATED;
		if (rescan)
			pnv_php_register(php_slot->dn);
	} else {
		/* An empty slot is marked populated as well */
		php_slot->state = PNV_PHP_STATE_POPULATED;
	}

	return 0;
}
613
/*
 * hotplug_slot_ops.reset_slot: reset the secondary bus behind the slot.
 *
 * @slot:  hotplug slot to reset
 * @probe: when true, only report whether reset is supported
 *
 * In probe mode the return value is zero when the slot has a bridge device
 * and non-zero otherwise. Otherwise the bridge's secondary bus is reset with
 * the hotplug interrupt masked, the slot status changes caused by the reset
 * are acknowledged, and 0 is returned.
 */
static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	uint16_t sltsta;

	/*
	 * pnv_php may also drive OpenCAPI slots, which have no bridge.
	 * Reset is only claimed to be supported when a bridge exists
	 * (for now...).
	 */
	if (probe)
		return !bridge;

	/* Keep our handler quiet while the bridge is being reset */
	if (php_slot->irq > 0)
		disable_irq(php_slot->irq);

	pci_bridge_secondary_bus_reset(bridge);

	/* Acknowledge the status changes the reset itself has caused */
	pcie_capability_read_word(bridge, PCI_EXP_SLTSTA, &sltsta);
	sltsta &= PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC;
	pcie_capability_write_word(bridge, PCI_EXP_SLTSTA, sltsta);

	if (php_slot->irq > 0)
		enable_irq(php_slot->irq);

	return 0;
}
644
pnv_php_enable_slot(struct hotplug_slot * slot)645 static int pnv_php_enable_slot(struct hotplug_slot *slot)
646 {
647 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
648 u32 prop32;
649 int ret;
650
651 ret = pnv_php_enable(php_slot, true);
652 if (ret)
653 return ret;
654
655 /* (Re-)enable interrupt if the slot supports surprise hotplug */
656 ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable",
657 &prop32);
658 if (!ret && prop32)
659 pnv_php_enable_irq(php_slot);
660
661 return 0;
662 }
663
664 /*
665 * Disable any hotplug interrupts for all slots on the provided bus, as well as
666 * all downstream slots in preparation for a hot unplug.
667 */
pnv_php_disable_all_irqs(struct pci_bus * bus)668 static int pnv_php_disable_all_irqs(struct pci_bus *bus)
669 {
670 struct pci_bus *child_bus;
671 struct pci_slot *slot;
672
673 /* First go down child buses */
674 list_for_each_entry(child_bus, &bus->children, node)
675 pnv_php_disable_all_irqs(child_bus);
676
677 /* Disable IRQs for all pnv_php slots on this bus */
678 list_for_each_entry(slot, &bus->slots, list) {
679 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug);
680
681 pnv_php_disable_irq(php_slot, false, true);
682 }
683
684 return 0;
685 }
686
687 /*
688 * Disable any hotplug interrupts for all downstream slots on the provided
689 * bus in preparation for a hot unplug.
690 */
pnv_php_disable_all_downstream_irqs(struct pci_bus * bus)691 static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus)
692 {
693 struct pci_bus *child_bus;
694
695 /* Go down child buses, recursively deactivating their IRQs */
696 list_for_each_entry(child_bus, &bus->children, node)
697 pnv_php_disable_all_irqs(child_bus);
698
699 return 0;
700 }
701
pnv_php_disable_slot(struct hotplug_slot * slot)702 static int pnv_php_disable_slot(struct hotplug_slot *slot)
703 {
704 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
705 int ret;
706
707 /*
708 * Allow to disable a slot already in the registered state to
709 * cover cases where the slot couldn't be enabled and never
710 * reached the populated state
711 */
712 if (php_slot->state != PNV_PHP_STATE_POPULATED &&
713 php_slot->state != PNV_PHP_STATE_REGISTERED)
714 return 0;
715
716 /*
717 * Free all IRQ resources from all child slots before remove.
718 * Note that we do not disable the root slot IRQ here as that
719 * would also deactivate the slot hot (re)plug interrupt!
720 */
721 pnv_php_disable_all_downstream_irqs(php_slot->bus);
722
723 /* Remove all devices behind the slot */
724 pci_lock_rescan_remove();
725 pci_hp_remove_devices(php_slot->bus);
726 pci_unlock_rescan_remove();
727
728 /* Detach the child hotpluggable slots */
729 pnv_php_unregister(php_slot->dn);
730
731 /* Notify firmware and remove device nodes */
732 ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);
733
734 php_slot->state = PNV_PHP_STATE_REGISTERED;
735 return ret;
736 }
737
/*
 * Operations handed to the PCI hotplug core for every slot; installed as
 * slot.ops in pnv_php_alloc_slot().
 */
static const struct hotplug_slot_ops php_slot_ops = {
	.get_power_status	= pnv_php_get_power_state,
	.get_adapter_status	= pnv_php_get_adapter_state,
	.get_attention_status	= pnv_php_get_attention_state,
	.set_attention_status	= pnv_php_set_attention_state,
	.enable_slot		= pnv_php_enable_slot,
	.disable_slot		= pnv_php_disable_slot,
	.reset_slot		= pnv_php_reset_slot,
};
747
pnv_php_release(struct pnv_php_slot * php_slot)748 static void pnv_php_release(struct pnv_php_slot *php_slot)
749 {
750 unsigned long flags;
751
752 /* Remove from global or child list */
753 spin_lock_irqsave(&pnv_php_lock, flags);
754 list_del(&php_slot->link);
755 spin_unlock_irqrestore(&pnv_php_lock, flags);
756
757 /* Detach from parent */
758 pnv_php_put_slot(php_slot);
759 pnv_php_put_slot(php_slot->parent);
760 }
761
pnv_php_alloc_slot(struct device_node * dn)762 static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
763 {
764 struct pnv_php_slot *php_slot;
765 struct pci_bus *bus;
766 const char *label;
767 uint64_t id;
768 int ret;
769
770 ret = of_property_read_string(dn, "ibm,slot-label", &label);
771 if (ret)
772 return NULL;
773
774 if (pnv_pci_get_slot_id(dn, &id))
775 return NULL;
776
777 bus = pci_find_bus_by_node(dn);
778 if (!bus)
779 return NULL;
780
781 php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL);
782 if (!php_slot)
783 return NULL;
784
785 php_slot->name = kstrdup(label, GFP_KERNEL);
786 if (!php_slot->name) {
787 kfree(php_slot);
788 return NULL;
789 }
790
791 /* Allocate workqueue for this slot's interrupt handling */
792 php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name);
793 if (!php_slot->wq) {
794 SLOT_WARN(php_slot, "Cannot alloc workqueue\n");
795 kfree(php_slot->name);
796 kfree(php_slot);
797 return NULL;
798 }
799
800 if (dn->child && PCI_DN(dn->child))
801 php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
802 else
803 php_slot->slot_no = -1; /* Placeholder slot */
804
805 kref_init(&php_slot->kref);
806 php_slot->state = PNV_PHP_STATE_INITIALIZED;
807 php_slot->dn = dn;
808 php_slot->pdev = bus->self;
809 php_slot->bus = bus;
810 php_slot->id = id;
811 php_slot->power_state_check = false;
812 php_slot->slot.ops = &php_slot_ops;
813
814 INIT_LIST_HEAD(&php_slot->children);
815 INIT_LIST_HEAD(&php_slot->link);
816
817 return php_slot;
818 }
819
/*
 * Register a freshly allocated slot with the PCI hotplug core and link it
 * into the driver's slot hierarchy.
 *
 * The slot is attached to the child list of the nearest ancestor device node
 * that already has a slot, or to the global list when there is none. On
 * success the slot is in the PNV_PHP_STATE_REGISTERED state.
 *
 * Returns 0 on success, -EEXIST when a slot for the same device node is
 * already registered, or the error from pci_hp_register().
 */
static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
{
	struct pnv_php_slot *parent;
	struct device_node *dn = php_slot->dn;
	unsigned long flags;
	int ret;

	/* Check if the slot is registered or not */
	parent = pnv_php_find_slot(php_slot->dn);
	if (parent) {
		/* Only existence matters here; drop the lookup reference */
		pnv_php_put_slot(parent);
		return -EEXIST;
	}

	/* Register PCI slot */
	ret = pci_hp_register(&php_slot->slot, php_slot->bus,
			      php_slot->slot_no, php_slot->name);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
		return ret;
	}

	/* Attach to the parent's child list or global list */
	while ((dn = of_get_parent(dn))) {
		/* Stop climbing at the first ancestor without PCI node info */
		if (!PCI_DN(dn)) {
			of_node_put(dn);
			break;
		}

		/*
		 * The reference taken by pnv_php_find_slot() is kept: it
		 * is the one pnv_php_release() drops on php_slot->parent.
		 */
		parent = pnv_php_find_slot(dn);
		if (parent) {
			of_node_put(dn);
			break;
		}

		of_node_put(dn);
	}

	/* @parent is still NULL here when no ancestor slot was found */
	spin_lock_irqsave(&pnv_php_lock, flags);
	php_slot->parent = parent;
	if (parent)
		list_add_tail(&php_slot->link, &parent->children);
	else
		list_add_tail(&php_slot->link, &pnv_php_slot_list);
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	php_slot->state = PNV_PHP_STATE_REGISTERED;
	return 0;
}
869
pnv_php_enable_msix(struct pnv_php_slot * php_slot)870 static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
871 {
872 struct pci_dev *pdev = php_slot->pdev;
873 struct msix_entry entry;
874 int nr_entries, ret;
875 u16 pcie_flag;
876
877 /* Get total number of MSIx entries */
878 nr_entries = pci_msix_vec_count(pdev);
879 if (nr_entries < 0)
880 return nr_entries;
881
882 /* Check hotplug MSIx entry is in range */
883 pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
884 entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag);
885 if (entry.entry >= nr_entries)
886 return -ERANGE;
887
888 /* Enable MSIx */
889 ret = pci_enable_msix_exact(pdev, &entry, 1);
890 if (ret) {
891 SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret);
892 return ret;
893 }
894
895 return entry.vector;
896 }
897
898 static void
pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot * php_slot)899 pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot)
900 {
901 struct pci_dev *pdev = php_slot->pdev;
902 struct eeh_dev *edev;
903 struct eeh_pe *pe;
904 int i, rc;
905
906 /*
907 * When a device is surprise removed from a downstream bridge slot,
908 * the upstream bridge port can still end up frozen due to related EEH
909 * events, which will in turn block the MSI interrupts for slot hotplug
910 * detection.
911 *
912 * Detect and thaw any frozen upstream PE after slot deactivation.
913 */
914 edev = pci_dev_to_eeh_dev(pdev);
915 pe = edev ? edev->pe : NULL;
916 rc = eeh_pe_get_state(pe);
917 if ((rc == -ENODEV) || (rc == -ENOENT)) {
918 SLOT_WARN(
919 php_slot,
920 "Upstream bridge PE state unknown, hotplug detect may fail\n");
921 } else {
922 if (pe->state & EEH_PE_ISOLATED) {
923 SLOT_WARN(
924 php_slot,
925 "Upstream bridge PE %02x frozen, thawing...\n",
926 pe->addr);
927 for (i = 0; i < 3; i++)
928 if (!eeh_unfreeze_pe(pe))
929 break;
930 if (i >= 3)
931 SLOT_WARN(
932 php_slot,
933 "Unable to thaw PE %02x, hotplug detect will fail!\n",
934 pe->addr);
935 else
936 SLOT_WARN(php_slot,
937 "PE %02x thawed successfully\n",
938 pe->addr);
939 }
940 }
941 }
942
pnv_php_event_handler(struct work_struct * work)943 static void pnv_php_event_handler(struct work_struct *work)
944 {
945 struct pnv_php_event *event =
946 container_of(work, struct pnv_php_event, work);
947 struct pnv_php_slot *php_slot = event->php_slot;
948
949 if (event->added) {
950 pnv_php_enable_slot(&php_slot->slot);
951 } else {
952 pnv_php_disable_slot(&php_slot->slot);
953 pnv_php_detect_clear_suprise_removal_freeze(php_slot);
954 }
955
956 kfree(event);
957 }
958
/*
 * Hotplug interrupt handler of a slot.
 *
 * @irq:  interrupt number
 * @data: the struct pnv_php_slot passed to request_irq()
 *
 * Acknowledges the presence-detect / link-state change bits, works out
 * whether a device was added or removed, and queues a pnv_php_event on the
 * slot's workqueue for pnv_php_event_handler() to process.
 *
 * Returns IRQ_NONE when neither change bit applies, IRQ_HANDLED otherwise
 * (including when the event had to be dropped).
 */
static irqreturn_t pnv_php_interrupt(int irq, void *data)
{
	struct pnv_php_slot *php_slot = data;
	struct pci_dev *pchild, *pdev = php_slot->pdev;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	struct pnv_php_event *event;
	u16 sts, lsts;
	u8 presence;
	bool added;
	unsigned long flags;
	int ret;

	/* Write back only the two change bits that were found set */
	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);

	pci_dbg(pdev, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n",
		php_slot->name,
		!!(sts & PCI_EXP_SLTSTA_DLLSC),
		!!(sts & PCI_EXP_SLTSTA_PDC));

	if (sts & PCI_EXP_SLTSTA_DLLSC) {
		/* Link state changed: the Link Active bit tells add/remove */
		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts);
		added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
	} else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) &&
		   (sts & PCI_EXP_SLTSTA_PDC)) {
		/* Presence changed: ask firmware for the current presence */
		ret = pnv_pci_get_presence_state(php_slot->id, &presence);
		if (ret) {
			SLOT_WARN(php_slot,
				  "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
				  php_slot->name, ret, sts);
			return IRQ_HANDLED;
		}

		added = !!(presence == OPAL_PCI_SLOT_PRESENT);
	} else {
		pci_dbg(pdev, "PCI slot [%s]: Spurious IRQ?\n", php_slot->name);
		return IRQ_NONE;
	}

	/* Freeze the removed PE to avoid unexpected error reporting */
	if (!added) {
		/* The PE is looked up through the first device on the bus */
		pchild = list_first_entry_or_null(&php_slot->bus->devices,
						  struct pci_dev, bus_list);
		edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL;
		pe = edev ? edev->pe : NULL;
		if (pe) {
			eeh_serialize_lock(&flags);
			eeh_pe_mark_isolated(pe);
			eeh_serialize_unlock(flags);
			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
		}
	}

	/*
	 * The PE is left in frozen state if the event is missed. It's
	 * fine as the PCI devices (PE) aren't functional any more.
	 */
	event = kzalloc(sizeof(*event), GFP_ATOMIC);
	if (!event) {
		SLOT_WARN(php_slot,
			  "PCI slot [%s] missed hotplug event 0x%04x\n",
			  php_slot->name, sts);
		return IRQ_HANDLED;
	}

	pci_info(pdev, "PCI slot [%s] %s (IRQ: %d)\n",
		 php_slot->name, added ? "added" : "removed", irq);
	/* Hand the event to the workqueue; the handler frees it */
	INIT_WORK(&event->work, pnv_php_event_handler);
	event->added = added;
	event->php_slot = php_slot;
	queue_work(php_slot->wq, &event->work);

	return IRQ_HANDLED;
}
1035
pnv_php_init_irq(struct pnv_php_slot * php_slot,int irq)1036 static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
1037 {
1038 struct pci_dev *pdev = php_slot->pdev;
1039 u32 broken_pdc = 0;
1040 u16 sts, ctrl;
1041 int ret;
1042
1043 /* Check PDC (Presence Detection Change) is broken or not */
1044 ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
1045 &broken_pdc);
1046 if (!ret && broken_pdc)
1047 php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;
1048
1049 /* Clear pending interrupts */
1050 pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
1051 if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
1052 sts |= PCI_EXP_SLTSTA_DLLSC;
1053 else
1054 sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
1055 pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
1056
1057 /* Request the interrupt */
1058 ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
1059 php_slot->name, php_slot);
1060 if (ret) {
1061 pnv_php_disable_irq(php_slot, true, true);
1062 SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq);
1063 return;
1064 }
1065
1066 /* Enable the interrupts */
1067 pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
1068 if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
1069 ctrl &= ~PCI_EXP_SLTCTL_PDCE;
1070 ctrl |= (PCI_EXP_SLTCTL_HPIE |
1071 PCI_EXP_SLTCTL_DLLSCE);
1072 } else {
1073 ctrl |= (PCI_EXP_SLTCTL_HPIE |
1074 PCI_EXP_SLTCTL_PDCE |
1075 PCI_EXP_SLTCTL_DLLSCE);
1076 }
1077 pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
1078
1079 /* The interrupt is initialized successfully when @irq is valid */
1080 php_slot->irq = irq;
1081 }
1082
pnv_php_enable_irq(struct pnv_php_slot * php_slot)1083 static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
1084 {
1085 struct pci_dev *pdev = php_slot->pdev;
1086 int irq, ret;
1087
1088 /*
1089 * The MSI/MSIx interrupt might have been occupied by other
1090 * drivers. Don't populate the surprise hotplug capability
1091 * in that case.
1092 */
1093 if (pci_dev_msi_enabled(pdev))
1094 return;
1095
1096 ret = pci_enable_device(pdev);
1097 if (ret) {
1098 SLOT_WARN(php_slot, "Error %d enabling device\n", ret);
1099 return;
1100 }
1101
1102 pci_set_master(pdev);
1103
1104 /* Enable MSIx interrupt */
1105 irq = pnv_php_enable_msix(php_slot);
1106 if (irq > 0) {
1107 pnv_php_init_irq(php_slot, irq);
1108 return;
1109 }
1110
1111 /*
1112 * Use MSI if MSIx doesn't work. Fail back to legacy INTx
1113 * if MSI doesn't work either
1114 */
1115 ret = pci_enable_msi(pdev);
1116 if (!ret || pdev->irq) {
1117 irq = pdev->irq;
1118 pnv_php_init_irq(php_slot, irq);
1119 }
1120 }
1121
pnv_php_register_one(struct device_node * dn)1122 static int pnv_php_register_one(struct device_node *dn)
1123 {
1124 struct pnv_php_slot *php_slot;
1125 u32 prop32;
1126 int ret;
1127
1128 /* Check if it's hotpluggable slot */
1129 ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
1130 if (ret || !prop32)
1131 return -ENXIO;
1132
1133 ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
1134 if (ret || !prop32)
1135 return -ENXIO;
1136
1137 php_slot = pnv_php_alloc_slot(dn);
1138 if (!php_slot)
1139 return -ENODEV;
1140
1141 ret = pnv_php_register_slot(php_slot);
1142 if (ret)
1143 goto free_slot;
1144
1145 ret = pnv_php_enable(php_slot, false);
1146 if (ret)
1147 goto unregister_slot;
1148
1149 /* Enable interrupt if the slot supports surprise hotplug */
1150 ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
1151 if (!ret && prop32)
1152 pnv_php_enable_irq(php_slot);
1153
1154 return 0;
1155
1156 unregister_slot:
1157 pnv_php_unregister_one(php_slot->dn);
1158 free_slot:
1159 pnv_php_put_slot(php_slot);
1160 return ret;
1161 }
1162
pnv_php_register(struct device_node * dn)1163 static void pnv_php_register(struct device_node *dn)
1164 {
1165 struct device_node *child;
1166
1167 /*
1168 * The parent slots should be registered before their
1169 * child slots.
1170 */
1171 for_each_child_of_node(dn, child) {
1172 pnv_php_register_one(child);
1173 pnv_php_register(child);
1174 }
1175 }
1176
pnv_php_unregister_one(struct device_node * dn)1177 static void pnv_php_unregister_one(struct device_node *dn)
1178 {
1179 struct pnv_php_slot *php_slot;
1180
1181 php_slot = pnv_php_find_slot(dn);
1182 if (!php_slot)
1183 return;
1184
1185 php_slot->state = PNV_PHP_STATE_OFFLINE;
1186 pci_hp_deregister(&php_slot->slot);
1187 pnv_php_release(php_slot);
1188 pnv_php_put_slot(php_slot);
1189 }
1190
pnv_php_unregister(struct device_node * dn)1191 static void pnv_php_unregister(struct device_node *dn)
1192 {
1193 struct device_node *child;
1194
1195 /* The child slots should go before their parent slots */
1196 for_each_child_of_node(dn, child) {
1197 pnv_php_unregister(child);
1198 pnv_php_unregister_one(child);
1199 }
1200 }
1201
pnv_php_init(void)1202 static int __init pnv_php_init(void)
1203 {
1204 struct device_node *dn;
1205
1206 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
1207 for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
1208 pnv_php_register(dn);
1209
1210 for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
1211 pnv_php_register(dn);
1212
1213 for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
1214 pnv_php_register_one(dn); /* slot directly under the PHB */
1215 return 0;
1216 }
1217
pnv_php_exit(void)1218 static void __exit pnv_php_exit(void)
1219 {
1220 struct device_node *dn;
1221
1222 for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
1223 pnv_php_unregister(dn);
1224
1225 for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
1226 pnv_php_unregister(dn);
1227
1228 for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
1229 pnv_php_unregister_one(dn); /* slot directly under the PHB */
1230 }
1231
/* Module entry and exit points */
module_init(pnv_php_init);
module_exit(pnv_php_exit);

/* Module metadata, built from the DRIVER_* macros defined at the top of the file */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
1239