1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2023 Advanced Micro Devices, Inc. */
3
4 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/types.h>
9 #include <linux/vfio.h>
10
11 #include <linux/pds/pds_common.h>
12 #include <linux/pds/pds_core_if.h>
13 #include <linux/pds/pds_adminq.h>
14
15 #include "vfio_dev.h"
16 #include "pci_drv.h"
17 #include "cmds.h"
18
19 #define PDS_VFIO_DRV_DESCRIPTION "AMD/Pensando VFIO Device Driver"
20 #define PCI_VENDOR_ID_PENSANDO 0x1dd8
21
pds_vfio_recovery(struct pds_vfio_pci_device * pds_vfio)22 static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
23 {
24 bool deferred_reset_needed = false;
25
26 /*
27 * Documentation states that the kernel migration driver must not
28 * generate asynchronous device state transitions outside of
29 * manipulation by the user or the VFIO_DEVICE_RESET ioctl.
30 *
31 * Since recovery is an asynchronous event received from the device,
32 * initiate a deferred reset. Issue a deferred reset in the following
33 * situations:
34 * 1. Migration is in progress, which will cause the next step of
35 * the migration to fail.
36 * 2. If the device is in a state that will be set to
37 * VFIO_DEVICE_STATE_RUNNING on the next action (i.e. VM is
38 * shutdown and device is in VFIO_DEVICE_STATE_STOP).
39 */
40 mutex_lock(&pds_vfio->state_mutex);
41 if ((pds_vfio->state != VFIO_DEVICE_STATE_RUNNING &&
42 pds_vfio->state != VFIO_DEVICE_STATE_ERROR) ||
43 (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING &&
44 pds_vfio_dirty_is_enabled(pds_vfio)))
45 deferred_reset_needed = true;
46 mutex_unlock(&pds_vfio->state_mutex);
47
48 /*
49 * On the next user initiated state transition, the device will
50 * transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the user's
51 * responsibility to reset the device.
52 *
53 * If a VFIO_DEVICE_RESET is requested post recovery and before the next
54 * state transition, then the deferred reset state will be set to
55 * VFIO_DEVICE_STATE_RUNNING.
56 */
57 if (deferred_reset_needed) {
58 mutex_lock(&pds_vfio->reset_mutex);
59 pds_vfio->deferred_reset = true;
60 pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
61 mutex_unlock(&pds_vfio->reset_mutex);
62 }
63 }
64
pds_vfio_pci_notify_handler(struct notifier_block * nb,unsigned long ecode,void * data)65 static int pds_vfio_pci_notify_handler(struct notifier_block *nb,
66 unsigned long ecode, void *data)
67 {
68 struct pds_vfio_pci_device *pds_vfio =
69 container_of(nb, struct pds_vfio_pci_device, nb);
70 struct device *dev = pds_vfio_to_dev(pds_vfio);
71 union pds_core_notifyq_comp *event = data;
72
73 dev_dbg(dev, "%s: event code %lu\n", __func__, ecode);
74
75 /*
76 * We don't need to do anything for RESET state==0 as there is no notify
77 * or feedback mechanism available, and it is possible that we won't
78 * even see a state==0 event since the pds_core recovery is pending.
79 *
80 * Any requests from VFIO while state==0 will fail, which will return
81 * error and may cause migration to fail.
82 */
83 if (ecode == PDS_EVENT_RESET) {
84 dev_info(dev, "%s: PDS_EVENT_RESET event received, state==%d\n",
85 __func__, event->reset.state);
86 /*
87 * pds_core device finished recovery and sent us the
88 * notification (state == 1) to allow us to recover
89 */
90 if (event->reset.state == 1)
91 pds_vfio_recovery(pds_vfio);
92 }
93
94 return 0;
95 }
96
97 static int
pds_vfio_pci_register_event_handler(struct pds_vfio_pci_device * pds_vfio)98 pds_vfio_pci_register_event_handler(struct pds_vfio_pci_device *pds_vfio)
99 {
100 struct device *dev = pds_vfio_to_dev(pds_vfio);
101 struct notifier_block *nb = &pds_vfio->nb;
102 int err;
103
104 if (!nb->notifier_call) {
105 nb->notifier_call = pds_vfio_pci_notify_handler;
106 err = pdsc_register_notify(nb);
107 if (err) {
108 nb->notifier_call = NULL;
109 dev_err(dev,
110 "failed to register pds event handler: %pe\n",
111 ERR_PTR(err));
112 return -EINVAL;
113 }
114 dev_dbg(dev, "pds event handler registered\n");
115 }
116
117 return 0;
118 }
119
120 static void
pds_vfio_pci_unregister_event_handler(struct pds_vfio_pci_device * pds_vfio)121 pds_vfio_pci_unregister_event_handler(struct pds_vfio_pci_device *pds_vfio)
122 {
123 if (pds_vfio->nb.notifier_call) {
124 pdsc_unregister_notify(&pds_vfio->nb);
125 pds_vfio->nb.notifier_call = NULL;
126 }
127 }
128
pds_vfio_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)129 static int pds_vfio_pci_probe(struct pci_dev *pdev,
130 const struct pci_device_id *id)
131 {
132 struct pds_vfio_pci_device *pds_vfio;
133 int err;
134
135 pds_vfio = vfio_alloc_device(pds_vfio_pci_device, vfio_coredev.vdev,
136 &pdev->dev, pds_vfio_ops_info());
137 if (IS_ERR(pds_vfio))
138 return PTR_ERR(pds_vfio);
139
140 dev_set_drvdata(&pdev->dev, &pds_vfio->vfio_coredev);
141
142 err = vfio_pci_core_register_device(&pds_vfio->vfio_coredev);
143 if (err)
144 goto out_put_vdev;
145
146 err = pds_vfio_register_client_cmd(pds_vfio);
147 if (err) {
148 dev_err(&pdev->dev, "failed to register as client: %pe\n",
149 ERR_PTR(err));
150 goto out_unregister_coredev;
151 }
152
153 err = pds_vfio_pci_register_event_handler(pds_vfio);
154 if (err)
155 goto out_unregister_client;
156
157 return 0;
158
159 out_unregister_client:
160 pds_vfio_unregister_client_cmd(pds_vfio);
161 out_unregister_coredev:
162 vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev);
163 out_put_vdev:
164 vfio_put_device(&pds_vfio->vfio_coredev.vdev);
165 return err;
166 }
167
pds_vfio_pci_remove(struct pci_dev * pdev)168 static void pds_vfio_pci_remove(struct pci_dev *pdev)
169 {
170 struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev);
171
172 pds_vfio_pci_unregister_event_handler(pds_vfio);
173 pds_vfio_unregister_client_cmd(pds_vfio);
174 vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev);
175 vfio_put_device(&pds_vfio->vfio_coredev.vdev);
176 }
177
/* Devices this driver can bind via driver_override (VFIO passthrough) */
static const struct pci_device_id pds_vfio_pci_table[] = {
	{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_PENSANDO, 0x1003) }, /* Ethernet VF */
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, pds_vfio_pci_table);
183
/*
 * AER reset_done handler: after a PCI error-recovery reset completes,
 * reset the driver's migration/dirty-tracking state to match the device.
 */
static void pds_vfio_pci_aer_reset_done(struct pci_dev *pdev)
{
	struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev);

	pds_vfio_reset(pds_vfio);
}
190
/* PCI error recovery: vfio-pci core detects, driver syncs state on reset */
static const struct pci_error_handlers pds_vfio_pci_err_handlers = {
	.reset_done = pds_vfio_pci_aer_reset_done,
	.error_detected = vfio_pci_core_aer_err_detected,
};
195
/* PCI driver registration; driver_managed_dma since vfio owns the IOMMU */
static struct pci_driver pds_vfio_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = pds_vfio_pci_table,
	.probe = pds_vfio_pci_probe,
	.remove = pds_vfio_pci_remove,
	.err_handler = &pds_vfio_pci_err_handlers,
	.driver_managed_dma = true,
};

module_pci_driver(pds_vfio_pci_driver);

MODULE_DESCRIPTION(PDS_VFIO_DRV_DESCRIPTION);
MODULE_AUTHOR("Brett Creeley <brett.creeley@amd.com>");
MODULE_LICENSE("GPL");
210