1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
5 *
6 * Authors:
7 * Sunil Muthuswamy <sunilmut@microsoft.com>
8 * Wei Liu <wei.liu@kernel.org>
9 */
10
11 #include <linux/pci.h>
12 #include <linux/irq.h>
13 #include <asm/mshyperv.h>
14
/*
 * Ask the hypervisor to map an interrupt of @device_id onto @vector of @cpu.
 *
 * @device_id: hypervisor device id of the interrupt source
 * @level:     true selects level trigger mode, false selects edge
 * @cpu:       CPU whose VP set is used as the interrupt target
 * @vector:    vector programmed into the interrupt descriptor
 * @entry:     receives the interrupt entry from the hypercall output page;
 *             it is written whenever the hypercall is issued, so it is only
 *             meaningful when the function reports success
 *
 * The per-CPU hypercall input/output pages are used with local interrupts
 * disabled for the whole time they are being accessed.
 *
 * Return: hv_result() of the hypercall status, or -EINVAL if no VP set
 * could be generated for @cpu. Callers compare against HV_STATUS_SUCCESS,
 * so any non-zero value means failure.
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		/*
		 * Negative errno: a positive EINVAL would be indistinguishable
		 * from a (non-negative) hypervisor status code.
		 */
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);
	/* Copy out before re-enabling interrupts; the page is per-CPU. */
	*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);

	return hv_result(status);
}
71
/*
 * Ask the hypervisor to drop the interrupt mapping described by @old_entry
 * for the device identified by @id.
 *
 * Return: hv_result() of the hypercall status.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *args;
	unsigned long irq_flags;
	u64 hv_status;

	/* The per-CPU input page is only touched with local interrupts off. */
	local_irq_save(irq_flags);

	args = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(args, 0, sizeof(*args));

	args->partition_id = hv_current_partition_id;
	args->device_id = id;
	args->interrupt_entry = *old_entry;

	hv_status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, args, NULL);

	local_irq_restore(irq_flags);

	return hv_result(hv_status);
}
93
94 #ifdef CONFIG_PCI_MSI
95 struct rid_data {
96 struct pci_dev *bridge;
97 u32 rid;
98 };
99
/*
 * pci_for_each_dma_alias() callback used by hv_build_pci_dev_id().
 *
 * @data points to a struct rid_data. Whenever @pdev or @alias sits on a
 * different bus than the requester id recorded so far, @pdev and @alias
 * replace the recorded bridge and requester id.
 *
 * Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *ctx = data;
	const u8 cur_bus = PCI_BUS_NUM(ctx->rid);

	/* Still on the bus we already know about: nothing to record. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	ctx->bridge = pdev;
	ctx->rid = alias;

	return 0;
}
112
hv_build_pci_dev_id(struct pci_dev * dev)113 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
114 {
115 union hv_device_id dev_id;
116 struct rid_data data = {
117 .bridge = NULL,
118 .rid = PCI_DEVID(dev->bus->number, dev->devfn)
119 };
120
121 pci_for_each_dma_alias(dev, get_rid_cb, &data);
122
123 dev_id.as_uint64 = 0;
124 dev_id.device_type = HV_DEVICE_TYPE_PCI;
125 dev_id.pci.segment = pci_domain_nr(dev->bus);
126
127 dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
128 dev_id.pci.bdf.device = PCI_SLOT(data.rid);
129 dev_id.pci.bdf.function = PCI_FUNC(data.rid);
130 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
131
132 if (data.bridge) {
133 int pos;
134
135 /*
136 * Microsoft Hypervisor requires a bus range when the bridge is
137 * running in PCI-X mode.
138 *
139 * To distinguish conventional vs PCI-X bridge, we can check
140 * the bridge's PCI-X Secondary Status Register, Secondary Bus
141 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
142 * Specification Revision 1.0 5.2.2.1.3.
143 *
144 * Value zero means it is in conventional mode, otherwise it is
145 * in PCI-X mode.
146 */
147
148 pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
149 if (pos) {
150 u16 status;
151
152 pci_read_config_word(data.bridge, pos +
153 PCI_X_BRIDGE_SSTATUS, &status);
154
155 if (status & PCI_X_SSTATUS_FREQ) {
156 /* Non-zero, PCI-X mode */
157 u8 sec_bus, sub_bus;
158
159 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
160
161 pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
162 dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
163 pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
164 dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
165 }
166 }
167 }
168
169 return dev_id;
170 }
171
hv_map_msi_interrupt(struct pci_dev * dev,int cpu,int vector,struct hv_interrupt_entry * entry)172 static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
173 struct hv_interrupt_entry *entry)
174 {
175 union hv_device_id device_id = hv_build_pci_dev_id(dev);
176
177 return hv_map_interrupt(device_id, false, cpu, vector, entry);
178 }
179
entry_to_msi_msg(struct hv_interrupt_entry * entry,struct msi_msg * msg)180 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
181 {
182 /* High address is always 0 */
183 msg->address_hi = 0;
184 msg->address_lo = entry->msi_entry.address.as_uint32;
185 msg->data = entry->msi_entry.data.as_uint32;
186 }
187
188 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
hv_irq_compose_msi_msg(struct irq_data * data,struct msi_msg * msg)189 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
190 {
191 struct msi_desc *msidesc;
192 struct pci_dev *dev;
193 struct hv_interrupt_entry out_entry, *stored_entry;
194 struct irq_cfg *cfg = irqd_cfg(data);
195 int cpu;
196 u64 status;
197
198 msidesc = irq_data_get_msi_desc(data);
199 dev = msi_desc_to_pci_dev(msidesc);
200
201 if (!cfg) {
202 pr_debug("%s: cfg is NULL", __func__);
203 return;
204 }
205
206 cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
207
208 if (data->chip_data) {
209 /*
210 * This interrupt is already mapped. Let's unmap first.
211 *
212 * We don't use retarget interrupt hypercalls here because
213 * Microsoft Hypervisor doens't allow root to change the vector
214 * or specify VPs outside of the set that is initially used
215 * during mapping.
216 */
217 stored_entry = data->chip_data;
218 data->chip_data = NULL;
219
220 status = hv_unmap_msi_interrupt(dev, stored_entry);
221
222 kfree(stored_entry);
223
224 if (status != HV_STATUS_SUCCESS) {
225 pr_debug("%s: failed to unmap, status %lld", __func__, status);
226 return;
227 }
228 }
229
230 stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
231 if (!stored_entry) {
232 pr_debug("%s: failed to allocate chip data\n", __func__);
233 return;
234 }
235
236 status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
237 if (status != HV_STATUS_SUCCESS) {
238 kfree(stored_entry);
239 return;
240 }
241
242 *stored_entry = out_entry;
243 data->chip_data = stored_entry;
244 entry_to_msi_msg(&out_entry, msg);
245
246 return;
247 }
248
hv_unmap_msi_interrupt(struct pci_dev * dev,struct hv_interrupt_entry * old_entry)249 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
250 {
251 return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
252 }
253
hv_teardown_msi_irq(struct pci_dev * dev,struct irq_data * irqd)254 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
255 {
256 struct hv_interrupt_entry old_entry;
257 struct msi_msg msg;
258 u64 status;
259
260 if (!irqd->chip_data) {
261 pr_debug("%s: no chip data\n!", __func__);
262 return;
263 }
264
265 old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
266 entry_to_msi_msg(&old_entry, &msg);
267
268 kfree(irqd->chip_data);
269 irqd->chip_data = NULL;
270
271 status = hv_unmap_msi_interrupt(dev, &old_entry);
272
273 if (status != HV_STATUS_SUCCESS)
274 pr_err("%s: hypercall failed, status %lld\n", __func__, status);
275 }
276
hv_msi_free_irq(struct irq_domain * domain,struct msi_domain_info * info,unsigned int virq)277 static void hv_msi_free_irq(struct irq_domain *domain,
278 struct msi_domain_info *info, unsigned int virq)
279 {
280 struct irq_data *irqd = irq_get_irq_data(virq);
281 struct msi_desc *desc;
282
283 if (!irqd)
284 return;
285
286 desc = irq_data_get_msi_desc(irqd);
287 if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
288 return;
289
290 hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
291 }
292
293 /*
294 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
295 * which implement the MSI or MSI-X Capability Structure.
296 */
297 static struct irq_chip hv_pci_msi_controller = {
298 .name = "HV-PCI-MSI",
299 .irq_unmask = pci_msi_unmask_irq,
300 .irq_mask = pci_msi_mask_irq,
301 .irq_ack = irq_chip_ack_parent,
302 .irq_retrigger = irq_chip_retrigger_hierarchy,
303 .irq_compose_msi_msg = hv_irq_compose_msi_msg,
304 .irq_set_affinity = msi_domain_set_affinity,
305 .flags = IRQCHIP_SKIP_SET_WAKE,
306 };
307
308 static struct msi_domain_ops pci_msi_domain_ops = {
309 .msi_free = hv_msi_free_irq,
310 .msi_prepare = pci_msi_prepare,
311 };
312
313 static struct msi_domain_info hv_pci_msi_domain_info = {
314 .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
315 MSI_FLAG_PCI_MSIX,
316 .ops = &pci_msi_domain_ops,
317 .chip = &hv_pci_msi_controller,
318 .handler = handle_edge_irq,
319 .handler_name = "edge",
320 };
321
hv_create_pci_msi_domain(void)322 struct irq_domain * __init hv_create_pci_msi_domain(void)
323 {
324 struct irq_domain *d = NULL;
325 struct fwnode_handle *fn;
326
327 fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
328 if (fn)
329 d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
330
331 /* No point in going further if we can't get an irq domain */
332 BUG_ON(!d);
333
334 return d;
335 }
336
337 #endif /* CONFIG_PCI_MSI */
338
hv_unmap_ioapic_interrupt(int ioapic_id,struct hv_interrupt_entry * entry)339 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
340 {
341 union hv_device_id device_id;
342
343 device_id.as_uint64 = 0;
344 device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
345 device_id.ioapic.ioapic_id = (u8)ioapic_id;
346
347 return hv_unmap_interrupt(device_id.as_uint64, entry);
348 }
349 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
350
/*
 * Map an interrupt of the IO-APIC identified by @ioapic_id to @vector on
 * @cpu, level or edge triggered according to @level, and store the
 * resulting interrupt entry in @entry. Only the low 8 bits of @ioapic_id
 * end up in the device id.
 *
 * Return: whatever hv_map_interrupt() returns.
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id ioapic_dev = { .as_uint64 = 0 };

	ioapic_dev.device_type = HV_DEVICE_TYPE_IOAPIC;
	ioapic_dev.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(ioapic_dev, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
363