// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

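/*
 * The accessors below are chosen so that data moves to and from userspace
 * with its byte order preserved: little-endian hosts use the native
 * ioread/iowrite helpers, while big-endian hosts use the "be" variants to
 * avoid the byte swap the default (little-endian) helpers would perform.
 */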
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

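/*
 * VFIO_IOWRITE() generates vfio_pci_core_iowrite{8,16,32,64}().  When
 * @test_mem is set, the write is performed under @memory_lock and only if
 * the device's memory space is currently enabled; otherwise -EIO is
 * returned so that a disabled BAR is never touched.
 */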
#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \
			bool test_mem, u##size val, void __iomem *io) \
{ \
	if (test_mem) { \
		down_read(&vdev->memory_lock); \
		if (!__vfio_pci_memory_enabled(vdev)) { \
			up_read(&vdev->memory_lock); \
			return -EIO; \
		} \
	} \
\
	vfio_iowrite##size(val, io); \
\
	if (test_mem) \
		up_read(&vdev->memory_lock); \
\
	return 0; \
} \
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif

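/*
 * VFIO_IOREAD() generates vfio_pci_core_ioread{8,16,32,64}(), the read-side
 * counterpart of VFIO_IOWRITE(): the access is optionally gated on the
 * memory enable bit under @memory_lock and fails with -EIO when memory
 * space is disabled.
 */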
#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \
			bool test_mem, u##size *val, void __iomem *io) \
{ \
	if (test_mem) { \
		down_read(&vdev->memory_lock); \
		if (!__vfio_pci_memory_enabled(vdev)) { \
			up_read(&vdev->memory_lock); \
			return -EIO; \
		} \
	} \
\
	*val = vfio_ioread##size(io); \
\
	if (test_mem) \
		up_read(&vdev->memory_lock); \
\
	return 0; \
} \
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
#ifdef ioread64
VFIO_IOREAD(64)
#endif

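/*
 * VFIO_IORDWR() generates vfio_pci_iordwr{8,16,32,64}(), helpers used by
 * vfio_pci_core_do_io_rw() to move one naturally sized, naturally aligned
 * value between the user buffer and the device at the given offset.
 */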
#define VFIO_IORDWR(size) \
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev, \
			bool iswrite, bool test_mem, \
			void __iomem *io, char __user *buf, \
			loff_t off, size_t *filled) \
{ \
	u##size val; \
	int ret; \
\
	if (iswrite) { \
		if (copy_from_user(&val, buf, sizeof(val))) \
			return -EFAULT; \
\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem, \
						  val, io + off); \
		if (ret) \
			return ret; \
	} else { \
		ret = vfio_pci_core_ioread##size(vdev, test_mem, \
						 &val, io + off); \
		if (ret) \
			return ret; \
\
		if (copy_to_user(buf, &val, sizeof(val))) \
			return -EFAULT; \
	} \
\
	*filled = sizeof(val); \
	return 0; \
} \

VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
#if defined(ioread64) && defined(iowrite64)
VFIO_IORDWR(64)
#endif

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

#if defined(ioread64) && defined(iowrite64)
		if (fillable >= 8 && !(off % 8)) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else
#endif
		if (fillable >= 4 && !(off % 4)) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2)) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

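/*
 * Request the region backing a BAR and iomap it on first use.  The mapping
 * is cached in vdev->barmap[bar] and reused by subsequent accesses.
 */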
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);

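/*
 * Perform a userspace read or write of a BAR region.  MMIO and I/O port BARs
 * are accessed through the cached barmap, the ROM BAR through a transient
 * pci_map_rom() mapping.  The MSI-X vector table, and any space beyond the
 * mapped ROM, is excluded so that reads return -1 and writes are dropped.
 */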
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
				      count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
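/*
 * Perform a userspace read or write of the legacy VGA ranges: the
 * 0xa0000-0xbffff memory window and the 0x3b0/0x3c0 I/O port groups.  The
 * relevant legacy resource is held via the VGA arbiter for the duration of
 * the access.
 */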
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif

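/*
 * Write the registered ioeventfd data value to the registered device
 * address, using the accessor matching the registered access width.
 */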
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

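/*
 * Fast-path handler called from the eventfd wakeup path, where sleeping is
 * not allowed.  The memory enable check therefore uses a trylock; if the
 * lock is contended the handler returns 1 so the write is retried from the
 * thread handler below.
 */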
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

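/*
 * Thread handler used when the handler above could not take memory_lock
 * without contention.  Here the write may sleep, so the memory enable check
 * is delegated to the iowrite helper via test_mem.
 */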
static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}

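/*
 * Register or unregister an ioeventfd: when the eventfd signals, @data is
 * written to @offset within the device's BAR space with an access width of
 * @count bytes.  Passing fd == -1 for an existing registration removes it.
 * This backs the VFIO_DEVICE_IOEVENTFD ioctl.
 */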
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}