// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

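/*
 * The ioreadXX/iowriteXX accessors perform little-endian accesses.  On
 * big-endian builds the "be" variants are selected instead, making the
 * access effectively native-endian (no byte swapping) so data moves
 * between the user buffer and the device unmodified.
 */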
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

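/*
 * When test_mem is set the target is an MMIO BAR and the access must be
 * performed under vdev->memory_lock with the command register's
 * memory-enable bit verified: touching MMIO while decode is disabled
 * can trigger fatal platform errors on some systems.  I/O port
 * accesses pass test_mem as false and skip the check.
 */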
#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif

#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
#ifdef ioread64
VFIO_IOREAD(64)
#endif

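/*
 * A minimal usage sketch for the accessors above (hypothetical caller;
 * assumes "bar" and "off" are validated and the BAR is already mapped):
 *
 *	u32 val;
 *	int ret = vfio_pci_core_ioread32(vdev, true, &val,
 *					 vdev->barmap[bar] + off);
 *	if (ret)
 *		return ret;	(-EIO: memory decode was disabled)
 *
 * The VFIO_IORDWR helpers below pair these accessors with
 * copy_from_user()/copy_to_user() to move one naturally sized element
 * between a user buffer and the mapped region.
 */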
#define VFIO_IORDWR(size)						\
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
				bool iswrite, bool test_mem,		\
				void __iomem *io, char __user *buf,	\
				loff_t off, size_t *filled)		\
{									\
	u##size val;							\
	int ret;							\
									\
	if (iswrite) {							\
		if (copy_from_user(&val, buf, sizeof(val)))		\
			return -EFAULT;					\
									\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
						  val, io + off);	\
		if (ret)						\
			return ret;					\
	} else {							\
		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
						 &val, io + off);	\
		if (ret)						\
			return ret;					\
									\
		if (copy_to_user(buf, &val, sizeof(val)))		\
			return -EFAULT;					\
	}								\
									\
	*filled = sizeof(val);						\
	return 0;							\
}									\

VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
#if defined(ioread64) && defined(iowrite64)
VFIO_IORDWR(64)
#endif

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

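		/*
		 * fillable is the number of bytes accessible at the
		 * current offset before running into the excluded
		 * [x_start, x_end) window or exhausting the request.
		 */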
		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

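		/*
		 * Use the largest naturally aligned access that fits;
		 * anything landing in the excluded range falls through
		 * to the fill/drop path below.
		 */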
#if defined(ioread64) && defined(iowrite64)
		if (fillable >= 8 && !(off % 8)) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else
#endif
		if (fillable >= 4 && !(off % 4)) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2)) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

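/*
 * Lazily request and iomap a BAR on first use, caching the mapping in
 * vdev->barmap[] so repeated region accesses avoid remapping.
 */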
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);

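/*
 * *ppos encodes both the region index (upper bits, recovered with
 * VFIO_PCI_OFFSET_TO_INDEX) and the offset within the BAR (lower bits,
 * masked by VFIO_PCI_OFFSET_MASK).
 */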
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

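	/*
	 * Shadow ROMs have no resource start; expose the fixed 128KB
	 * (0x20000) legacy ROM window for them.  Any other BAR without
	 * a claimed resource is invalid.
	 */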
	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

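	/* Only MMIO BARs are gated on the memory-enable bit */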
	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
				      count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

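	/*
	 * Legacy VGA decodes three fixed windows: the 0xa0000-0xbffff
	 * memory aperture and the 0x3b0-0x3bb (mono) and 0x3c0-0x3df
	 * (color) I/O port ranges.  Access goes through the VGA
	 * arbiter since these ranges are shared among VGA devices.
	 */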
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif

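/*
 * An ioeventfd ties an eventfd to a pre-programmed write into a BAR:
 * when the eventfd is signaled (typically by KVM completing a guest
 * MMIO write), the kernel performs the write directly, avoiding a
 * round trip through userspace.
 */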
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

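/*
 * The handler runs from the eventfd wakeup path and must not sleep, so
 * it can only trylock memory_lock; on contention it returns 1 to defer
 * the write to the thread below, which may block.  Having taken the
 * lock and checked memory enable itself, it passes test_mem as false.
 */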
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}

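/*
 * Reached via the VFIO_DEVICE_IOEVENTFD ioctl.  Passing fd == -1
 * deregisters an existing ioeventfd matching on offset, data and
 * count; registering a duplicate returns -EEXIST.
 */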
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}