// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO-KVM bridge pseudo device
 *
 * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/anon_inodes.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include "vfio.h"

#ifdef CONFIG_SPAPR_TCE_IOMMU
#include <asm/kvm_ppc.h>
#endif

struct kvm_vfio_file {
	struct list_head node;
	struct file *file;
#ifdef CONFIG_SPAPR_TCE_IOMMU
	struct iommu_group *iommu_group;
#endif
};

struct kvm_vfio {
	struct list_head file_list;
	struct mutex lock;
	bool noncoherent;
};

#ifdef CONFIG_VFIO_PKVM_IOMMU
struct kvm_pviommu {
	struct kvm_device *dev;
	int fd;
};
#endif

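/*
 * The vfio_file_* helpers below are resolved through symbol_get() rather
 * than called directly, so that kvm does not take a hard module dependency
 * on vfio: the symbol is looked up (pinning the vfio module) only for the
 * duration of each call and released again with symbol_put().
 */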
static void kvm_vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	void (*fn)(struct file *file, struct kvm *kvm);

	fn = symbol_get(vfio_file_set_kvm);
	if (!fn)
		return;

	fn(file, kvm);

	symbol_put(vfio_file_set_kvm);
}

static bool kvm_vfio_file_enforced_coherent(struct file *file)
{
	bool (*fn)(struct file *file);
	bool ret;

	fn = symbol_get(vfio_file_enforced_coherent);
	if (!fn)
		return false;

	ret = fn(file);

	symbol_put(vfio_file_enforced_coherent);

	return ret;
}

static bool kvm_vfio_file_is_valid(struct file *file)
{
	bool (*fn)(struct file *file);
	bool ret;

	fn = symbol_get(vfio_file_is_valid);
	if (!fn)
		return false;

	ret = fn(file);

	symbol_put(vfio_file_is_valid);

	return ret;
}

static struct device *kvm_vfio_file_get_device(struct file *file)
{
	struct device *(*fn)(struct file *file);
	struct device *dev;

	fn = symbol_get(vfio_file_get_device);
	if (!fn)
		return NULL;

	dev = fn(file);

	symbol_put(vfio_file_get_device);

	return dev;
}

static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file)
{
	struct iommu_group *(*fn)(struct file *file);
	struct iommu_group *ret;

	fn = symbol_get(vfio_file_iommu_group);
	if (!fn)
		return NULL;

	ret = fn(file);

	symbol_put(vfio_file_iommu_group);

	return ret;
}

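/*
 * Drop the TCE table bindings for this file's IOMMU group and release the
 * group reference taken when the group was attached through
 * KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE.
 */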
#ifdef CONFIG_SPAPR_TCE_IOMMU
static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
					     struct kvm_vfio_file *kvf)
{
	if (WARN_ON_ONCE(!kvf->iommu_group))
		return;

	kvm_spapr_tce_release_iommu_group(kvm, kvf->iommu_group);
	iommu_group_put(kvf->iommu_group);
	kvf->iommu_group = NULL;
}
#endif

/*
 * Groups/devices can use the same or different IOMMU domains.  If the same,
 * then adding a new group/device may change the coherency of groups/devices
 * we've previously been told about.  We don't want to care about any of
 * that so we retest each group/device and bail as soon as we find one that's
 * noncoherent.  This means we only ever [un]register_noncoherent_dma once
 * for the whole device.
 */
static void kvm_vfio_update_coherency(struct kvm_device *dev)
{
	struct kvm_vfio *kv = dev->private;
	bool noncoherent = false;
	struct kvm_vfio_file *kvf;

	list_for_each_entry(kvf, &kv->file_list, node) {
		if (!kvm_vfio_file_enforced_coherent(kvf->file)) {
			noncoherent = true;
			break;
		}
	}

	if (noncoherent != kv->noncoherent) {
		kv->noncoherent = noncoherent;

		if (kv->noncoherent)
			kvm_arch_register_noncoherent_dma(dev->kvm);
		else
			kvm_arch_unregister_noncoherent_dma(dev->kvm);
	}
}

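/*
 * Tell the arch code that a device is being assigned to (or reclaimed from)
 * this VM.  A vfio device fd exposes a struct device directly; a group-based
 * fd only exposes the iommu_group, so fall back to group-granular assignment
 * in that case.
 */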
static int kvm_vfio_assign_file(struct file *file)
{
	struct device *dev;
	struct iommu_group *group;

	dev = kvm_vfio_file_get_device(file);
	if (dev)
		return kvm_arch_assign_device(dev);

	group = kvm_vfio_file_iommu_group(file);
	if (group)
		return kvm_arch_assign_group(group);

	return -ENODEV;
}

static void kvm_vfio_reclaim_file(struct file *file)
{
	struct device *dev;
	struct iommu_group *group;

	dev = kvm_vfio_file_get_device(file);
	if (dev) {
		kvm_arch_reclaim_device(dev);
		return;
	}

	group = kvm_vfio_file_iommu_group(file);
	if (group)
		kvm_arch_reclaim_group(group);
}

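/*
 * Add a vfio file to this device's list: validate the fd, reject duplicates,
 * notify the arch code of the assignment, then take a long-term reference on
 * the file and link this KVM into it so vfio can mediate device access on
 * our behalf.
 */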
static int kvm_vfio_file_add(struct kvm_device *dev, unsigned int fd)
{
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_file *kvf;
	struct file *filp;
	int ret = 0;

	filp = fget(fd);
	if (!filp)
		return -EBADF;

	/* Ensure the FD is a vfio FD. */
	if (!kvm_vfio_file_is_valid(filp)) {
		ret = -EINVAL;
		goto out_fput;
	}

	mutex_lock(&kv->lock);

	list_for_each_entry(kvf, &kv->file_list, node) {
		if (kvf->file == filp) {
			ret = -EEXIST;
			goto out_unlock;
		}
	}

	kvf = kzalloc(sizeof(*kvf), GFP_KERNEL_ACCOUNT);
	if (!kvf) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ret = kvm_vfio_assign_file(filp);
	if (ret) {
		/* Don't leak kvf if the arch rejects the assignment. */
		kfree(kvf);
		goto out_unlock;
	}

	kvf->file = get_file(filp);
	list_add_tail(&kvf->node, &kv->file_list);

	kvm_arch_start_assignment(dev->kvm);
	kvm_vfio_file_set_kvm(kvf->file, dev->kvm);
	kvm_vfio_update_coherency(dev);

out_unlock:
	mutex_unlock(&kv->lock);
out_fput:
	fput(filp);
	return ret;
}
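/*
 * Remove a previously added vfio file.  The fd is only used to locate the
 * matching struct file; the long-term reference taken at add time is
 * dropped here.
 */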
static int kvm_vfio_file_del(struct kvm_device *dev, unsigned int fd)
{
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_file *kvf;
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!fd_file(f))
		return -EBADF;

	ret = -ENOENT;

	mutex_lock(&kv->lock);

	list_for_each_entry(kvf, &kv->file_list, node) {
		if (kvf->file != fd_file(f))
			continue;

		kvm_vfio_reclaim_file(kvf->file);
		list_del(&kvf->node);
		kvm_arch_end_assignment(dev->kvm);
#ifdef CONFIG_SPAPR_TCE_IOMMU
		kvm_spapr_tce_release_vfio_group(dev->kvm, kvf);
#endif
		kvm_vfio_file_set_kvm(kvf->file, NULL);
		fput(kvf->file);
		kfree(kvf);
		ret = 0;
		break;
	}

	kvm_vfio_update_coherency(dev);

	mutex_unlock(&kv->lock);

	fdput(f);

	return ret;
}

#ifdef CONFIG_SPAPR_TCE_IOMMU
static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev,
				       void __user *arg)
{
	struct kvm_vfio_spapr_tce param;
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_file *kvf;
	struct fd f;
	int ret;

	if (copy_from_user(&param, arg, sizeof(struct kvm_vfio_spapr_tce)))
		return -EFAULT;

	f = fdget(param.groupfd);
	if (!fd_file(f))
		return -EBADF;

	ret = -ENOENT;

	mutex_lock(&kv->lock);

	list_for_each_entry(kvf, &kv->file_list, node) {
		if (kvf->file != fd_file(f))
			continue;

		if (!kvf->iommu_group) {
			kvf->iommu_group = kvm_vfio_file_iommu_group(kvf->file);
			if (WARN_ON_ONCE(!kvf->iommu_group)) {
				ret = -EIO;
				goto err_fdput;
			}
		}

		ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, param.tablefd,
						       kvf->iommu_group);
		break;
	}

err_fdput:
	mutex_unlock(&kv->lock);
	fdput(f);
	return ret;
}
#endif
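/*
 * These attributes are driven via KVM_SET_DEVICE_ATTR on the VFIO pseudo
 * device.  An illustrative userspace sketch (error handling omitted;
 * vfio_fd is assumed to be an open vfio device or group fd):
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_VFIO_FILE,
 *		.attr  = KVM_DEV_VFIO_FILE_ADD,
 *		.addr  = (__u64)(unsigned long)&vfio_fd,
 *	};
 *	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
 */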
static int kvm_vfio_set_file(struct kvm_device *dev, long attr,
			     void __user *arg)
{
	int32_t __user *argp = arg;
	int32_t fd;

	switch (attr) {
	case KVM_DEV_VFIO_FILE_ADD:
		if (get_user(fd, argp))
			return -EFAULT;
		return kvm_vfio_file_add(dev, fd);

	case KVM_DEV_VFIO_FILE_DEL:
		if (get_user(fd, argp))
			return -EFAULT;
		return kvm_vfio_file_del(dev, fd);

#ifdef CONFIG_SPAPR_TCE_IOMMU
	case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
		return kvm_vfio_file_set_spapr_tce(dev, arg);
#endif
	}

	return -ENXIO;
}

#ifdef CONFIG_VFIO_PKVM_IOMMU
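/*
 * Resolve a vfio device fd plus a stream-ID index into the physical IOMMU
 * handle and stream ID backing that device, then ask the pKVM hypervisor to
 * map the caller-chosen virtual stream ID onto it for this pvIOMMU instance.
 */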
static int kvm_vfio_pviommu_set_config(struct file *fiommu,
				       struct kvm_vfio_iommu_config *config)
{
	int vfio_dev_fd = config->device_fd;
	struct file *filp;
	int ret;
	u32 phys_sid;
	pkvm_handle_t iommu;
	struct kvm_pviommu *pviommu = fiommu->private_data;
	struct device *dev;

	filp = fget(vfio_dev_fd);
	if (!filp)
		return -EBADF;

	dev = kvm_vfio_file_get_device(filp);
	if (!dev) {
		ret = -ENODEV;
		goto err_fput;
	}

	ret = kvm_iommu_device_id(dev, config->sid_idx, &iommu, &phys_sid);
	if (ret)
		goto err_fput;

	ret = kvm_call_hyp_nvhe(__pkvm_pviommu_add_vsid, pviommu->dev->kvm, pviommu->fd,
				iommu, phys_sid, config->vsid);

err_fput:
	fput(filp);
	return ret;
}
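/*
 * KVM_PVIOMMU_SET_CONFIG uses the extensible-struct convention: userspace
 * passes the struct size it knows about in the leading __u32, and
 * copy_struct_from_user() zero-fills any newer fields for older callers
 * while rejecting non-zero trailing bytes from newer, larger structs.
 */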
static long pviommufd_ioctl(struct file *filp, unsigned int ioctl,
			    unsigned long arg)
{
	struct kvm_vfio_iommu_config config;
	__u32 usize;

	switch (ioctl) {
	case KVM_PVIOMMU_SET_CONFIG:
		if (copy_from_user(&usize, (void __user *)arg, sizeof(usize)))
			return -EFAULT;
		if (usize < offsetofend(struct kvm_vfio_iommu_config, __reserved))
			return -EINVAL;
		if (copy_struct_from_user(&config, sizeof(config),
					  (void __user *)arg, usize))
			return -EFAULT;

		return kvm_vfio_pviommu_set_config(filp, &config);
	default:
		return -ENXIO;
	}
}

static int pviommufd_release(struct inode *i, struct file *filp)
{
	struct kvm_pviommu *pviommu = filp->private_data;

	kfree(pviommu);
	return 0;
}

static const struct file_operations pviommu_fops = {
	.unlocked_ioctl = pviommufd_ioctl,
	.release = pviommufd_release,
};
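/*
 * Create a pvIOMMU instance for this VM, exposed to userspace as an
 * anon-inode fd.  The fd number doubles as the pvIOMMU identifier handed to
 * the hypervisor, and the fd is only installed once the hypervisor call
 * succeeds, so no half-created instance ever escapes to userspace.
 */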
static int kvm_vfio_pviommu_attach(struct kvm_device *dev)
{
	int ret;
	struct kvm_pviommu *pviommu;
	struct file *filep;
	int fdno;

	pviommu = kmalloc(sizeof(*pviommu), GFP_KERNEL);
	if (!pviommu)
		return -ENOMEM;

	pviommu->dev = dev;

	filep = anon_inode_getfile("kvm-pviommu", &pviommu_fops, pviommu, O_CLOEXEC);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto out_free;
	}

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto out_fput;
	}

	/* Create pvIOMMU with this ID. */
	ret = kvm_call_hyp_nvhe(__pkvm_pviommu_attach, dev->kvm, fdno);
	if (ret)
		goto out_err;

	pviommu->fd = fdno;
	fd_install(fdno, filep);
	return pviommu->fd;
out_err:
	put_unused_fd(fdno);
out_fput:
	/* pviommufd_release() frees pviommu; don't free it again here. */
	fput(filep);
	return ret;
out_free:
	kfree(pviommu);
	return ret;
}

static int kvm_vfio_pviommu_get_info(struct kvm_device *dev,
				     struct kvm_vfio_iommu_info *info)
{
	int vfio_dev_fd = info->device_fd;
	int ret = 0;
	struct file *filp;
	struct device *device;

	filp = fget(vfio_dev_fd);
	if (!filp)
		return -EBADF;

	device = kvm_vfio_file_get_device(filp);
	if (!device) {
		ret = -ENODEV;
		goto err_fput;
	}

	info->out_nr_sids = kvm_iommu_device_num_ids(device);
err_fput:
	fput(filp);
	return ret;
}

static int kvm_vfio_pviommu(struct kvm_device *dev, long attr,
			    void __user *arg)
{
	struct kvm_vfio_iommu_info info;
	int ret;
	__u32 usize;

	switch (attr) {
	case KVM_DEV_VFIO_PVIOMMU_ATTACH:
		return kvm_vfio_pviommu_attach(dev);
	case KVM_DEV_VFIO_PVIOMMU_GET_INFO:
		if (copy_from_user(&usize, arg, sizeof(usize)))
			return -EFAULT;
		if (usize < offsetofend(struct kvm_vfio_iommu_info, __reserved))
			return -EINVAL;
		if (copy_struct_from_user(&info, sizeof(info), arg, usize))
			return -EFAULT;

		ret = kvm_vfio_pviommu_get_info(dev, &info);
		if (ret)
			return ret;
		/* Don't copy back more than the kernel's view of the struct. */
		if (copy_to_user(arg, &info, min_t(u32, usize, sizeof(info))))
			return -EFAULT;
		return 0;
	}
	return -ENXIO;
}
#endif

static int kvm_vfio_set_attr(struct kvm_device *dev,
			     struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_VFIO_FILE:
		return kvm_vfio_set_file(dev, attr->attr,
					 u64_to_user_ptr(attr->addr));
#ifdef CONFIG_VFIO_PKVM_IOMMU
	case KVM_DEV_VFIO_PVIOMMU:
		return kvm_vfio_pviommu(dev, attr->attr,
					u64_to_user_ptr(attr->addr));
#endif
	}

	return -ENXIO;
}

static int kvm_vfio_has_attr(struct kvm_device *dev,
			     struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_VFIO_FILE:
		switch (attr->attr) {
		case KVM_DEV_VFIO_FILE_ADD:
		case KVM_DEV_VFIO_FILE_DEL:
#ifdef CONFIG_SPAPR_TCE_IOMMU
		case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
#endif
			return 0;
		}

		break;
#ifdef CONFIG_VFIO_PKVM_IOMMU
	case KVM_DEV_VFIO_PVIOMMU:
		switch (attr->attr) {
		case KVM_DEV_VFIO_PVIOMMU_ATTACH:
		case KVM_DEV_VFIO_PVIOMMU_GET_INFO:
			return 0;
		}
		break;
#endif
	}

	return -ENXIO;
}
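/*
 * Device release: drop every vfio file we were told about, unwinding the
 * same state set up in kvm_vfio_file_add().
 */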
static void kvm_vfio_release(struct kvm_device *dev)
{
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_file *kvf, *tmp;

	list_for_each_entry_safe(kvf, tmp, &kv->file_list, node) {
#ifdef CONFIG_SPAPR_TCE_IOMMU
		kvm_spapr_tce_release_vfio_group(dev->kvm, kvf);
#endif
		kvm_vfio_reclaim_file(kvf->file);
		kvm_vfio_file_set_kvm(kvf->file, NULL);
		fput(kvf->file);
		list_del(&kvf->node);
		kfree(kvf);
		kvm_arch_end_assignment(dev->kvm);
	}

	kvm_vfio_update_coherency(dev);

	kfree(kv);
	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .release */
}

static int kvm_vfio_create(struct kvm_device *dev, u32 type);

static struct kvm_device_ops kvm_vfio_ops = {
	.name = "kvm-vfio",
	.create = kvm_vfio_create,
	.release = kvm_vfio_release,
	.set_attr = kvm_vfio_set_attr,
	.has_attr = kvm_vfio_has_attr,
};

static int kvm_vfio_create(struct kvm_device *dev, u32 type)
{
	struct kvm_device *tmp;
	struct kvm_vfio *kv;

	lockdep_assert_held(&dev->kvm->lock);

	/* Only one VFIO "device" per VM */
	list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
		if (tmp->ops == &kvm_vfio_ops)
			return -EBUSY;

	kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT);
	if (!kv)
		return -ENOMEM;

	INIT_LIST_HEAD(&kv->file_list);
	mutex_init(&kv->lock);

	dev->private = kv;

	return 0;
}

int kvm_vfio_ops_init(void)
{
	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}

void kvm_vfio_ops_exit(void)
{
	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
}