/*
 * drivers/android/staging/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *	Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *	Copyright 1999-2001 Jeff Garzik
 *	Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEMU. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEMU repository. This was last reconciled with the version that
 * shipped with QEMU 2.8.
 */

/*
 * Register offsets of the KVM Inter-VM shared memory device, as defined
 * by the ivshmem specification.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;
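
/*
 * For reference, the ivshmem-doorbell spec encodes a doorbell write as the
 * destination peer ID in the upper 16 bits and the interrupt vector in the
 * lower 16 bits.  A generic ring of vector `vec` on peer `peer` would look
 * roughly like the sketch below (illustrative only; `regs` stands for the
 * mapped REGISTER_BAR):
 *
 *	writel((peer << 16) | (vec & 0xffff), regs + DOORBELL);
 *
 * This driver only signals the host and writes the region number as the
 * vector; see the DOORBELL writes in vsoc_ioctl().
 */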

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout
	 * structure. This is probably identical to kernel_mapped_shm,
	 * but saving this here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int do_create_fd_scoped_permission(
	struct vsoc_device_region *region_p,
	struct fd_scoped_permission_node *np,
	struct fd_scoped_permission_arg __user *arg);
static void do_destroy_fd_scoped_permission(
	struct vsoc_device_region *owner_region_p,
	struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);
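
/*
 * Rough sketch of how this driver interprets the shared memory window
 * (illustrative; the actual offsets and counts come from the layout
 * descriptor that the host writes at the start of the window):
 *
 *	offset 0:                struct vsoc_shm_layout_descriptor
 *	                         (major/minor version, size, region_count,
 *	                         vsoc_region_desc_offset)
 *	vsoc_region_desc_offset: struct vsoc_device_region[region_count]
 *	per region:              [region_begin_offset, region_end_offset)
 *	                         holding the guest_to_host/host_to_guest
 *	                         signal tables, followed by shareable data
 *	                         starting at offset_of_region_data
 */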

/**
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static inline struct vsoc_device_region *vsoc_region_from_inode(
	struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static inline struct vsoc_device_region *vsoc_region_from_filep(
	struct file *filp)
{
	return vsoc_region_from_inode(file_inode(filp));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int do_create_fd_scoped_permission(
	struct vsoc_device_region *region_p,
	struct fd_scoped_permission_node *np,
	struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd. */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -ERANGE;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}
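
/*
 * Illustrative userspace flow for fd-scoped permissions (pseudo-code, not
 * part of the driver; device paths and offsets are invented, struct and
 * ioctl names come from uapi/vsoc_shm.h):
 *
 *	struct fd_scoped_permission_arg arg = {0};
 *	int manager_fd = open("/dev/<manager-region>", O_RDWR);
 *	int managed_fd = open("/dev/<managed-region>", O_RDWR);
 *
 *	arg.managed_region_fd = managed_fd;
 *	arg.perm.begin_offset = 0x1000;	// page aligned, in the data section
 *	arg.perm.end_offset = 0x2000;
 *	arg.perm.owner_offset = 0x10;	// a word in the manager's data section
 *	arg.perm.owned_value = 1;	// anything other than VSOC_REGION_FREE
 *	ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg);
 *
 * On success the permission is tied to managed_fd; when that fd is released
 * the owner word is atomically reset to VSOC_REGION_FREE.
 */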

static void do_destroy_fd_scoped_permission_node(
	struct vsoc_device_region *owner_region_p,
	struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void do_destroy_fd_scoped_permission(
	struct vsoc_device_region *owner_region_p,
	struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr(
		owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	struct timespec ts;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		ts.tv_sec = arg->wake_time_sec;
		ts.tv_nsec = arg->wake_time_nsec;

		if (!timespec_valid(&ts))
			return -EINVAL;
		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts),
					     current->timer_slack_ns);

		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
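
/*
 * Illustrative use of the futex-like wait/wake ioctls from userspace
 * (pseudo-code; the region-relative offset is invented):
 *
 *	struct vsoc_cond_wait w = {0};
 *
 *	w.offset = 0x40;		// 4-byte aligned, region-relative
 *	w.value = expected;		// sleep while the word still equals this
 *	w.wait_type = VSOC_WAIT_IF_EQUAL;
 *	ioctl(fd, VSOC_COND_WAIT, &w);	// returns once the word changes
 *
 *	// writer side, after storing the new value into the shared word:
 *	ioctl(fd, VSOC_COND_WAKE, 0x40);
 *
 * VSOC_WAIT_IF_EQUAL_TIMEOUT behaves the same but also honours an absolute
 * CLOCK_MONOTONIC deadline in wake_time_sec/wake_time_nsec.
 */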

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission(
				region_p,
				node,
				(struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
			    ((struct vsoc_private_data *)filp->private_data)->
			    fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible(
			reg_data->interrupt_wait_queue,
			(atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region(
			filp,
			(struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries = kcalloc(
			vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	vsoc_dev.regions_data = kcalloc(
			vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head(
			&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(
				vsoc_dev.msix_entries[i].vector,
				vsoc_interrupt, 0,
				vsoc_dev.regions_data[i].name,
				vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (IS_ERR(device_create(vsoc_dev.class, NULL,
					 MKDEV(vsoc_dev.major, i),
					 NULL,
					 vsoc_dev.regions_data[i].name))) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}
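
/*
 * Illustrative mapping of a region from userspace (pseudo-code; the device
 * node name matches the region's device_name and the usable length can be
 * derived from VSOC_DESCRIBE_REGION or the fd-scoped permission):
 *
 *	int fd = open("/dev/<region>", O_RDWR);
 *	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 * The helpers below translate such a request into the window granted by
 * vsoc_get_area() before remapping the shared-memory pages.
 */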

/*
 * Returns the device-relative offset and length of the area specified by the
 * fd-scoped permission. If there is no fd-scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is
 * owned by another one, in which case the default is a permission with zero
 * size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission_node *node;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	node = ((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node;
	if (node) {
		off += node->permission.begin_offset;
		length = node->permission.end_offset -
			 node->permission.begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* Return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEMU's ivshmem device");
MODULE_VERSION("1.0");