1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/cdev.h>
14 #include <linux/device.h>
15 #include <linux/eventfd.h>
16 #include <linux/slab.h>
17 #include <linux/wait.h>
18 #include <linux/dma-map-ops.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/uio.h>
22 #include <linux/vdpa.h>
23 #include <linux/nospec.h>
24 #include <uapi/linux/vduse.h>
25 #include <uapi/linux/vdpa.h>
26 #include <uapi/linux/virtio_config.h>
27 #include <uapi/linux/virtio_ids.h>
28 #include <uapi/linux/virtio_blk.h>
29 #include <linux/mod_devicetable.h>
30
31 #include "iova_domain.h"
32
33 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
34 #define DRV_DESC "vDPA Device in Userspace"
35 #define DRV_LICENSE "GPL v2"
36
37 #define VDUSE_DEV_MAX (1U << MINORBITS)
38 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
39 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
40 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
41
42 struct vduse_virtqueue {
43 u16 index;
44 u16 num_max;
45 u32 num;
46 u64 desc_addr;
47 u64 driver_addr;
48 u64 device_addr;
49 struct vdpa_vq_state state;
50 bool ready;
51 bool kicked;
52 spinlock_t kick_lock;
53 spinlock_t irq_lock;
54 struct eventfd_ctx *kickfd;
55 struct vdpa_callback cb;
56 struct work_struct inject;
57 struct work_struct kick;
58 };
59
60 struct vduse_dev;
61
62 struct vduse_vdpa {
63 struct vdpa_device vdpa;
64 struct vduse_dev *dev;
65 };
66
67 struct vduse_dev {
68 struct vduse_vdpa *vdev;
69 struct device *dev;
70 struct vduse_virtqueue *vqs;
71 struct vduse_iova_domain *domain;
72 char *name;
73 struct mutex lock;
74 spinlock_t msg_lock;
75 u64 msg_unique;
76 u32 msg_timeout;
77 wait_queue_head_t waitq;
78 struct list_head send_list;
79 struct list_head recv_list;
80 struct vdpa_callback config_cb;
81 struct work_struct inject;
82 spinlock_t irq_lock;
83 struct rw_semaphore rwsem;
84 int minor;
85 bool broken;
86 bool connected;
87 u64 api_version;
88 u64 device_features;
89 u64 driver_features;
90 u32 device_id;
91 u32 vendor_id;
92 u32 generation;
93 u32 config_size;
94 void *config;
95 u8 status;
96 u32 vq_num;
97 u32 vq_align;
98 };
99
100 struct vduse_dev_msg {
101 struct vduse_dev_request req;
102 struct vduse_dev_response resp;
103 struct list_head list;
104 wait_queue_head_t waitq;
105 bool completed;
106 };
107
108 struct vduse_control {
109 u64 api_version;
110 };
111
112 static DEFINE_MUTEX(vduse_lock);
113 static DEFINE_IDR(vduse_idr);
114
115 static dev_t vduse_major;
116 static struct class *vduse_class;
117 static struct cdev vduse_ctrl_cdev;
118 static struct cdev vduse_cdev;
119 static struct workqueue_struct *vduse_irq_wq;
120
121 static u32 allowed_device_id[] = {
122 VIRTIO_ID_BLOCK,
123 };
124
125 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
126 {
127 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
128
129 return vdev->dev;
130 }
131
132 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
133 {
134 struct vdpa_device *vdpa = dev_to_vdpa(dev);
135
136 return vdpa_to_vduse(vdpa);
137 }
138
139 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
140 uint32_t request_id)
141 {
142 struct vduse_dev_msg *msg;
143
144 list_for_each_entry(msg, head, list) {
145 if (msg->req.request_id == request_id) {
146 list_del(&msg->list);
147 return msg;
148 }
149 }
150
151 return NULL;
152 }
153
154 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
155 {
156 struct vduse_dev_msg *msg = NULL;
157
158 if (!list_empty(head)) {
159 msg = list_first_entry(head, struct vduse_dev_msg, list);
160 list_del(&msg->list);
161 }
162
163 return msg;
164 }
165
166 static void vduse_enqueue_msg(struct list_head *head,
167 struct vduse_dev_msg *msg)
168 {
169 list_add_tail(&msg->list, head);
170 }
171
172 static void vduse_dev_broken(struct vduse_dev *dev)
173 {
174 struct vduse_dev_msg *msg, *tmp;
175
176 if (unlikely(dev->broken))
177 return;
178
179 list_splice_init(&dev->recv_list, &dev->send_list);
180 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
181 list_del(&msg->list);
182 msg->completed = 1;
183 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
184 wake_up(&msg->waitq);
185 }
186 dev->broken = true;
187 wake_up(&dev->waitq);
188 }
189
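/*
 * Send a request to the userspace daemon and wait for its response.
 * The message is queued on send_list and picked up via read_iter();
 * the reply arrives through write_iter(). If msg_timeout is set and
 * expires, the device is marked broken and all waiters are failed.
 */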
190 static int vduse_dev_msg_sync(struct vduse_dev *dev,
191 struct vduse_dev_msg *msg)
192 {
193 int ret;
194
195 if (unlikely(dev->broken))
196 return -EIO;
197
198 init_waitqueue_head(&msg->waitq);
199 spin_lock(&dev->msg_lock);
200 if (unlikely(dev->broken)) {
201 spin_unlock(&dev->msg_lock);
202 return -EIO;
203 }
204 msg->req.request_id = dev->msg_unique++;
205 vduse_enqueue_msg(&dev->send_list, msg);
206 wake_up(&dev->waitq);
207 spin_unlock(&dev->msg_lock);
208 if (dev->msg_timeout)
209 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
210 (long)dev->msg_timeout * HZ);
211 else
212 ret = wait_event_killable(msg->waitq, msg->completed);
213
214 spin_lock(&dev->msg_lock);
215 if (!msg->completed) {
216 list_del(&msg->list);
217 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
218 /* Mark the device as malfunctioning when there is a timeout */
219 if (!ret)
220 vduse_dev_broken(dev);
221 }
222 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
223 spin_unlock(&dev->msg_lock);
224
225 return ret;
226 }
227
228 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
229 struct vduse_virtqueue *vq,
230 struct vdpa_vq_state_packed *packed)
231 {
232 struct vduse_dev_msg msg = { 0 };
233 int ret;
234
235 msg.req.type = VDUSE_GET_VQ_STATE;
236 msg.req.vq_state.index = vq->index;
237
238 ret = vduse_dev_msg_sync(dev, &msg);
239 if (ret)
240 return ret;
241
242 packed->last_avail_counter =
243 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
244 packed->last_avail_idx =
245 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
246 packed->last_used_counter =
247 msg.resp.vq_state.packed.last_used_counter & 0x0001;
248 packed->last_used_idx =
249 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
250
251 return 0;
252 }
253
254 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
255 struct vduse_virtqueue *vq,
256 struct vdpa_vq_state_split *split)
257 {
258 struct vduse_dev_msg msg = { 0 };
259 int ret;
260
261 msg.req.type = VDUSE_GET_VQ_STATE;
262 msg.req.vq_state.index = vq->index;
263
264 ret = vduse_dev_msg_sync(dev, &msg);
265 if (ret)
266 return ret;
267
268 split->avail_index = msg.resp.vq_state.split.avail_index;
269
270 return 0;
271 }
272
273 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
274 {
275 struct vduse_dev_msg msg = { 0 };
276
277 msg.req.type = VDUSE_SET_STATUS;
278 msg.req.s.status = status;
279
280 return vduse_dev_msg_sync(dev, &msg);
281 }
282
283 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
284 u64 start, u64 last)
285 {
286 struct vduse_dev_msg msg = { 0 };
287
288 if (last < start)
289 return -EINVAL;
290
291 msg.req.type = VDUSE_UPDATE_IOTLB;
292 msg.req.iova.start = start;
293 msg.req.iova.last = last;
294
295 return vduse_dev_msg_sync(dev, &msg);
296 }
297
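/*
 * read() handler for /dev/vduse/$NAME: hand the next pending request
 * to the userspace daemon. The message is moved from send_list to
 * recv_list so it can be matched against the response written back
 * via vduse_dev_write_iter().
 */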
298 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
299 {
300 struct file *file = iocb->ki_filp;
301 struct vduse_dev *dev = file->private_data;
302 struct vduse_dev_msg *msg;
303 int size = sizeof(struct vduse_dev_request);
304 ssize_t ret;
305
306 if (iov_iter_count(to) < size)
307 return -EINVAL;
308
309 spin_lock(&dev->msg_lock);
310 while (1) {
311 msg = vduse_dequeue_msg(&dev->send_list);
312 if (msg)
313 break;
314
315 ret = -EAGAIN;
316 if (file->f_flags & O_NONBLOCK)
317 goto unlock;
318
319 spin_unlock(&dev->msg_lock);
320 ret = wait_event_interruptible_exclusive(dev->waitq,
321 !list_empty(&dev->send_list));
322 if (ret)
323 return ret;
324
325 spin_lock(&dev->msg_lock);
326 }
327 spin_unlock(&dev->msg_lock);
328 ret = copy_to_iter(&msg->req, size, to);
329 spin_lock(&dev->msg_lock);
330 if (ret != size) {
331 ret = -EFAULT;
332 vduse_enqueue_msg(&dev->send_list, msg);
333 goto unlock;
334 }
335 vduse_enqueue_msg(&dev->recv_list, msg);
336 unlock:
337 spin_unlock(&dev->msg_lock);
338
339 return ret;
340 }
341
342 static bool is_mem_zero(const char *ptr, int size)
343 {
344 int i;
345
346 for (i = 0; i < size; i++) {
347 if (ptr[i])
348 return false;
349 }
350 return true;
351 }
352
353 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
354 {
355 struct file *file = iocb->ki_filp;
356 struct vduse_dev *dev = file->private_data;
357 struct vduse_dev_response resp;
358 struct vduse_dev_msg *msg;
359 size_t ret;
360
361 ret = copy_from_iter(&resp, sizeof(resp), from);
362 if (ret != sizeof(resp))
363 return -EINVAL;
364
365 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
366 return -EINVAL;
367
368 spin_lock(&dev->msg_lock);
369 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
370 if (!msg) {
371 ret = -ENOENT;
372 goto unlock;
373 }
374
375 memcpy(&msg->resp, &resp, sizeof(resp));
376 msg->completed = 1;
377 wake_up(&msg->waitq);
378 unlock:
379 spin_unlock(&dev->msg_lock);
380
381 return ret;
382 }
383
384 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
385 {
386 struct vduse_dev *dev = file->private_data;
387 __poll_t mask = 0;
388
389 poll_wait(file, &dev->waitq, wait);
390
391 spin_lock(&dev->msg_lock);
392
393 if (unlikely(dev->broken))
394 mask |= EPOLLERR;
395 if (!list_empty(&dev->send_list))
396 mask |= EPOLLIN | EPOLLRDNORM;
397 if (!list_empty(&dev->recv_list))
398 mask |= EPOLLOUT | EPOLLWRNORM;
399
400 spin_unlock(&dev->msg_lock);
401
402 return mask;
403 }
404
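/*
 * Reset device state: clear the bounce map, status, driver features
 * and per-virtqueue state, and drop any registered callbacks and
 * kick eventfds. Runs under the write side of dev->rwsem so that
 * in-flight interrupt injection work is flushed out.
 */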
405 static void vduse_dev_reset(struct vduse_dev *dev)
406 {
407 int i;
408 struct vduse_iova_domain *domain = dev->domain;
409
410 /* The coherent mappings are handled in vduse_dev_free_coherent() */
411 if (domain->bounce_map)
412 vduse_domain_reset_bounce_map(domain);
413
414 down_write(&dev->rwsem);
415
416 dev->status = 0;
417 dev->driver_features = 0;
418 dev->generation++;
419 spin_lock(&dev->irq_lock);
420 dev->config_cb.callback = NULL;
421 dev->config_cb.private = NULL;
422 spin_unlock(&dev->irq_lock);
423 flush_work(&dev->inject);
424
425 for (i = 0; i < dev->vq_num; i++) {
426 struct vduse_virtqueue *vq = &dev->vqs[i];
427
428 vq->ready = false;
429 vq->desc_addr = 0;
430 vq->driver_addr = 0;
431 vq->device_addr = 0;
432 vq->num = 0;
433 memset(&vq->state, 0, sizeof(vq->state));
434
435 spin_lock(&vq->kick_lock);
436 vq->kicked = false;
437 if (vq->kickfd)
438 eventfd_ctx_put(vq->kickfd);
439 vq->kickfd = NULL;
440 spin_unlock(&vq->kick_lock);
441
442 spin_lock(&vq->irq_lock);
443 vq->cb.callback = NULL;
444 vq->cb.private = NULL;
445 spin_unlock(&vq->irq_lock);
446 flush_work(&vq->inject);
447 flush_work(&vq->kick);
448 }
449
450 up_write(&dev->rwsem);
451 }
452
453 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
454 u64 desc_area, u64 driver_area,
455 u64 device_area)
456 {
457 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
458 struct vduse_virtqueue *vq = &dev->vqs[idx];
459
460 vq->desc_addr = desc_area;
461 vq->driver_addr = driver_area;
462 vq->device_addr = device_area;
463
464 return 0;
465 }
466
467 static void vduse_vq_kick(struct vduse_virtqueue *vq)
468 {
469 spin_lock(&vq->kick_lock);
470 if (!vq->ready)
471 goto unlock;
472
473 if (vq->kickfd)
474 eventfd_signal(vq->kickfd, 1);
475 else
476 vq->kicked = true;
477 unlock:
478 spin_unlock(&vq->kick_lock);
479 }
480
481 static void vduse_vq_kick_work(struct work_struct *work)
482 {
483 struct vduse_virtqueue *vq = container_of(work,
484 struct vduse_virtqueue, kick);
485
486 vduse_vq_kick(vq);
487 }
488
489 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
490 {
491 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
492 struct vduse_virtqueue *vq = &dev->vqs[idx];
493
494 if (!eventfd_signal_allowed()) {
495 schedule_work(&vq->kick);
496 return;
497 }
498 vduse_vq_kick(vq);
499 }
500
501 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
502 struct vdpa_callback *cb)
503 {
504 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
505 struct vduse_virtqueue *vq = &dev->vqs[idx];
506
507 spin_lock(&vq->irq_lock);
508 vq->cb.callback = cb->callback;
509 vq->cb.private = cb->private;
510 spin_unlock(&vq->irq_lock);
511 }
512
513 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
514 {
515 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
516 struct vduse_virtqueue *vq = &dev->vqs[idx];
517
518 vq->num = num;
519 }
520
521 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
522 u16 idx, bool ready)
523 {
524 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
525 struct vduse_virtqueue *vq = &dev->vqs[idx];
526
527 vq->ready = ready;
528 }
529
530 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
531 {
532 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
533 struct vduse_virtqueue *vq = &dev->vqs[idx];
534
535 return vq->ready;
536 }
537
538 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
539 const struct vdpa_vq_state *state)
540 {
541 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
542 struct vduse_virtqueue *vq = &dev->vqs[idx];
543
544 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
545 vq->state.packed.last_avail_counter =
546 state->packed.last_avail_counter;
547 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
548 vq->state.packed.last_used_counter =
549 state->packed.last_used_counter;
550 vq->state.packed.last_used_idx = state->packed.last_used_idx;
551 } else
552 vq->state.split.avail_index = state->split.avail_index;
553
554 return 0;
555 }
556
557 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
558 struct vdpa_vq_state *state)
559 {
560 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
561 struct vduse_virtqueue *vq = &dev->vqs[idx];
562
563 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
564 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
565
566 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
567 }
568
569 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
570 {
571 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
572
573 return dev->vq_align;
574 }
575
576 static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
577 {
578 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
579
580 return dev->device_features;
581 }
582
583 static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
584 {
585 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
586
587 dev->driver_features = features;
588 return 0;
589 }
590
591 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
592 struct vdpa_callback *cb)
593 {
594 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
595
596 spin_lock(&dev->irq_lock);
597 dev->config_cb.callback = cb->callback;
598 dev->config_cb.private = cb->private;
599 spin_unlock(&dev->irq_lock);
600 }
601
602 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
603 {
604 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
605 u16 num_max = 0;
606 int i;
607
608 for (i = 0; i < dev->vq_num; i++)
609 if (num_max < dev->vqs[i].num_max)
610 num_max = dev->vqs[i].num_max;
611
612 return num_max;
613 }
614
615 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
616 {
617 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
618
619 return dev->device_id;
620 }
621
622 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
623 {
624 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
625
626 return dev->vendor_id;
627 }
628
629 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
630 {
631 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632
633 return dev->status;
634 }
635
636 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
637 {
638 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
639
640 if (vduse_dev_set_status(dev, status))
641 return;
642
643 dev->status = status;
644 }
645
646 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
647 {
648 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
649
650 return dev->config_size;
651 }
652
653 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
654 void *buf, unsigned int len)
655 {
656 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
657
658 /* Initialize the buffer in case of partial copy. */
659 memset(buf, 0, len);
660
661 if (offset > dev->config_size)
662 return;
663
664 if (len > dev->config_size - offset)
665 len = dev->config_size - offset;
666
667 memcpy(buf, dev->config + offset, len);
668 }
669
670 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
671 const void *buf, unsigned int len)
672 {
673 /* Now we only support read-only configuration space */
674 }
675
676 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
677 {
678 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
679 int ret = vduse_dev_set_status(dev, 0);
680
681 vduse_dev_reset(dev);
682
683 return ret;
684 }
685
686 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
687 {
688 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
689
690 return dev->generation;
691 }
692
693 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
694 struct vhost_iotlb *iotlb)
695 {
696 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
697 int ret;
698
699 ret = vduse_domain_set_map(dev->domain, iotlb);
700 if (ret)
701 return ret;
702
703 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
704 if (ret) {
705 vduse_domain_clear_map(dev->domain, iotlb);
706 return ret;
707 }
708
709 return 0;
710 }
711
712 static void vduse_vdpa_free(struct vdpa_device *vdpa)
713 {
714 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
715
716 dev->vdev = NULL;
717 }
718
719 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
720 .set_vq_address = vduse_vdpa_set_vq_address,
721 .kick_vq = vduse_vdpa_kick_vq,
722 .set_vq_cb = vduse_vdpa_set_vq_cb,
723 .set_vq_num = vduse_vdpa_set_vq_num,
724 .set_vq_ready = vduse_vdpa_set_vq_ready,
725 .get_vq_ready = vduse_vdpa_get_vq_ready,
726 .set_vq_state = vduse_vdpa_set_vq_state,
727 .get_vq_state = vduse_vdpa_get_vq_state,
728 .get_vq_align = vduse_vdpa_get_vq_align,
729 .get_features = vduse_vdpa_get_features,
730 .set_features = vduse_vdpa_set_features,
731 .set_config_cb = vduse_vdpa_set_config_cb,
732 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
733 .get_device_id = vduse_vdpa_get_device_id,
734 .get_vendor_id = vduse_vdpa_get_vendor_id,
735 .get_status = vduse_vdpa_get_status,
736 .set_status = vduse_vdpa_set_status,
737 .get_config_size = vduse_vdpa_get_config_size,
738 .get_config = vduse_vdpa_get_config,
739 .set_config = vduse_vdpa_set_config,
740 .get_generation = vduse_vdpa_get_generation,
741 .reset = vduse_vdpa_reset,
742 .set_map = vduse_vdpa_set_map,
743 .free = vduse_vdpa_free,
744 };
745
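/*
 * DMA mapping ops installed on the vDPA device: all mappings are
 * routed through the per-device IOVA domain so that buffers used by
 * a virtio-vdpa driver end up in memory the userspace daemon can
 * reach through the file returned by VDUSE_IOTLB_GET_FD.
 */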
746 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
747 unsigned long offset, size_t size,
748 enum dma_data_direction dir,
749 unsigned long attrs)
750 {
751 struct vduse_dev *vdev = dev_to_vduse(dev);
752 struct vduse_iova_domain *domain = vdev->domain;
753
754 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
755 }
756
757 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
758 size_t size, enum dma_data_direction dir,
759 unsigned long attrs)
760 {
761 struct vduse_dev *vdev = dev_to_vduse(dev);
762 struct vduse_iova_domain *domain = vdev->domain;
763
764 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
765 }
766
767 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
768 dma_addr_t *dma_addr, gfp_t flag,
769 unsigned long attrs)
770 {
771 struct vduse_dev *vdev = dev_to_vduse(dev);
772 struct vduse_iova_domain *domain = vdev->domain;
773 unsigned long iova;
774 void *addr;
775
776 *dma_addr = DMA_MAPPING_ERROR;
777 addr = vduse_domain_alloc_coherent(domain, size,
778 (dma_addr_t *)&iova, flag, attrs);
779 if (!addr)
780 return NULL;
781
782 *dma_addr = (dma_addr_t)iova;
783
784 return addr;
785 }
786
787 static void vduse_dev_free_coherent(struct device *dev, size_t size,
788 void *vaddr, dma_addr_t dma_addr,
789 unsigned long attrs)
790 {
791 struct vduse_dev *vdev = dev_to_vduse(dev);
792 struct vduse_iova_domain *domain = vdev->domain;
793
794 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
795 }
796
797 static size_t vduse_dev_max_mapping_size(struct device *dev)
798 {
799 struct vduse_dev *vdev = dev_to_vduse(dev);
800 struct vduse_iova_domain *domain = vdev->domain;
801
802 return domain->bounce_size;
803 }
804
805 static const struct dma_map_ops vduse_dev_dma_ops = {
806 .map_page = vduse_dev_map_page,
807 .unmap_page = vduse_dev_unmap_page,
808 .alloc = vduse_dev_alloc_coherent,
809 .free = vduse_dev_free_coherent,
810 .max_mapping_size = vduse_dev_max_mapping_size,
811 };
812
813 static unsigned int perm_to_file_flags(u8 perm)
814 {
815 unsigned int flags = 0;
816
817 switch (perm) {
818 case VDUSE_ACCESS_WO:
819 flags |= O_WRONLY;
820 break;
821 case VDUSE_ACCESS_RO:
822 flags |= O_RDONLY;
823 break;
824 case VDUSE_ACCESS_RW:
825 flags |= O_RDWR;
826 break;
827 default:
828 WARN(1, "invalid vhost IOTLB permission\n");
829 break;
830 }
831
832 return flags;
833 }
834
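/*
 * Associate (or, with VDUSE_EVENTFD_DEASSIGN, remove) the eventfd the
 * daemon wants signalled when a virtqueue is kicked. A kick that
 * raced with the setup is delivered immediately.
 */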
835 static int vduse_kickfd_setup(struct vduse_dev *dev,
836 struct vduse_vq_eventfd *eventfd)
837 {
838 struct eventfd_ctx *ctx = NULL;
839 struct vduse_virtqueue *vq;
840 u32 index;
841
842 if (eventfd->index >= dev->vq_num)
843 return -EINVAL;
844
845 index = array_index_nospec(eventfd->index, dev->vq_num);
846 vq = &dev->vqs[index];
847 if (eventfd->fd >= 0) {
848 ctx = eventfd_ctx_fdget(eventfd->fd);
849 if (IS_ERR(ctx))
850 return PTR_ERR(ctx);
851 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
852 return 0;
853
854 spin_lock(&vq->kick_lock);
855 if (vq->kickfd)
856 eventfd_ctx_put(vq->kickfd);
857 vq->kickfd = ctx;
858 if (vq->ready && vq->kicked && vq->kickfd) {
859 eventfd_signal(vq->kickfd, 1);
860 vq->kicked = false;
861 }
862 spin_unlock(&vq->kick_lock);
863
864 return 0;
865 }
866
867 static bool vduse_dev_is_ready(struct vduse_dev *dev)
868 {
869 int i;
870
871 for (i = 0; i < dev->vq_num; i++)
872 if (!dev->vqs[i].num_max)
873 return false;
874
875 return true;
876 }
877
878 static void vduse_dev_irq_inject(struct work_struct *work)
879 {
880 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
881
882 spin_lock_bh(&dev->irq_lock);
883 if (dev->config_cb.callback)
884 dev->config_cb.callback(dev->config_cb.private);
885 spin_unlock_bh(&dev->irq_lock);
886 }
887
888 static void vduse_vq_irq_inject(struct work_struct *work)
889 {
890 struct vduse_virtqueue *vq = container_of(work,
891 struct vduse_virtqueue, inject);
892
893 spin_lock_bh(&vq->irq_lock);
894 if (vq->ready && vq->cb.callback)
895 vq->cb.callback(vq->cb.private);
896 spin_unlock_bh(&vq->irq_lock);
897 }
898
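/*
 * Queue interrupt injection work on the VDUSE workqueue. Injection is
 * only allowed once the driver has set DRIVER_OK; the read side of
 * dev->rwsem keeps this from racing with a device reset.
 */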
899 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
900 struct work_struct *irq_work)
901 {
902 int ret = -EINVAL;
903
904 down_read(&dev->rwsem);
905 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
906 goto unlock;
907
908 ret = 0;
909 queue_work(vduse_irq_wq, irq_work);
910 unlock:
911 up_read(&dev->rwsem);
912
913 return ret;
914 }
915
916 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
917 unsigned long arg)
918 {
919 struct vduse_dev *dev = file->private_data;
920 void __user *argp = (void __user *)arg;
921 int ret;
922
923 if (unlikely(dev->broken))
924 return -EPERM;
925
926 switch (cmd) {
927 case VDUSE_IOTLB_GET_FD: {
928 struct vduse_iotlb_entry entry;
929 struct vhost_iotlb_map *map;
930 struct vdpa_map_file *map_file;
931 struct vduse_iova_domain *domain = dev->domain;
932 struct file *f = NULL;
933
934 ret = -EFAULT;
935 if (copy_from_user(&entry, argp, sizeof(entry)))
936 break;
937
938 ret = -EINVAL;
939 if (entry.start > entry.last)
940 break;
941
942 spin_lock(&domain->iotlb_lock);
943 map = vhost_iotlb_itree_first(domain->iotlb,
944 entry.start, entry.last);
945 if (map) {
946 map_file = (struct vdpa_map_file *)map->opaque;
947 f = get_file(map_file->file);
948 entry.offset = map_file->offset;
949 entry.start = map->start;
950 entry.last = map->last;
951 entry.perm = map->perm;
952 }
953 spin_unlock(&domain->iotlb_lock);
954 ret = -EINVAL;
955 if (!f)
956 break;
957
958 ret = -EFAULT;
959 if (copy_to_user(argp, &entry, sizeof(entry))) {
960 fput(f);
961 break;
962 }
963 ret = receive_fd(f, perm_to_file_flags(entry.perm));
964 fput(f);
965 break;
966 }
967 case VDUSE_DEV_GET_FEATURES:
968 /*
969 * Just mirror what the driver wrote here.
970 * The driver is expected to check FEATURE_OK later.
971 */
972 ret = put_user(dev->driver_features, (u64 __user *)argp);
973 break;
974 case VDUSE_DEV_SET_CONFIG: {
975 struct vduse_config_data config;
976 unsigned long size = offsetof(struct vduse_config_data,
977 buffer);
978
979 ret = -EFAULT;
980 if (copy_from_user(&config, argp, size))
981 break;
982
983 ret = -EINVAL;
984 if (config.offset > dev->config_size ||
985 config.length == 0 ||
986 config.length > dev->config_size - config.offset)
987 break;
988
989 ret = -EFAULT;
990 if (copy_from_user(dev->config + config.offset, argp + size,
991 config.length))
992 break;
993
994 ret = 0;
995 break;
996 }
997 case VDUSE_DEV_INJECT_CONFIG_IRQ:
998 ret = vduse_dev_queue_irq_work(dev, &dev->inject);
999 break;
1000 case VDUSE_VQ_SETUP: {
1001 struct vduse_vq_config config;
1002 u32 index;
1003
1004 ret = -EFAULT;
1005 if (copy_from_user(&config, argp, sizeof(config)))
1006 break;
1007
1008 ret = -EINVAL;
1009 if (config.index >= dev->vq_num)
1010 break;
1011
1012 if (!is_mem_zero((const char *)config.reserved,
1013 sizeof(config.reserved)))
1014 break;
1015
1016 index = array_index_nospec(config.index, dev->vq_num);
1017 dev->vqs[index].num_max = config.max_size;
1018 ret = 0;
1019 break;
1020 }
1021 case VDUSE_VQ_GET_INFO: {
1022 struct vduse_vq_info vq_info;
1023 struct vduse_virtqueue *vq;
1024 u32 index;
1025
1026 ret = -EFAULT;
1027 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1028 break;
1029
1030 ret = -EINVAL;
1031 if (vq_info.index >= dev->vq_num)
1032 break;
1033
1034 index = array_index_nospec(vq_info.index, dev->vq_num);
1035 vq = &dev->vqs[index];
1036 vq_info.desc_addr = vq->desc_addr;
1037 vq_info.driver_addr = vq->driver_addr;
1038 vq_info.device_addr = vq->device_addr;
1039 vq_info.num = vq->num;
1040
1041 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1042 vq_info.packed.last_avail_counter =
1043 vq->state.packed.last_avail_counter;
1044 vq_info.packed.last_avail_idx =
1045 vq->state.packed.last_avail_idx;
1046 vq_info.packed.last_used_counter =
1047 vq->state.packed.last_used_counter;
1048 vq_info.packed.last_used_idx =
1049 vq->state.packed.last_used_idx;
1050 } else
1051 vq_info.split.avail_index =
1052 vq->state.split.avail_index;
1053
1054 vq_info.ready = vq->ready;
1055
1056 ret = -EFAULT;
1057 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1058 break;
1059
1060 ret = 0;
1061 break;
1062 }
1063 case VDUSE_VQ_SETUP_KICKFD: {
1064 struct vduse_vq_eventfd eventfd;
1065
1066 ret = -EFAULT;
1067 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1068 break;
1069
1070 ret = vduse_kickfd_setup(dev, &eventfd);
1071 break;
1072 }
1073 case VDUSE_VQ_INJECT_IRQ: {
1074 u32 index;
1075
1076 ret = -EFAULT;
1077 if (get_user(index, (u32 __user *)argp))
1078 break;
1079
1080 ret = -EINVAL;
1081 if (index >= dev->vq_num)
1082 break;
1083
1084 index = array_index_nospec(index, dev->vq_num);
1085 ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
1086 break;
1087 }
1088 default:
1089 ret = -ENOIOCTLCMD;
1090 break;
1091 }
1092
1093 return ret;
1094 }
1095
1096 static int vduse_dev_release(struct inode *inode, struct file *file)
1097 {
1098 struct vduse_dev *dev = file->private_data;
1099
1100 spin_lock(&dev->msg_lock);
1101 /* Make sure the inflight messages can be processed after reconnection */
1102 list_splice_init(&dev->recv_list, &dev->send_list);
1103 spin_unlock(&dev->msg_lock);
1104 dev->connected = false;
1105
1106 return 0;
1107 }
1108
1109 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1110 {
1111 struct vduse_dev *dev;
1112
1113 mutex_lock(&vduse_lock);
1114 dev = idr_find(&vduse_idr, minor);
1115 mutex_unlock(&vduse_lock);
1116
1117 return dev;
1118 }
1119
1120 static int vduse_dev_open(struct inode *inode, struct file *file)
1121 {
1122 int ret;
1123 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1124
1125 if (!dev)
1126 return -ENODEV;
1127
1128 ret = -EBUSY;
1129 mutex_lock(&dev->lock);
1130 if (dev->connected)
1131 goto unlock;
1132
1133 ret = 0;
1134 dev->connected = true;
1135 file->private_data = dev;
1136 unlock:
1137 mutex_unlock(&dev->lock);
1138
1139 return ret;
1140 }
1141
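/*
 * File operations for /dev/vduse/$NAME: the userspace daemon reads
 * device requests, writes back responses, polls for new messages and
 * configures virtqueues through the VDUSE_* ioctls.
 */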
1142 static const struct file_operations vduse_dev_fops = {
1143 .owner = THIS_MODULE,
1144 .open = vduse_dev_open,
1145 .release = vduse_dev_release,
1146 .read_iter = vduse_dev_read_iter,
1147 .write_iter = vduse_dev_write_iter,
1148 .poll = vduse_dev_poll,
1149 .unlocked_ioctl = vduse_dev_ioctl,
1150 .compat_ioctl = compat_ptr_ioctl,
1151 .llseek = noop_llseek,
1152 };
1153
1154 static struct vduse_dev *vduse_dev_create(void)
1155 {
1156 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1157
1158 if (!dev)
1159 return NULL;
1160
1161 mutex_init(&dev->lock);
1162 spin_lock_init(&dev->msg_lock);
1163 INIT_LIST_HEAD(&dev->send_list);
1164 INIT_LIST_HEAD(&dev->recv_list);
1165 spin_lock_init(&dev->irq_lock);
1166 init_rwsem(&dev->rwsem);
1167
1168 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1169 init_waitqueue_head(&dev->waitq);
1170
1171 return dev;
1172 }
1173
1174 static void vduse_dev_destroy(struct vduse_dev *dev)
1175 {
1176 kfree(dev);
1177 }
1178
1179 static struct vduse_dev *vduse_find_dev(const char *name)
1180 {
1181 struct vduse_dev *dev;
1182 int id;
1183
1184 idr_for_each_entry(&vduse_idr, dev, id)
1185 if (!strcmp(dev->name, name))
1186 return dev;
1187
1188 return NULL;
1189 }
1190
1191 static int vduse_destroy_dev(char *name)
1192 {
1193 struct vduse_dev *dev = vduse_find_dev(name);
1194
1195 if (!dev)
1196 return -EINVAL;
1197
1198 mutex_lock(&dev->lock);
1199 if (dev->vdev || dev->connected) {
1200 mutex_unlock(&dev->lock);
1201 return -EBUSY;
1202 }
1203 dev->connected = true;
1204 mutex_unlock(&dev->lock);
1205
1206 vduse_dev_reset(dev);
1207 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1208 idr_remove(&vduse_idr, dev->minor);
1209 kvfree(dev->config);
1210 kfree(dev->vqs);
1211 vduse_domain_destroy(dev->domain);
1212 kfree(dev->name);
1213 vduse_dev_destroy(dev);
1214 module_put(THIS_MODULE);
1215
1216 return 0;
1217 }
1218
1219 static bool device_is_allowed(u32 device_id)
1220 {
1221 int i;
1222
1223 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1224 if (allowed_device_id[i] == device_id)
1225 return true;
1226
1227 return false;
1228 }
1229
1230 static bool features_is_valid(u64 features)
1231 {
1232 if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
1233 return false;
1234
1235 /* Now we only support read-only configuration space */
1236 if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
1237 return false;
1238
1239 return true;
1240 }
1241
1242 static bool vduse_validate_config(struct vduse_dev_config *config)
1243 {
1244 if (!is_mem_zero((const char *)config->reserved,
1245 sizeof(config->reserved)))
1246 return false;
1247
1248 if (config->vq_align > PAGE_SIZE)
1249 return false;
1250
1251 if (config->config_size > PAGE_SIZE)
1252 return false;
1253
1254 if (config->vq_num > 0xffff)
1255 return false;
1256
1257 if (!config->name[0])
1258 return false;
1259
1260 if (!device_is_allowed(config->device_id))
1261 return false;
1262
1263 if (!features_is_valid(config->features))
1264 return false;
1265
1266 return true;
1267 }
1268
1269 static ssize_t msg_timeout_show(struct device *device,
1270 struct device_attribute *attr, char *buf)
1271 {
1272 struct vduse_dev *dev = dev_get_drvdata(device);
1273
1274 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1275 }
1276
1277 static ssize_t msg_timeout_store(struct device *device,
1278 struct device_attribute *attr,
1279 const char *buf, size_t count)
1280 {
1281 struct vduse_dev *dev = dev_get_drvdata(device);
1282 int ret;
1283
1284 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1285 if (ret < 0)
1286 return ret;
1287
1288 return count;
1289 }
1290
1291 static DEVICE_ATTR_RW(msg_timeout);
1292
1293 static struct attribute *vduse_dev_attrs[] = {
1294 &dev_attr_msg_timeout.attr,
1295 NULL
1296 };
1297
1298 ATTRIBUTE_GROUPS(vduse_dev);
1299
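/*
 * Create a VDUSE device from a validated config: set up the IOVA
 * domain and virtqueues, allocate a minor and register the character
 * device under /dev/vduse/. On failure everything is unwound in
 * reverse order and the caller-provided config buffer is freed.
 */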
1300 static int vduse_create_dev(struct vduse_dev_config *config,
1301 void *config_buf, u64 api_version)
1302 {
1303 int i, ret;
1304 struct vduse_dev *dev;
1305
1306 ret = -EEXIST;
1307 if (vduse_find_dev(config->name))
1308 goto err;
1309
1310 ret = -ENOMEM;
1311 dev = vduse_dev_create();
1312 if (!dev)
1313 goto err;
1314
1315 dev->api_version = api_version;
1316 dev->device_features = config->features;
1317 dev->device_id = config->device_id;
1318 dev->vendor_id = config->vendor_id;
1319 dev->name = kstrdup(config->name, GFP_KERNEL);
1320 if (!dev->name)
1321 goto err_str;
1322
1323 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
1324 VDUSE_BOUNCE_SIZE);
1325 if (!dev->domain)
1326 goto err_domain;
1327
1328 dev->config = config_buf;
1329 dev->config_size = config->config_size;
1330 dev->vq_align = config->vq_align;
1331 dev->vq_num = config->vq_num;
1332 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1333 if (!dev->vqs)
1334 goto err_vqs;
1335
1336 for (i = 0; i < dev->vq_num; i++) {
1337 dev->vqs[i].index = i;
1338 INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
1339 INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
1340 spin_lock_init(&dev->vqs[i].kick_lock);
1341 spin_lock_init(&dev->vqs[i].irq_lock);
1342 }
1343
1344 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1345 if (ret < 0)
1346 goto err_idr;
1347
1348 dev->minor = ret;
1349 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1350 dev->dev = device_create_with_groups(vduse_class, NULL,
1351 MKDEV(MAJOR(vduse_major), dev->minor),
1352 dev, vduse_dev_groups, "%s", config->name);
1353 if (IS_ERR(dev->dev)) {
1354 ret = PTR_ERR(dev->dev);
1355 goto err_dev;
1356 }
1357 __module_get(THIS_MODULE);
1358
1359 return 0;
1360 err_dev:
1361 idr_remove(&vduse_idr, dev->minor);
1362 err_idr:
1363 kfree(dev->vqs);
1364 err_vqs:
1365 vduse_domain_destroy(dev->domain);
1366 err_domain:
1367 kfree(dev->name);
1368 err_str:
1369 vduse_dev_destroy(dev);
1370 err:
1371 kvfree(config_buf);
1372 return ret;
1373 }
1374
1375 static long vduse_ioctl(struct file *file, unsigned int cmd,
1376 unsigned long arg)
1377 {
1378 int ret;
1379 void __user *argp = (void __user *)arg;
1380 struct vduse_control *control = file->private_data;
1381
1382 mutex_lock(&vduse_lock);
1383 switch (cmd) {
1384 case VDUSE_GET_API_VERSION:
1385 ret = put_user(control->api_version, (u64 __user *)argp);
1386 break;
1387 case VDUSE_SET_API_VERSION: {
1388 u64 api_version;
1389
1390 ret = -EFAULT;
1391 if (get_user(api_version, (u64 __user *)argp))
1392 break;
1393
1394 ret = -EINVAL;
1395 if (api_version > VDUSE_API_VERSION)
1396 break;
1397
1398 ret = 0;
1399 control->api_version = api_version;
1400 break;
1401 }
1402 case VDUSE_CREATE_DEV: {
1403 struct vduse_dev_config config;
1404 unsigned long size = offsetof(struct vduse_dev_config, config);
1405 void *buf;
1406
1407 ret = -EFAULT;
1408 if (copy_from_user(&config, argp, size))
1409 break;
1410
1411 ret = -EINVAL;
1412 if (vduse_validate_config(&config) == false)
1413 break;
1414
1415 buf = vmemdup_user(argp + size, config.config_size);
1416 if (IS_ERR(buf)) {
1417 ret = PTR_ERR(buf);
1418 break;
1419 }
1420 config.name[VDUSE_NAME_MAX - 1] = '\0';
1421 ret = vduse_create_dev(&config, buf, control->api_version);
1422 break;
1423 }
1424 case VDUSE_DESTROY_DEV: {
1425 char name[VDUSE_NAME_MAX];
1426
1427 ret = -EFAULT;
1428 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1429 break;
1430
1431 name[VDUSE_NAME_MAX - 1] = '\0';
1432 ret = vduse_destroy_dev(name);
1433 break;
1434 }
1435 default:
1436 ret = -EINVAL;
1437 break;
1438 }
1439 mutex_unlock(&vduse_lock);
1440
1441 return ret;
1442 }
1443
1444 static int vduse_release(struct inode *inode, struct file *file)
1445 {
1446 struct vduse_control *control = file->private_data;
1447
1448 kfree(control);
1449 return 0;
1450 }
1451
1452 static int vduse_open(struct inode *inode, struct file *file)
1453 {
1454 struct vduse_control *control;
1455
1456 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1457 if (!control)
1458 return -ENOMEM;
1459
1460 control->api_version = VDUSE_API_VERSION;
1461 file->private_data = control;
1462
1463 return 0;
1464 }
1465
1466 static const struct file_operations vduse_ctrl_fops = {
1467 .owner = THIS_MODULE,
1468 .open = vduse_open,
1469 .release = vduse_release,
1470 .unlocked_ioctl = vduse_ioctl,
1471 .compat_ioctl = compat_ptr_ioctl,
1472 .llseek = noop_llseek,
1473 };
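/*
 * A rough sketch of the expected userspace flow (not enforced here):
 * open /dev/vduse/control, optionally negotiate VDUSE_SET_API_VERSION,
 * issue VDUSE_CREATE_DEV, then open /dev/vduse/$NAME, size each queue
 * with VDUSE_VQ_SETUP, and service requests via read()/write() before
 * the device is attached through the vdpa management interface
 * ("vdpa dev add ... mgmtdev vduse").
 */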
1474
1475 static char *vduse_devnode(struct device *dev, umode_t *mode)
1476 {
1477 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1478 }
1479
1480 struct vduse_mgmt_dev {
1481 struct vdpa_mgmt_dev mgmt_dev;
1482 struct device dev;
1483 };
1484
1485 static struct vduse_mgmt_dev *vduse_mgmt;
1486
1487 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1488 {
1489 struct vduse_vdpa *vdev;
1490 int ret;
1491
1492 if (dev->vdev)
1493 return -EEXIST;
1494
1495 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1496 &vduse_vdpa_config_ops, name, true);
1497 if (IS_ERR(vdev))
1498 return PTR_ERR(vdev);
1499
1500 dev->vdev = vdev;
1501 vdev->dev = dev;
1502 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1503 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1504 if (ret) {
1505 put_device(&vdev->vdpa.dev);
1506 return ret;
1507 }
1508 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1509 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1510 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1511
1512 return 0;
1513 }
1514
1515 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
1516 {
1517 struct vduse_dev *dev;
1518 int ret;
1519
1520 mutex_lock(&vduse_lock);
1521 dev = vduse_find_dev(name);
1522 if (!dev || !vduse_dev_is_ready(dev)) {
1523 mutex_unlock(&vduse_lock);
1524 return -EINVAL;
1525 }
1526 ret = vduse_dev_init_vdpa(dev, name);
1527 mutex_unlock(&vduse_lock);
1528 if (ret)
1529 return ret;
1530
1531 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
1532 if (ret) {
1533 put_device(&dev->vdev->vdpa.dev);
1534 return ret;
1535 }
1536
1537 return 0;
1538 }
1539
1540 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
1541 {
1542 _vdpa_unregister_device(dev);
1543 }
1544
1545 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
1546 .dev_add = vdpa_dev_add,
1547 .dev_del = vdpa_dev_del,
1548 };
1549
1550 static struct virtio_device_id id_table[] = {
1551 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
1552 { 0 },
1553 };
1554
1555 static void vduse_mgmtdev_release(struct device *dev)
1556 {
1557 struct vduse_mgmt_dev *mgmt_dev;
1558
1559 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
1560 kfree(mgmt_dev);
1561 }
1562
1563 static int vduse_mgmtdev_init(void)
1564 {
1565 int ret;
1566
1567 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
1568 if (!vduse_mgmt)
1569 return -ENOMEM;
1570
1571 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
1572 if (ret) {
1573 kfree(vduse_mgmt);
1574 return ret;
1575 }
1576
1577 vduse_mgmt->dev.release = vduse_mgmtdev_release;
1578
1579 ret = device_register(&vduse_mgmt->dev);
1580 if (ret)
1581 goto dev_reg_err;
1582
1583 vduse_mgmt->mgmt_dev.id_table = id_table;
1584 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
1585 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
1586 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
1587 if (ret)
1588 device_unregister(&vduse_mgmt->dev);
1589
1590 return ret;
1591
1592 dev_reg_err:
1593 put_device(&vduse_mgmt->dev);
1594 return ret;
1595 }
1596
1597 static void vduse_mgmtdev_exit(void)
1598 {
1599 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
1600 device_unregister(&vduse_mgmt->dev);
1601 }
1602
1603 static int vduse_init(void)
1604 {
1605 int ret;
1606 struct device *dev;
1607
1608 vduse_class = class_create(THIS_MODULE, "vduse");
1609 if (IS_ERR(vduse_class))
1610 return PTR_ERR(vduse_class);
1611
1612 vduse_class->devnode = vduse_devnode;
1613
1614 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
1615 if (ret)
1616 goto err_chardev_region;
1617
1618 /* /dev/vduse/control */
1619 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
1620 vduse_ctrl_cdev.owner = THIS_MODULE;
1621 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
1622 if (ret)
1623 goto err_ctrl_cdev;
1624
1625 dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
1626 if (IS_ERR(dev)) {
1627 ret = PTR_ERR(dev);
1628 goto err_device;
1629 }
1630
1631 /* /dev/vduse/$DEVICE */
1632 cdev_init(&vduse_cdev, &vduse_dev_fops);
1633 vduse_cdev.owner = THIS_MODULE;
1634 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
1635 VDUSE_DEV_MAX - 1);
1636 if (ret)
1637 goto err_cdev;
1638
1639 vduse_irq_wq = alloc_workqueue("vduse-irq",
1640 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
1641 if (!vduse_irq_wq) {
1642 ret = -ENOMEM;
1643 goto err_wq;
1644 }
1645
1646 ret = vduse_domain_init();
1647 if (ret)
1648 goto err_domain;
1649
1650 ret = vduse_mgmtdev_init();
1651 if (ret)
1652 goto err_mgmtdev;
1653
1654 return 0;
1655 err_mgmtdev:
1656 vduse_domain_exit();
1657 err_domain:
1658 destroy_workqueue(vduse_irq_wq);
1659 err_wq:
1660 cdev_del(&vduse_cdev);
1661 err_cdev:
1662 device_destroy(vduse_class, vduse_major);
1663 err_device:
1664 cdev_del(&vduse_ctrl_cdev);
1665 err_ctrl_cdev:
1666 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1667 err_chardev_region:
1668 class_destroy(vduse_class);
1669 return ret;
1670 }
1671 module_init(vduse_init);
1672
1673 static void vduse_exit(void)
1674 {
1675 vduse_mgmtdev_exit();
1676 vduse_domain_exit();
1677 destroy_workqueue(vduse_irq_wq);
1678 cdev_del(&vduse_cdev);
1679 device_destroy(vduse_class, vduse_major);
1680 cdev_del(&vduse_ctrl_cdev);
1681 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1682 class_destroy(vduse_class);
1683 }
1684 module_exit(vduse_exit);
1685
1686 MODULE_LICENSE(DRV_LICENSE);
1687 MODULE_AUTHOR(DRV_AUTHOR);
1688 MODULE_DESCRIPTION(DRV_DESC);
1689