1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Virtio vhost-user driver
4 *
5 * Copyright(c) 2019 Intel Corporation
6 *
7 * This driver allows virtio devices to be used over a vhost-user socket.
8 *
9 * Guest devices can be instantiated by kernel module or command line
10 * parameters. One device will be created for each parameter. Syntax:
11 *
12 * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13 * where:
14 * <socket> := vhost-user socket path to connect
15 * <virtio_id> := virtio device id (as in virtio_ids.h)
16 * <platform_id> := (optional) platform device id
17 *
18 * example:
19 * virtio_uml.device=/var/uml.socket:1
20 *
21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22 */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <shared/as-layout.h>
30 #include <irq_kern.h>
31 #include <init.h>
32 #include <os.h>
33 #include "vhost_user.h"
34
/*
 * Workaround: irq_user.h and irqreturn.h conflict over IRQ_NONE, so drop
 * any macro definition of it that is visible at this point.
 */
#ifdef IRQ_NONE
#undef IRQ_NONE
#endif

/* Ring size requested when a virtqueue is created (see vu_setup_vq()). */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Map an embedded struct virtio_device back to its virtio_uml_device. */
#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
44
45 struct virtio_uml_device {
46 struct virtio_device vdev;
47 struct platform_device *pdev;
48
49 int sock, req_fd;
50 u64 features;
51 u64 protocol_features;
52 u8 status;
53 };
54
/* Per-virtqueue data, stored in vq->priv. */
struct virtio_uml_vq_info {
	/* written by vu_notify() to kick the slave */
	int kick_fd;
	/* read by vu_interrupt() when the slave signals the queue */
	int call_fd;
	/* "<pdev name>.<pdev id>-<queue name>", used for the vring and IRQ */
	char name[32];
};
59
/* Defined outside this file; used by vhost_user_set_mem_table(). */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() against the platform device behind @vu_dev. */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
63
64 /* Vhost-user protocol */
65
/*
 * Send @len bytes from @buf on @fd, retrying after partial sends and
 * after -EINTR.
 *
 * @fds/@fds_num are handed to os_sendmsg_fds() only until a call makes
 * progress; every later call is made without descriptors.
 *
 * Returns 0 on success, otherwise the negative value of the failing
 * os_sendmsg_fds() call.
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int sent;

	for (;;) {
		sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (sent > 0) {
			buf += sent;
			len -= sent;
			/* do not pass the descriptors again */
			fds = NULL;
			fds_num = 0;
		}

		if (!len)
			break;
		if (sent < 0 && sent != -EINTR)
			break;
	}

	return sent < 0 ? sent : 0;
}
85
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * Short reads and -EINTR are always retried.  -EAGAIN is retried only when
 * @abortable is false; with @abortable set it is returned to the caller.
 *
 * A zero @len succeeds immediately without touching @fd.  Otherwise the
 * read below would presumably return 0, which cannot be told apart from
 * end-of-file and would be reported as -ECONNRESET; that would break
 * every message whose header announces an empty payload.
 *
 * Returns 0 on success, -ECONNRESET if os_read_file() returned 0 before
 * all data arrived, or the negative value returned by os_read_file().
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR ||
			 (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}
104
vhost_user_recv_header(int fd,struct vhost_user_msg * msg)105 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
106 {
107 return full_read(fd, msg, sizeof(msg->header), true);
108 }
109
vhost_user_recv(int fd,struct vhost_user_msg * msg,size_t max_payload_size)110 static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
111 size_t max_payload_size)
112 {
113 size_t size;
114 int rc = vhost_user_recv_header(fd, msg);
115
116 if (rc)
117 return rc;
118 size = msg->header.size;
119 if (size > max_payload_size)
120 return -EPROTO;
121 return full_read(fd, &msg->payload, size, false);
122 }
123
vhost_user_recv_resp(struct virtio_uml_device * vu_dev,struct vhost_user_msg * msg,size_t max_payload_size)124 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
125 struct vhost_user_msg *msg,
126 size_t max_payload_size)
127 {
128 int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
129
130 if (rc)
131 return rc;
132
133 if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
134 return -EPROTO;
135
136 return 0;
137 }
138
/*
 * Receive a response whose payload is a single 64-bit integer and store
 * it in @value.
 *
 * Returns 0 on success, -EPROTO if the payload size is not that of the
 * integer, or the receive error.  @value is only written on success.
 */
static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg resp;
	int err;

	err = vhost_user_recv_resp(vu_dev, &resp,
				   sizeof(resp.payload.integer));
	if (err)
		return err;

	if (resp.header.size == sizeof(resp.payload.integer)) {
		*value = resp.payload.integer;
		return 0;
	}

	return -EPROTO;
}
153
vhost_user_recv_req(struct virtio_uml_device * vu_dev,struct vhost_user_msg * msg,size_t max_payload_size)154 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
155 struct vhost_user_msg *msg,
156 size_t max_payload_size)
157 {
158 int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
159
160 if (rc)
161 return rc;
162
163 if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
164 VHOST_USER_VERSION)
165 return -EPROTO;
166
167 return 0;
168 }
169
vhost_user_send(struct virtio_uml_device * vu_dev,bool need_response,struct vhost_user_msg * msg,int * fds,size_t num_fds)170 static int vhost_user_send(struct virtio_uml_device *vu_dev,
171 bool need_response, struct vhost_user_msg *msg,
172 int *fds, size_t num_fds)
173 {
174 size_t size = sizeof(msg->header) + msg->header.size;
175 bool request_ack;
176 int rc;
177
178 msg->header.flags |= VHOST_USER_VERSION;
179
180 /*
181 * The need_response flag indicates that we already need a response,
182 * e.g. to read the features. In these cases, don't request an ACK as
183 * it is meaningless. Also request an ACK only if supported.
184 */
185 request_ack = !need_response;
186 if (!(vu_dev->protocol_features &
187 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
188 request_ack = false;
189
190 if (request_ack)
191 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
192
193 rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
194 if (rc < 0)
195 return rc;
196
197 if (request_ack) {
198 uint64_t status;
199
200 rc = vhost_user_recv_u64(vu_dev, &status);
201 if (rc)
202 return rc;
203
204 if (status) {
205 vu_err(vu_dev, "slave reports error: %llu\n", status);
206 return -EIO;
207 }
208 }
209
210 return 0;
211 }
212
/* Send a header-only message of type @request, without descriptors. */
static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      bool need_response, u32 request)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
		},
	};

	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
}
222
/*
 * Send a header-only message of type @request that carries the single
 * descriptor @fd.  The caller does not expect a response of its own.
 */
static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
					 u32 request, int fd)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
		},
	};

	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}
232
/* Send a message of type @request whose payload is the integer @value. */
static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.integer),
		},
		.payload.integer = value,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
244
vhost_user_set_owner(struct virtio_uml_device * vu_dev)245 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
246 {
247 return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
248 }
249
/*
 * Ask the slave for its feature bits and store the answer in @features.
 * Returns 0 on success or the send/receive error.
 */
static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int err;

	err = vhost_user_send_no_payload(vu_dev, true,
					 VHOST_USER_GET_FEATURES);
	if (!err)
		err = vhost_user_recv_u64(vu_dev, features);

	return err;
}
260
/* Send @features to the slave with VHOST_USER_SET_FEATURES. */
static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	const u32 request = VHOST_USER_SET_FEATURES;

	return vhost_user_send_u64(vu_dev, request, features);
}
266
/*
 * Ask the slave for its vhost-user protocol features and store the answer
 * in @protocol_features.  Returns 0 on success or the send/receive error.
 */
static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	int err;

	err = vhost_user_send_no_payload(vu_dev, true,
					 VHOST_USER_GET_PROTOCOL_FEATURES);
	if (!err)
		err = vhost_user_recv_u64(vu_dev, protocol_features);

	return err;
}
277
/*
 * Send @protocol_features to the slave with
 * VHOST_USER_SET_PROTOCOL_FEATURES.
 */
static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 protocol_features)
{
	const u32 request = VHOST_USER_SET_PROTOCOL_FEATURES;

	return vhost_user_send_u64(vu_dev, request, protocol_features);
}
284
vhost_user_reply(struct virtio_uml_device * vu_dev,struct vhost_user_msg * msg,int response)285 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
286 struct vhost_user_msg *msg, int response)
287 {
288 struct vhost_user_msg reply = {
289 .payload.integer = response,
290 };
291 size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
292 int rc;
293
294 reply.header = msg->header;
295 reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
296 reply.header.flags |= VHOST_USER_FLAG_REPLY;
297 reply.header.size = sizeof(reply.payload.integer);
298
299 rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
300
301 if (rc)
302 vu_err(vu_dev,
303 "sending reply to slave request failed: %d (size %zu)\n",
304 rc, size);
305 }
306
vu_req_interrupt(int irq,void * data)307 static irqreturn_t vu_req_interrupt(int irq, void *data)
308 {
309 struct virtio_uml_device *vu_dev = data;
310 int response = 1;
311 struct {
312 struct vhost_user_msg msg;
313 u8 extra_payload[512];
314 } msg;
315 int rc;
316
317 rc = vhost_user_recv_req(vu_dev, &msg.msg,
318 sizeof(msg.msg.payload) +
319 sizeof(msg.extra_payload));
320
321 if (rc)
322 return IRQ_NONE;
323
324 switch (msg.msg.header.request) {
325 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
326 virtio_config_changed(&vu_dev->vdev);
327 response = 0;
328 break;
329 case VHOST_USER_SLAVE_IOTLB_MSG:
330 /* not supported - VIRTIO_F_IOMMU_PLATFORM */
331 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
332 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
333 default:
334 vu_err(vu_dev, "unexpected slave request %d\n",
335 msg.msg.header.request);
336 }
337
338 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
339 vhost_user_reply(vu_dev, &msg.msg, response);
340
341 return IRQ_HANDLED;
342 }
343
vhost_user_init_slave_req(struct virtio_uml_device * vu_dev)344 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
345 {
346 int rc, req_fds[2];
347
348 /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
349 rc = os_pipe(req_fds, true, true);
350 if (rc < 0)
351 return rc;
352 vu_dev->req_fd = req_fds[0];
353
354 rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
355 vu_req_interrupt, IRQF_SHARED,
356 vu_dev->pdev->name, vu_dev);
357 if (rc)
358 goto err_close;
359
360 rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
361 req_fds[1]);
362 if (rc)
363 goto err_free_irq;
364
365 goto out;
366
367 err_free_irq:
368 um_free_irq(VIRTIO_IRQ, vu_dev);
369 err_close:
370 os_close_file(req_fds[0]);
371 out:
372 /* Close unused write end of request fds */
373 os_close_file(req_fds[1]);
374 return rc;
375 }
376
vhost_user_init(struct virtio_uml_device * vu_dev)377 static int vhost_user_init(struct virtio_uml_device *vu_dev)
378 {
379 int rc = vhost_user_set_owner(vu_dev);
380
381 if (rc)
382 return rc;
383 rc = vhost_user_get_features(vu_dev, &vu_dev->features);
384 if (rc)
385 return rc;
386
387 if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
388 rc = vhost_user_get_protocol_features(vu_dev,
389 &vu_dev->protocol_features);
390 if (rc)
391 return rc;
392 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
393 rc = vhost_user_set_protocol_features(vu_dev,
394 vu_dev->protocol_features);
395 if (rc)
396 return rc;
397 }
398
399 if (vu_dev->protocol_features &
400 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
401 rc = vhost_user_init_slave_req(vu_dev);
402 if (rc)
403 return rc;
404 }
405
406 return 0;
407 }
408
/*
 * Fetch @len bytes of the device config space at @offset into @buf.
 *
 * Nothing happens (and @buf is left untouched) when
 * VHOST_USER_PROTOCOL_F_CONFIG is not in the protocol features, when the
 * message buffer cannot be allocated, or when the exchange with the slave
 * fails; failures of the exchange are logged.
 *
 * The request always covers the range [0, @offset + @len); the part
 * starting at @offset is then copied out of the reply.
 */
static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	/*
	 * Only payload_size bytes of the allocation lie behind the header,
	 * so that is the largest payload that may be accepted.  Allowing
	 * msg_size here would let an oversized reply run past the end of
	 * the buffer by up to sizeof(msg->header) bytes.
	 */
	rc = vhost_user_recv_resp(vu_dev, msg, payload_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}
	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}
459
/*
 * Write @len bytes from @buf to the device config space at @offset.
 *
 * Nothing happens when VHOST_USER_PROTOCOL_F_CONFIG is not in the protocol
 * features or when the message buffer cannot be allocated.  A send failure
 * is only logged.
 */
static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *req;
	const size_t payload_size = sizeof(req->payload.config) + len;
	const size_t msg_size = sizeof(req->header) + payload_size;
	int err;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	req = kzalloc(msg_size, GFP_KERNEL);
	if (!req)
		return;

	req->header.request = VHOST_USER_SET_CONFIG;
	req->header.size = payload_size;
	req->payload.config.offset = offset;
	req->payload.config.size = len;
	memcpy(req->payload.config.payload, buf, len);

	err = vhost_user_send(vu_dev, false, req, NULL, 0);
	if (err)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       err);

	kfree(req);
}
488
/*
 * Describe the physical range [@addr, @addr + @size) as a vhost-user
 * memory region.
 *
 * phys_mapping() supplies the backing descriptor (stored in @fd_out) and
 * the offset used as mmap_offset; guest_addr and user_addr are both set
 * to @addr.
 *
 * Returns 0 on success, or -EFAULT (with a WARN) if @addr has no mapping
 * or the last byte of the range maps to a different descriptor.
 */
static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long mem_offset;
	int fd;

	fd = phys_mapping(addr, &mem_offset);
	if (WARN(fd < 0, "phys_mapping of 0x%llx returned %d\n", addr, fd))
		return -EFAULT;

	*fd_out = fd;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = mem_offset;

	/* Ensure mapping is valid for the entire region */
	fd = phys_mapping(addr + size - 1, &mem_offset);
	if (WARN(fd != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, fd, *fd_out))
		return -EFAULT;

	return 0;
}
510
vhost_user_set_mem_table(struct virtio_uml_device * vu_dev)511 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
512 {
513 struct vhost_user_msg msg = {
514 .header.request = VHOST_USER_SET_MEM_TABLE,
515 .header.size = sizeof(msg.payload.mem_regions),
516 .payload.mem_regions.num = 1,
517 };
518 unsigned long reserved = uml_reserved - uml_physmem;
519 int fds[2];
520 int rc;
521
522 /*
523 * This is a bit tricky, see also the comment with setup_physmem().
524 *
525 * Essentially, setup_physmem() uses a file to mmap() our physmem,
526 * but the code and data we *already* have is omitted. To us, this
527 * is no difference, since they both become part of our address
528 * space and memory consumption. To somebody looking in from the
529 * outside, however, it is different because the part of our memory
530 * consumption that's already part of the binary (code/data) is not
531 * mapped from the file, so it's not visible to another mmap from
532 * the file descriptor.
533 *
534 * Thus, don't advertise this space to the vhost-user slave. This
535 * means that the slave will likely abort or similar when we give
536 * it an address from the hidden range, since it's not marked as
537 * a valid address, but at least that way we detect the issue and
538 * don't just have the slave read an all-zeroes buffer from the
539 * shared memory file, or write something there that we can never
540 * see (depending on the direction of the virtqueue traffic.)
541 *
542 * Since we usually don't want to use .text for virtio buffers,
543 * this effectively means that you cannot use
544 * 1) global variables, which are in the .bss and not in the shm
545 * file-backed memory
546 * 2) the stack in some processes, depending on where they have
547 * their stack (or maybe only no interrupt stack?)
548 *
549 * The stack is already not typically valid for DMA, so this isn't
550 * much of a restriction, but global variables might be encountered.
551 *
552 * It might be possible to fix it by copying around the data that's
553 * between bss_start and where we map the file now, but it's not
554 * something that you typically encounter with virtio drivers, so
555 * it didn't seem worthwhile.
556 */
557 rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
558 &fds[0],
559 &msg.payload.mem_regions.regions[0]);
560
561 if (rc < 0)
562 return rc;
563 if (highmem) {
564 msg.payload.mem_regions.num++;
565 rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
566 &fds[1], &msg.payload.mem_regions.regions[1]);
567 if (rc < 0)
568 return rc;
569 }
570
571 return vhost_user_send(vu_dev, false, &msg, fds,
572 msg.payload.mem_regions.num);
573 }
574
/*
 * Send a vring-state message of type @request carrying the pair
 * (@index, @num).
 */
static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.vring_state),
		},
		.payload.vring_state = {
			.index = index,
			.num = num,
		},
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
587
/* Send VHOST_USER_SET_VRING_NUM with value @num for vring @index. */
static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	const u32 request = VHOST_USER_SET_VRING_NUM;

	return vhost_user_set_vring_state(vu_dev, request, index, num);
}
594
/* Send VHOST_USER_SET_VRING_BASE with value @offset for vring @index. */
static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	const u32 request = VHOST_USER_SET_VRING_BASE;

	return vhost_user_set_vring_state(vu_dev, request, index, offset);
}
601
/*
 * Send VHOST_USER_SET_VRING_ADDR for vring @index, carrying the
 * descriptor, used, avail and log addresses.
 */
static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = VHOST_USER_SET_VRING_ADDR,
			.size = sizeof(msg.payload.vring_addr),
		},
		.payload.vring_addr = {
			.index = index,
			.desc = desc,
			.used = used,
			.avail = avail,
			.log = log,
		},
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
618
/*
 * Send a message of type @request that associates @fd with vring @index.
 *
 * A negative @fd sends no descriptor and sets VHOST_USER_VRING_POLL_MASK
 * in the payload instead.
 *
 * Returns -EINVAL if @index has bits outside VHOST_USER_VRING_INDEX_MASK,
 * otherwise the result of sending the message.
 */
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.integer),
		},
		.payload.integer = index,
	};
	int *fds = &fd;
	size_t num_fds = 1;

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;

	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		fds = NULL;
		num_fds = 0;
	}

	return vhost_user_send(vu_dev, false, &msg, fds, num_fds);
}
636
vhost_user_set_vring_call(struct virtio_uml_device * vu_dev,int index,int fd)637 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
638 int index, int fd)
639 {
640 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
641 index, fd);
642 }
643
vhost_user_set_vring_kick(struct virtio_uml_device * vu_dev,int index,int fd)644 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
645 int index, int fd)
646 {
647 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
648 index, fd);
649 }
650
/*
 * Enable or disable vring @index on the slave.
 *
 * Returns 0 without sending anything when the slave's features lack
 * VHOST_USER_F_PROTOCOL_FEATURES.
 */
static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
				       u32 index, bool enable)
{
	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))
		return vhost_user_set_vring_state(vu_dev,
						  VHOST_USER_SET_VRING_ENABLE,
						  index, enable);

	return 0;
}
660
661
662 /* Virtio interface */
663
vu_notify(struct virtqueue * vq)664 static bool vu_notify(struct virtqueue *vq)
665 {
666 struct virtio_uml_vq_info *info = vq->priv;
667 const uint64_t n = 1;
668 int rc;
669
670 do {
671 rc = os_write_file(info->kick_fd, &n, sizeof(n));
672 } while (rc == -EINTR);
673 return !WARN(rc != sizeof(n), "write returned %d\n", rc);
674 }
675
vu_interrupt(int irq,void * opaque)676 static irqreturn_t vu_interrupt(int irq, void *opaque)
677 {
678 struct virtqueue *vq = opaque;
679 struct virtio_uml_vq_info *info = vq->priv;
680 uint64_t n;
681 int rc;
682 irqreturn_t ret = IRQ_NONE;
683
684 do {
685 rc = os_read_file(info->call_fd, &n, sizeof(n));
686 if (rc == sizeof(n))
687 ret |= vring_interrupt(irq, vq);
688 } while (rc == sizeof(n) || rc == -EINTR);
689 WARN(rc != -EAGAIN, "read returned %d\n", rc);
690 return ret;
691 }
692
693
/* virtio_config_ops.get: read config space through vhost-user. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
701
/* virtio_config_ops.set: write config space through vhost-user. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
709
vu_get_status(struct virtio_device * vdev)710 static u8 vu_get_status(struct virtio_device *vdev)
711 {
712 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
713
714 return vu_dev->status;
715 }
716
/*
 * virtio_config_ops.set_status: store @status locally; no message is
 * sent to the slave.
 */
static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	to_virtio_uml_device(vdev)->status = status;
}
723
vu_reset(struct virtio_device * vdev)724 static void vu_reset(struct virtio_device *vdev)
725 {
726 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
727
728 vu_dev->status = 0;
729 }
730
vu_del_vq(struct virtqueue * vq)731 static void vu_del_vq(struct virtqueue *vq)
732 {
733 struct virtio_uml_vq_info *info = vq->priv;
734
735 um_free_irq(VIRTIO_IRQ, vq);
736
737 os_close_file(info->call_fd);
738 os_close_file(info->kick_fd);
739
740 vring_del_virtqueue(vq);
741 kfree(info);
742 }
743
vu_del_vqs(struct virtio_device * vdev)744 static void vu_del_vqs(struct virtio_device *vdev)
745 {
746 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
747 struct virtqueue *vq, *n;
748 u64 features;
749
750 /* Note: reverse order as a workaround to a decoding bug in snabb */
751 list_for_each_entry_reverse(vq, &vdev->vqs, list)
752 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
753
754 /* Ensure previous messages have been processed */
755 WARN_ON(vhost_user_get_features(vu_dev, &features));
756
757 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
758 vu_del_vq(vq);
759 }
760
vu_setup_vq_call_fd(struct virtio_uml_device * vu_dev,struct virtqueue * vq)761 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
762 struct virtqueue *vq)
763 {
764 struct virtio_uml_vq_info *info = vq->priv;
765 int call_fds[2];
766 int rc;
767
768 /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
769 rc = os_pipe(call_fds, true, true);
770 if (rc < 0)
771 return rc;
772
773 info->call_fd = call_fds[0];
774 rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
775 vu_interrupt, IRQF_SHARED, info->name, vq);
776 if (rc)
777 goto close_both;
778
779 rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
780 if (rc)
781 goto release_irq;
782
783 goto out;
784
785 release_irq:
786 um_free_irq(VIRTIO_IRQ, vq);
787 close_both:
788 os_close_file(call_fds[0]);
789 out:
790 /* Close (unused) write end of call fds */
791 os_close_file(call_fds[1]);
792
793 return rc;
794 }
795
/*
 * Create virtqueue @index of @vdev and announce it to the slave.
 *
 * Allocates the per-queue info, creates the vring, opens the kick eventfd,
 * sets up the call channel and sends the ring size, base and addresses.
 * The kick fd itself is sent later, by vu_find_vqs().
 *
 * Returns the new virtqueue, or an ERR_PTR() after undoing all steps taken
 * so far.
 */
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int ring_size;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, MAX_SUPPORTED_QUEUE_SIZE,
				    PAGE_SIZE, vdev, true, true, ctx,
				    vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto free_info;
	}
	vq->priv = info;
	/* use the size the ring actually got */
	ring_size = virtqueue_get_vring_size(vq);

	rc = os_eventfd(0, 0);
	if (rc < 0)
		goto del_vring;
	info->kick_fd = rc;

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto close_kick;

	rc = vhost_user_set_vring_num(vu_dev, index, ring_size);
	if (!rc)
		rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (!rc)
		rc = vhost_user_set_vring_addr(vu_dev, index,
					       virtqueue_get_desc_addr(vq),
					       virtqueue_get_used_addr(vq),
					       virtqueue_get_avail_addr(vq),
					       (u64) -1);
	if (rc)
		goto undo_call;

	return vq;

undo_call:
	um_free_irq(VIRTIO_IRQ, vq);
	os_close_file(info->call_fd);
close_kick:
	os_close_file(info->kick_fd);
del_vring:
	vring_del_virtqueue(vq);
free_info:
	kfree(info);
	return ERR_PTR(rc);
}
863
/*
 * virtio_config_ops.find_vqs: send the memory table, create a virtqueue
 * for every entry of @names that is not NULL (NULL entries get a NULL
 * virtqueue and do not consume a queue index), then send the kick fd of
 * each queue and enable it.
 *
 * On any failure after the memory table was sent, all virtqueues created
 * so far are deleted again.  @desc is not used.
 */
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq;
	int queue_idx = 0;
	int i, rc;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (names[i]) {
			vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i],
					     names[i], ctx ? ctx[i] : false);
			if (IS_ERR(vqs[i])) {
				rc = PTR_ERR(vqs[i]);
				goto undo;
			}
		} else {
			vqs[i] = NULL;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
					       info->kick_fd);
		if (!rc)
			rc = vhost_user_set_vring_enable(vu_dev, vq->index,
							 true);
		if (rc)
			goto undo;
	}

	return 0;

undo:
	vu_del_vqs(vdev);
	return rc;
}
910
vu_get_features(struct virtio_device * vdev)911 static u64 vu_get_features(struct virtio_device *vdev)
912 {
913 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
914
915 return vu_dev->features;
916 }
917
vu_finalize_features(struct virtio_device * vdev)918 static int vu_finalize_features(struct virtio_device *vdev)
919 {
920 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
921 u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
922
923 vring_transport_features(vdev);
924 vu_dev->features = vdev->features | supported;
925
926 return vhost_user_set_features(vu_dev, vu_dev->features);
927 }
928
vu_bus_name(struct virtio_device * vdev)929 static const char *vu_bus_name(struct virtio_device *vdev)
930 {
931 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
932
933 return vu_dev->pdev->name;
934 }
935
936 static const struct virtio_config_ops virtio_uml_config_ops = {
937 .get = vu_get,
938 .set = vu_set,
939 .get_status = vu_get_status,
940 .set_status = vu_set_status,
941 .reset = vu_reset,
942 .find_vqs = vu_find_vqs,
943 .del_vqs = vu_del_vqs,
944 .get_features = vu_get_features,
945 .finalize_features = vu_finalize_features,
946 .bus_name = vu_bus_name,
947 };
948
virtio_uml_release_dev(struct device * d)949 static void virtio_uml_release_dev(struct device *d)
950 {
951 struct virtio_device *vdev =
952 container_of(d, struct virtio_device, dev);
953 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
954
955 /* might not have been opened due to not negotiating the feature */
956 if (vu_dev->req_fd >= 0) {
957 um_free_irq(VIRTIO_IRQ, vu_dev);
958 os_close_file(vu_dev->req_fd);
959 }
960
961 os_close_file(vu_dev->sock);
962 kfree(vu_dev);
963 }
964
965 /* Platform device */
966
967 struct virtio_uml_platform_data {
968 u32 virtio_device_id;
969 const char *socket_path;
970 };
971
virtio_uml_probe(struct platform_device * pdev)972 static int virtio_uml_probe(struct platform_device *pdev)
973 {
974 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
975 struct virtio_uml_device *vu_dev;
976 int rc;
977
978 if (!pdata)
979 return -EINVAL;
980
981 vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
982 if (!vu_dev)
983 return -ENOMEM;
984
985 vu_dev->vdev.dev.parent = &pdev->dev;
986 vu_dev->vdev.dev.release = virtio_uml_release_dev;
987 vu_dev->vdev.config = &virtio_uml_config_ops;
988 vu_dev->vdev.id.device = pdata->virtio_device_id;
989 vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
990 vu_dev->pdev = pdev;
991 vu_dev->req_fd = -1;
992
993 do {
994 rc = os_connect_socket(pdata->socket_path);
995 } while (rc == -EINTR);
996 if (rc < 0)
997 goto error_free;
998 vu_dev->sock = rc;
999
1000 rc = vhost_user_init(vu_dev);
1001 if (rc)
1002 goto error_init;
1003
1004 platform_set_drvdata(pdev, vu_dev);
1005
1006 rc = register_virtio_device(&vu_dev->vdev);
1007 if (rc)
1008 put_device(&vu_dev->vdev.dev);
1009 return rc;
1010
1011 error_init:
1012 os_close_file(vu_dev->sock);
1013 error_free:
1014 kfree(vu_dev);
1015 return rc;
1016 }
1017
virtio_uml_remove(struct platform_device * pdev)1018 static int virtio_uml_remove(struct platform_device *pdev)
1019 {
1020 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1021
1022 unregister_virtio_device(&vu_dev->vdev);
1023 return 0;
1024 }
1025
1026 /* Command line device list */
1027
/*
 * Release callback of the command line parent device.  Intentionally
 * empty: vu_cmdline_parent is a static object.
 */
static void vu_cmdline_release_dev(struct device *d)
{
}
1031
1032 static struct device vu_cmdline_parent = {
1033 .init_name = "virtio-uml-cmdline",
1034 .release = vu_cmdline_release_dev,
1035 };
1036
1037 static bool vu_cmdline_parent_registered;
1038 static int vu_cmdline_id;
1039
vu_cmdline_set(const char * device,const struct kernel_param * kp)1040 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1041 {
1042 const char *ids = strchr(device, ':');
1043 unsigned int virtio_device_id;
1044 int processed, consumed, err;
1045 char *socket_path;
1046 struct virtio_uml_platform_data pdata;
1047 struct platform_device *pdev;
1048
1049 if (!ids || ids == device)
1050 return -EINVAL;
1051
1052 processed = sscanf(ids, ":%u%n:%d%n",
1053 &virtio_device_id, &consumed,
1054 &vu_cmdline_id, &consumed);
1055
1056 if (processed < 1 || ids[consumed])
1057 return -EINVAL;
1058
1059 if (!vu_cmdline_parent_registered) {
1060 err = device_register(&vu_cmdline_parent);
1061 if (err) {
1062 pr_err("Failed to register parent device!\n");
1063 put_device(&vu_cmdline_parent);
1064 return err;
1065 }
1066 vu_cmdline_parent_registered = true;
1067 }
1068
1069 socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1070 if (!socket_path)
1071 return -ENOMEM;
1072
1073 pdata.virtio_device_id = (u32) virtio_device_id;
1074 pdata.socket_path = socket_path;
1075
1076 pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1077 vu_cmdline_id, virtio_device_id, socket_path);
1078
1079 pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1080 vu_cmdline_id++, &pdata,
1081 sizeof(pdata));
1082 err = PTR_ERR_OR_ZERO(pdev);
1083 if (err)
1084 goto free;
1085 return 0;
1086
1087 free:
1088 kfree(socket_path);
1089 return err;
1090 }
1091
vu_cmdline_get_device(struct device * dev,void * data)1092 static int vu_cmdline_get_device(struct device *dev, void *data)
1093 {
1094 struct platform_device *pdev = to_platform_device(dev);
1095 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1096 char *buffer = data;
1097 unsigned int len = strlen(buffer);
1098
1099 snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1100 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1101 return 0;
1102 }
1103
vu_cmdline_get(char * buffer,const struct kernel_param * kp)1104 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1105 {
1106 buffer[0] = '\0';
1107 if (vu_cmdline_parent_registered)
1108 device_for_each_child(&vu_cmdline_parent, buffer,
1109 vu_cmdline_get_device);
1110 return strlen(buffer) + 1;
1111 }
1112
1113 static const struct kernel_param_ops vu_cmdline_param_ops = {
1114 .set = vu_cmdline_set,
1115 .get = vu_cmdline_get,
1116 };
1117
1118 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1119 __uml_help(vu_cmdline_param_ops,
1120 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1121 " Configure a virtio device over a vhost-user socket.\n"
1122 " See virtio_ids.h for a list of possible virtio device id values.\n"
1123 " Optionally use a specific platform_device id.\n\n"
1124 );
1125
1126
vu_unregister_cmdline_device(struct device * dev,void * data)1127 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1128 {
1129 struct platform_device *pdev = to_platform_device(dev);
1130 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1131
1132 kfree(pdata->socket_path);
1133 platform_device_unregister(pdev);
1134 return 0;
1135 }
1136
vu_unregister_cmdline_devices(void)1137 static void vu_unregister_cmdline_devices(void)
1138 {
1139 if (vu_cmdline_parent_registered) {
1140 device_for_each_child(&vu_cmdline_parent, NULL,
1141 vu_unregister_cmdline_device);
1142 device_unregister(&vu_cmdline_parent);
1143 vu_cmdline_parent_registered = false;
1144 }
1145 }
1146
/* Platform driver */
1148
1149 static const struct of_device_id virtio_uml_match[] = {
1150 { .compatible = "virtio,uml", },
1151 { }
1152 };
1153 MODULE_DEVICE_TABLE(of, virtio_uml_match);
1154
1155 static struct platform_driver virtio_uml_driver = {
1156 .probe = virtio_uml_probe,
1157 .remove = virtio_uml_remove,
1158 .driver = {
1159 .name = "virtio-uml",
1160 .of_match_table = virtio_uml_match,
1161 },
1162 };
1163
virtio_uml_init(void)1164 static int __init virtio_uml_init(void)
1165 {
1166 return platform_driver_register(&virtio_uml_driver);
1167 }
1168
virtio_uml_exit(void)1169 static void __exit virtio_uml_exit(void)
1170 {
1171 platform_driver_unregister(&virtio_uml_driver);
1172 vu_unregister_cmdline_devices();
1173 }
1174
1175 module_init(virtio_uml_init);
1176 module_exit(virtio_uml_exit);
1177 __uml_exitcall(virtio_uml_exit);
1178
1179 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1180 MODULE_LICENSE("GPL");
1181