1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * virtio-fs: Virtio Filesystem
4 * Copyright (C) 2018 Red Hat, Inc.
5 */
6
7 #include <linux/fs.h>
8 #include <linux/module.h>
9 #include <linux/virtio.h>
10 #include <linux/virtio_fs.h>
11 #include <linux/delay.h>
12 #include <linux/fs_context.h>
13 #include <linux/highmem.h>
14 #include "fuse_i.h"
15
/*
 * Every registered virtio-fs device instance is linked on
 * virtio_fs_instances. virtio_fs_mutex guards the list and also provides
 * mutual exclusion between the device removal and mount paths.
 */
static LIST_HEAD(virtio_fs_instances);
static DEFINE_MUTEX(virtio_fs_mutex);
21
/* Indices into virtio_fs->vqs: the hiprio queue first, request queues after. */
enum {
	VQ_HIPRIO = 0,
	VQ_REQUEST = 1,
};
26
27 /* Per-virtqueue state */
28 struct virtio_fs_vq {
29 spinlock_t lock;
30 struct virtqueue *vq; /* protected by ->lock */
31 struct work_struct done_work;
32 struct list_head queued_reqs;
33 struct list_head end_reqs; /* End these requests */
34 struct delayed_work dispatch_work;
35 struct fuse_dev *fud;
36 bool connected;
37 long in_flight;
38 char name[24];
39 } ____cacheline_aligned_in_smp;
40
41 /* A virtio-fs device instance */
42 struct virtio_fs {
43 struct kref refcount;
44 struct list_head list; /* on virtio_fs_instances */
45 char *tag;
46 struct virtio_fs_vq *vqs;
47 unsigned int nvqs; /* number of virtqueues */
48 unsigned int num_request_queues; /* number of request queues */
49 };
50
51 struct virtio_fs_forget {
52 struct fuse_in_header ih;
53 struct fuse_forget_in arg;
54 /* This request can be temporarily queued on virt queue */
55 struct list_head list;
56 };
57
58 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
59 struct fuse_req *req, bool in_flight);
60
vq_to_fsvq(struct virtqueue * vq)61 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
62 {
63 struct virtio_fs *fs = vq->vdev->priv;
64
65 return &fs->vqs[vq->index];
66 }
67
vq_to_fpq(struct virtqueue * vq)68 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
69 {
70 return &vq_to_fsvq(vq)->fud->pq;
71 }
72
73 /* Should be called with fsvq->lock held. */
inc_in_flight_req(struct virtio_fs_vq * fsvq)74 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
75 {
76 fsvq->in_flight++;
77 }
78
79 /* Should be called with fsvq->lock held. */
dec_in_flight_req(struct virtio_fs_vq * fsvq)80 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
81 {
82 WARN_ON(fsvq->in_flight <= 0);
83 fsvq->in_flight--;
84 }
85
release_virtio_fs_obj(struct kref * ref)86 static void release_virtio_fs_obj(struct kref *ref)
87 {
88 struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
89
90 kfree(vfs->vqs);
91 kfree(vfs);
92 }
93
94 /* Make sure virtiofs_mutex is held */
virtio_fs_put(struct virtio_fs * fs)95 static void virtio_fs_put(struct virtio_fs *fs)
96 {
97 kref_put(&fs->refcount, release_virtio_fs_obj);
98 }
99
virtio_fs_fiq_release(struct fuse_iqueue * fiq)100 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
101 {
102 struct virtio_fs *vfs = fiq->priv;
103
104 mutex_lock(&virtio_fs_mutex);
105 virtio_fs_put(vfs);
106 mutex_unlock(&virtio_fs_mutex);
107 }
108
virtio_fs_drain_queue(struct virtio_fs_vq * fsvq)109 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
110 {
111 WARN_ON(fsvq->in_flight < 0);
112
113 /* Wait for in flight requests to finish.*/
114 while (1) {
115 spin_lock(&fsvq->lock);
116 if (!fsvq->in_flight) {
117 spin_unlock(&fsvq->lock);
118 break;
119 }
120 spin_unlock(&fsvq->lock);
121 /* TODO use completion instead of timeout */
122 usleep_range(1000, 2000);
123 }
124
125 flush_work(&fsvq->done_work);
126 flush_delayed_work(&fsvq->dispatch_work);
127 }
128
virtio_fs_drain_all_queues(struct virtio_fs * fs)129 static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
130 {
131 struct virtio_fs_vq *fsvq;
132 int i;
133
134 for (i = 0; i < fs->nvqs; i++) {
135 fsvq = &fs->vqs[i];
136 virtio_fs_drain_queue(fsvq);
137 }
138 }
139
virtio_fs_start_all_queues(struct virtio_fs * fs)140 static void virtio_fs_start_all_queues(struct virtio_fs *fs)
141 {
142 struct virtio_fs_vq *fsvq;
143 int i;
144
145 for (i = 0; i < fs->nvqs; i++) {
146 fsvq = &fs->vqs[i];
147 spin_lock(&fsvq->lock);
148 fsvq->connected = true;
149 spin_unlock(&fsvq->lock);
150 }
151 }
152
153 /* Add a new instance to the list or return -EEXIST if tag name exists*/
virtio_fs_add_instance(struct virtio_fs * fs)154 static int virtio_fs_add_instance(struct virtio_fs *fs)
155 {
156 struct virtio_fs *fs2;
157 bool duplicate = false;
158
159 mutex_lock(&virtio_fs_mutex);
160
161 list_for_each_entry(fs2, &virtio_fs_instances, list) {
162 if (strcmp(fs->tag, fs2->tag) == 0)
163 duplicate = true;
164 }
165
166 if (!duplicate)
167 list_add_tail(&fs->list, &virtio_fs_instances);
168
169 mutex_unlock(&virtio_fs_mutex);
170
171 if (duplicate)
172 return -EEXIST;
173 return 0;
174 }
175
176 /* Return the virtio_fs with a given tag, or NULL */
virtio_fs_find_instance(const char * tag)177 static struct virtio_fs *virtio_fs_find_instance(const char *tag)
178 {
179 struct virtio_fs *fs;
180
181 mutex_lock(&virtio_fs_mutex);
182
183 list_for_each_entry(fs, &virtio_fs_instances, list) {
184 if (strcmp(fs->tag, tag) == 0) {
185 kref_get(&fs->refcount);
186 goto found;
187 }
188 }
189
190 fs = NULL; /* not found */
191
192 found:
193 mutex_unlock(&virtio_fs_mutex);
194
195 return fs;
196 }
197
virtio_fs_free_devs(struct virtio_fs * fs)198 static void virtio_fs_free_devs(struct virtio_fs *fs)
199 {
200 unsigned int i;
201
202 for (i = 0; i < fs->nvqs; i++) {
203 struct virtio_fs_vq *fsvq = &fs->vqs[i];
204
205 if (!fsvq->fud)
206 continue;
207
208 fuse_dev_free(fsvq->fud);
209 fsvq->fud = NULL;
210 }
211 }
212
213 /* Read filesystem name from virtio config into fs->tag (must kfree()). */
virtio_fs_read_tag(struct virtio_device * vdev,struct virtio_fs * fs)214 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
215 {
216 char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
217 char *end;
218 size_t len;
219
220 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
221 &tag_buf, sizeof(tag_buf));
222 end = memchr(tag_buf, '\0', sizeof(tag_buf));
223 if (end == tag_buf)
224 return -EINVAL; /* empty tag */
225 if (!end)
226 end = &tag_buf[sizeof(tag_buf)];
227
228 len = end - tag_buf;
229 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
230 if (!fs->tag)
231 return -ENOMEM;
232 memcpy(fs->tag, tag_buf, len);
233 fs->tag[len] = '\0';
234 return 0;
235 }
236
237 /* Work function for hiprio completion */
virtio_fs_hiprio_done_work(struct work_struct * work)238 static void virtio_fs_hiprio_done_work(struct work_struct *work)
239 {
240 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
241 done_work);
242 struct virtqueue *vq = fsvq->vq;
243
244 /* Free completed FUSE_FORGET requests */
245 spin_lock(&fsvq->lock);
246 do {
247 unsigned int len;
248 void *req;
249
250 virtqueue_disable_cb(vq);
251
252 while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
253 kfree(req);
254 dec_in_flight_req(fsvq);
255 }
256 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
257 spin_unlock(&fsvq->lock);
258 }
259
virtio_fs_request_dispatch_work(struct work_struct * work)260 static void virtio_fs_request_dispatch_work(struct work_struct *work)
261 {
262 struct fuse_req *req;
263 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
264 dispatch_work.work);
265 struct fuse_conn *fc = fsvq->fud->fc;
266 int ret;
267
268 pr_debug("virtio-fs: worker %s called.\n", __func__);
269 while (1) {
270 spin_lock(&fsvq->lock);
271 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
272 list);
273 if (!req) {
274 spin_unlock(&fsvq->lock);
275 break;
276 }
277
278 list_del_init(&req->list);
279 spin_unlock(&fsvq->lock);
280 fuse_request_end(fc, req);
281 }
282
283 /* Dispatch pending requests */
284 while (1) {
285 spin_lock(&fsvq->lock);
286 req = list_first_entry_or_null(&fsvq->queued_reqs,
287 struct fuse_req, list);
288 if (!req) {
289 spin_unlock(&fsvq->lock);
290 return;
291 }
292 list_del_init(&req->list);
293 spin_unlock(&fsvq->lock);
294
295 ret = virtio_fs_enqueue_req(fsvq, req, true);
296 if (ret < 0) {
297 if (ret == -ENOMEM || ret == -ENOSPC) {
298 spin_lock(&fsvq->lock);
299 list_add_tail(&req->list, &fsvq->queued_reqs);
300 schedule_delayed_work(&fsvq->dispatch_work,
301 msecs_to_jiffies(1));
302 spin_unlock(&fsvq->lock);
303 return;
304 }
305 req->out.h.error = ret;
306 spin_lock(&fsvq->lock);
307 dec_in_flight_req(fsvq);
308 spin_unlock(&fsvq->lock);
309 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
310 ret);
311 fuse_request_end(fc, req);
312 }
313 }
314 }
315
virtio_fs_hiprio_dispatch_work(struct work_struct * work)316 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
317 {
318 struct virtio_fs_forget *forget;
319 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
320 dispatch_work.work);
321 struct virtqueue *vq = fsvq->vq;
322 struct scatterlist sg;
323 struct scatterlist *sgs[] = {&sg};
324 bool notify;
325 int ret;
326
327 pr_debug("virtio-fs: worker %s called.\n", __func__);
328 while (1) {
329 spin_lock(&fsvq->lock);
330 forget = list_first_entry_or_null(&fsvq->queued_reqs,
331 struct virtio_fs_forget, list);
332 if (!forget) {
333 spin_unlock(&fsvq->lock);
334 return;
335 }
336
337 list_del(&forget->list);
338 if (!fsvq->connected) {
339 dec_in_flight_req(fsvq);
340 spin_unlock(&fsvq->lock);
341 kfree(forget);
342 continue;
343 }
344
345 sg_init_one(&sg, forget, sizeof(*forget));
346
347 /* Enqueue the request */
348 dev_dbg(&vq->vdev->dev, "%s\n", __func__);
349 ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
350 if (ret < 0) {
351 if (ret == -ENOMEM || ret == -ENOSPC) {
352 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
353 ret);
354 list_add_tail(&forget->list,
355 &fsvq->queued_reqs);
356 schedule_delayed_work(&fsvq->dispatch_work,
357 msecs_to_jiffies(1));
358 } else {
359 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
360 ret);
361 dec_in_flight_req(fsvq);
362 kfree(forget);
363 }
364 spin_unlock(&fsvq->lock);
365 return;
366 }
367
368 notify = virtqueue_kick_prepare(vq);
369 spin_unlock(&fsvq->lock);
370
371 if (notify)
372 virtqueue_notify(vq);
373 pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
374 __func__);
375 }
376 }
377
378 /* Allocate and copy args into req->argbuf */
copy_args_to_argbuf(struct fuse_req * req)379 static int copy_args_to_argbuf(struct fuse_req *req)
380 {
381 struct fuse_args *args = req->args;
382 unsigned int offset = 0;
383 unsigned int num_in;
384 unsigned int num_out;
385 unsigned int len;
386 unsigned int i;
387
388 num_in = args->in_numargs - args->in_pages;
389 num_out = args->out_numargs - args->out_pages;
390 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
391 fuse_len_args(num_out, args->out_args);
392
393 req->argbuf = kmalloc(len, GFP_ATOMIC);
394 if (!req->argbuf)
395 return -ENOMEM;
396
397 for (i = 0; i < num_in; i++) {
398 memcpy(req->argbuf + offset,
399 args->in_args[i].value,
400 args->in_args[i].size);
401 offset += args->in_args[i].size;
402 }
403
404 return 0;
405 }
406
407 /* Copy args out of and free req->argbuf */
copy_args_from_argbuf(struct fuse_args * args,struct fuse_req * req)408 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
409 {
410 unsigned int remaining;
411 unsigned int offset;
412 unsigned int num_in;
413 unsigned int num_out;
414 unsigned int i;
415
416 remaining = req->out.h.len - sizeof(req->out.h);
417 num_in = args->in_numargs - args->in_pages;
418 num_out = args->out_numargs - args->out_pages;
419 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
420
421 for (i = 0; i < num_out; i++) {
422 unsigned int argsize = args->out_args[i].size;
423
424 if (args->out_argvar &&
425 i == args->out_numargs - 1 &&
426 argsize > remaining) {
427 argsize = remaining;
428 }
429
430 memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
431 offset += argsize;
432
433 if (i != args->out_numargs - 1)
434 remaining -= argsize;
435 }
436
437 /* Store the actual size of the variable-length arg */
438 if (args->out_argvar)
439 args->out_args[args->out_numargs - 1].size = remaining;
440
441 kfree(req->argbuf);
442 req->argbuf = NULL;
443 }
444
445 /* Work function for request completion */
virtio_fs_requests_done_work(struct work_struct * work)446 static void virtio_fs_requests_done_work(struct work_struct *work)
447 {
448 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
449 done_work);
450 struct fuse_pqueue *fpq = &fsvq->fud->pq;
451 struct fuse_conn *fc = fsvq->fud->fc;
452 struct virtqueue *vq = fsvq->vq;
453 struct fuse_req *req;
454 struct fuse_args_pages *ap;
455 struct fuse_req *next;
456 struct fuse_args *args;
457 unsigned int len, i, thislen;
458 struct page *page;
459 LIST_HEAD(reqs);
460
461 /* Collect completed requests off the virtqueue */
462 spin_lock(&fsvq->lock);
463 do {
464 virtqueue_disable_cb(vq);
465
466 while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
467 spin_lock(&fpq->lock);
468 list_move_tail(&req->list, &reqs);
469 spin_unlock(&fpq->lock);
470 }
471 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
472 spin_unlock(&fsvq->lock);
473
474 /* End requests */
475 list_for_each_entry_safe(req, next, &reqs, list) {
476 /*
477 * TODO verify that server properly follows FUSE protocol
478 * (oh.uniq, oh.len)
479 */
480 args = req->args;
481 copy_args_from_argbuf(args, req);
482
483 if (args->out_pages && args->page_zeroing) {
484 len = args->out_args[args->out_numargs - 1].size;
485 ap = container_of(args, typeof(*ap), args);
486 for (i = 0; i < ap->num_pages; i++) {
487 thislen = ap->descs[i].length;
488 if (len < thislen) {
489 WARN_ON(ap->descs[i].offset);
490 page = ap->pages[i];
491 zero_user_segment(page, len, thislen);
492 len = 0;
493 } else {
494 len -= thislen;
495 }
496 }
497 }
498
499 spin_lock(&fpq->lock);
500 clear_bit(FR_SENT, &req->flags);
501 list_del_init(&req->list);
502 spin_unlock(&fpq->lock);
503
504 fuse_request_end(fc, req);
505 spin_lock(&fsvq->lock);
506 dec_in_flight_req(fsvq);
507 spin_unlock(&fsvq->lock);
508 }
509 }
510
511 /* Virtqueue interrupt handler */
virtio_fs_vq_done(struct virtqueue * vq)512 static void virtio_fs_vq_done(struct virtqueue *vq)
513 {
514 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
515
516 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
517
518 schedule_work(&fsvq->done_work);
519 }
520
521 /* Initialize virtqueues */
virtio_fs_setup_vqs(struct virtio_device * vdev,struct virtio_fs * fs)522 static int virtio_fs_setup_vqs(struct virtio_device *vdev,
523 struct virtio_fs *fs)
524 {
525 struct virtqueue **vqs;
526 vq_callback_t **callbacks;
527 const char **names;
528 unsigned int i;
529 int ret = 0;
530
531 virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
532 &fs->num_request_queues);
533 if (fs->num_request_queues == 0)
534 return -EINVAL;
535
536 fs->nvqs = 1 + fs->num_request_queues;
537 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
538 if (!fs->vqs)
539 return -ENOMEM;
540
541 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
542 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
543 GFP_KERNEL);
544 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
545 if (!vqs || !callbacks || !names) {
546 ret = -ENOMEM;
547 goto out;
548 }
549
550 callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
551 snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
552 "hiprio");
553 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
554 INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
555 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
556 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
557 INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
558 virtio_fs_hiprio_dispatch_work);
559 spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
560
561 /* Initialize the requests virtqueues */
562 for (i = VQ_REQUEST; i < fs->nvqs; i++) {
563 spin_lock_init(&fs->vqs[i].lock);
564 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
565 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
566 virtio_fs_request_dispatch_work);
567 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
568 INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
569 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
570 "requests.%u", i - VQ_REQUEST);
571 callbacks[i] = virtio_fs_vq_done;
572 names[i] = fs->vqs[i].name;
573 }
574
575 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
576 if (ret < 0)
577 goto out;
578
579 for (i = 0; i < fs->nvqs; i++)
580 fs->vqs[i].vq = vqs[i];
581
582 virtio_fs_start_all_queues(fs);
583 out:
584 kfree(names);
585 kfree(callbacks);
586 kfree(vqs);
587 if (ret)
588 kfree(fs->vqs);
589 return ret;
590 }
591
592 /* Free virtqueues (device must already be reset) */
virtio_fs_cleanup_vqs(struct virtio_device * vdev,struct virtio_fs * fs)593 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
594 struct virtio_fs *fs)
595 {
596 vdev->config->del_vqs(vdev);
597 }
598
virtio_fs_probe(struct virtio_device * vdev)599 static int virtio_fs_probe(struct virtio_device *vdev)
600 {
601 struct virtio_fs *fs;
602 int ret;
603
604 fs = kzalloc(sizeof(*fs), GFP_KERNEL);
605 if (!fs)
606 return -ENOMEM;
607 kref_init(&fs->refcount);
608 vdev->priv = fs;
609
610 ret = virtio_fs_read_tag(vdev, fs);
611 if (ret < 0)
612 goto out;
613
614 ret = virtio_fs_setup_vqs(vdev, fs);
615 if (ret < 0)
616 goto out;
617
618 /* TODO vq affinity */
619
620 /* Bring the device online in case the filesystem is mounted and
621 * requests need to be sent before we return.
622 */
623 virtio_device_ready(vdev);
624
625 ret = virtio_fs_add_instance(fs);
626 if (ret < 0)
627 goto out_vqs;
628
629 return 0;
630
631 out_vqs:
632 vdev->config->reset(vdev);
633 virtio_fs_cleanup_vqs(vdev, fs);
634
635 out:
636 vdev->priv = NULL;
637 kfree(fs);
638 return ret;
639 }
640
virtio_fs_stop_all_queues(struct virtio_fs * fs)641 static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
642 {
643 struct virtio_fs_vq *fsvq;
644 int i;
645
646 for (i = 0; i < fs->nvqs; i++) {
647 fsvq = &fs->vqs[i];
648 spin_lock(&fsvq->lock);
649 fsvq->connected = false;
650 spin_unlock(&fsvq->lock);
651 }
652 }
653
virtio_fs_remove(struct virtio_device * vdev)654 static void virtio_fs_remove(struct virtio_device *vdev)
655 {
656 struct virtio_fs *fs = vdev->priv;
657
658 mutex_lock(&virtio_fs_mutex);
659 /* This device is going away. No one should get new reference */
660 list_del_init(&fs->list);
661 virtio_fs_stop_all_queues(fs);
662 virtio_fs_drain_all_queues(fs);
663 vdev->config->reset(vdev);
664 virtio_fs_cleanup_vqs(vdev, fs);
665
666 vdev->priv = NULL;
667 /* Put device reference on virtio_fs object */
668 virtio_fs_put(fs);
669 mutex_unlock(&virtio_fs_mutex);
670 }
671
#ifdef CONFIG_PM_SLEEP
/*
 * Suspend hook. State saving is not implemented, so the freeze is refused
 * with -EOPNOTSUPP after logging a warning.
 */
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");

	return -EOPNOTSUPP;
}

/* Resume hook. Nothing is restored yet; always reports success. */
static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
686
687 const static struct virtio_device_id id_table[] = {
688 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
689 {},
690 };
691
692 const static unsigned int feature_table[] = {};
693
694 static struct virtio_driver virtio_fs_driver = {
695 .driver.name = KBUILD_MODNAME,
696 .driver.owner = THIS_MODULE,
697 .id_table = id_table,
698 .feature_table = feature_table,
699 .feature_table_size = ARRAY_SIZE(feature_table),
700 .probe = virtio_fs_probe,
701 .remove = virtio_fs_remove,
702 #ifdef CONFIG_PM_SLEEP
703 .freeze = virtio_fs_freeze,
704 .restore = virtio_fs_restore,
705 #endif
706 };
707
virtio_fs_wake_forget_and_unlock(struct fuse_iqueue * fiq)708 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
709 __releases(fiq->lock)
710 {
711 struct fuse_forget_link *link;
712 struct virtio_fs_forget *forget;
713 struct scatterlist sg;
714 struct scatterlist *sgs[] = {&sg};
715 struct virtio_fs *fs;
716 struct virtqueue *vq;
717 struct virtio_fs_vq *fsvq;
718 bool notify;
719 u64 unique;
720 int ret;
721
722 link = fuse_dequeue_forget(fiq, 1, NULL);
723 unique = fuse_get_unique(fiq);
724
725 fs = fiq->priv;
726 fsvq = &fs->vqs[VQ_HIPRIO];
727 spin_unlock(&fiq->lock);
728
729 /* Allocate a buffer for the request */
730 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
731
732 forget->ih = (struct fuse_in_header){
733 .opcode = FUSE_FORGET,
734 .nodeid = link->forget_one.nodeid,
735 .unique = unique,
736 .len = sizeof(*forget),
737 };
738 forget->arg = (struct fuse_forget_in){
739 .nlookup = link->forget_one.nlookup,
740 };
741
742 sg_init_one(&sg, forget, sizeof(*forget));
743
744 /* Enqueue the request */
745 spin_lock(&fsvq->lock);
746
747 if (!fsvq->connected) {
748 kfree(forget);
749 spin_unlock(&fsvq->lock);
750 goto out;
751 }
752
753 vq = fsvq->vq;
754 dev_dbg(&vq->vdev->dev, "%s\n", __func__);
755
756 ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
757 if (ret < 0) {
758 if (ret == -ENOMEM || ret == -ENOSPC) {
759 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
760 ret);
761 list_add_tail(&forget->list, &fsvq->queued_reqs);
762 schedule_delayed_work(&fsvq->dispatch_work,
763 msecs_to_jiffies(1));
764 inc_in_flight_req(fsvq);
765 } else {
766 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
767 ret);
768 kfree(forget);
769 }
770 spin_unlock(&fsvq->lock);
771 goto out;
772 }
773
774 inc_in_flight_req(fsvq);
775 notify = virtqueue_kick_prepare(vq);
776
777 spin_unlock(&fsvq->lock);
778
779 if (notify)
780 virtqueue_notify(vq);
781 out:
782 kfree(link);
783 }
784
virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue * fiq)785 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
786 __releases(fiq->lock)
787 {
788 /*
789 * TODO interrupts.
790 *
791 * Normal fs operations on a local filesystems aren't interruptible.
792 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
793 * with shared lock between host and guest.
794 */
795 spin_unlock(&fiq->lock);
796 }
797
798 /* Return the number of scatter-gather list elements required */
sg_count_fuse_req(struct fuse_req * req)799 static unsigned int sg_count_fuse_req(struct fuse_req *req)
800 {
801 struct fuse_args *args = req->args;
802 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
803 unsigned int total_sgs = 1 /* fuse_in_header */;
804
805 if (args->in_numargs - args->in_pages)
806 total_sgs += 1;
807
808 if (args->in_pages)
809 total_sgs += ap->num_pages;
810
811 if (!test_bit(FR_ISREPLY, &req->flags))
812 return total_sgs;
813
814 total_sgs += 1 /* fuse_out_header */;
815
816 if (args->out_numargs - args->out_pages)
817 total_sgs += 1;
818
819 if (args->out_pages)
820 total_sgs += ap->num_pages;
821
822 return total_sgs;
823 }
824
825 /* Add pages to scatter-gather list and return number of elements used */
sg_init_fuse_pages(struct scatterlist * sg,struct page ** pages,struct fuse_page_desc * page_descs,unsigned int num_pages,unsigned int total_len)826 static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
827 struct page **pages,
828 struct fuse_page_desc *page_descs,
829 unsigned int num_pages,
830 unsigned int total_len)
831 {
832 unsigned int i;
833 unsigned int this_len;
834
835 for (i = 0; i < num_pages && total_len; i++) {
836 sg_init_table(&sg[i], 1);
837 this_len = min(page_descs[i].length, total_len);
838 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
839 total_len -= this_len;
840 }
841
842 return i;
843 }
844
845 /* Add args to scatter-gather list and return number of elements used */
sg_init_fuse_args(struct scatterlist * sg,struct fuse_req * req,struct fuse_arg * args,unsigned int numargs,bool argpages,void * argbuf,unsigned int * len_used)846 static unsigned int sg_init_fuse_args(struct scatterlist *sg,
847 struct fuse_req *req,
848 struct fuse_arg *args,
849 unsigned int numargs,
850 bool argpages,
851 void *argbuf,
852 unsigned int *len_used)
853 {
854 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
855 unsigned int total_sgs = 0;
856 unsigned int len;
857
858 len = fuse_len_args(numargs - argpages, args);
859 if (len)
860 sg_init_one(&sg[total_sgs++], argbuf, len);
861
862 if (argpages)
863 total_sgs += sg_init_fuse_pages(&sg[total_sgs],
864 ap->pages, ap->descs,
865 ap->num_pages,
866 args[numargs - 1].size);
867
868 if (len_used)
869 *len_used = len;
870
871 return total_sgs;
872 }
873
874 /* Add a request to a virtqueue and kick the device */
virtio_fs_enqueue_req(struct virtio_fs_vq * fsvq,struct fuse_req * req,bool in_flight)875 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
876 struct fuse_req *req, bool in_flight)
877 {
878 /* requests need at least 4 elements */
879 struct scatterlist *stack_sgs[6];
880 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
881 struct scatterlist **sgs = stack_sgs;
882 struct scatterlist *sg = stack_sg;
883 struct virtqueue *vq;
884 struct fuse_args *args = req->args;
885 unsigned int argbuf_used = 0;
886 unsigned int out_sgs = 0;
887 unsigned int in_sgs = 0;
888 unsigned int total_sgs;
889 unsigned int i;
890 int ret;
891 bool notify;
892 struct fuse_pqueue *fpq;
893
894 /* Does the sglist fit on the stack? */
895 total_sgs = sg_count_fuse_req(req);
896 if (total_sgs > ARRAY_SIZE(stack_sgs)) {
897 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
898 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
899 if (!sgs || !sg) {
900 ret = -ENOMEM;
901 goto out;
902 }
903 }
904
905 /* Use a bounce buffer since stack args cannot be mapped */
906 ret = copy_args_to_argbuf(req);
907 if (ret < 0)
908 goto out;
909
910 /* Request elements */
911 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
912 out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
913 (struct fuse_arg *)args->in_args,
914 args->in_numargs, args->in_pages,
915 req->argbuf, &argbuf_used);
916
917 /* Reply elements */
918 if (test_bit(FR_ISREPLY, &req->flags)) {
919 sg_init_one(&sg[out_sgs + in_sgs++],
920 &req->out.h, sizeof(req->out.h));
921 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
922 args->out_args, args->out_numargs,
923 args->out_pages,
924 req->argbuf + argbuf_used, NULL);
925 }
926
927 WARN_ON(out_sgs + in_sgs != total_sgs);
928
929 for (i = 0; i < total_sgs; i++)
930 sgs[i] = &sg[i];
931
932 spin_lock(&fsvq->lock);
933
934 if (!fsvq->connected) {
935 spin_unlock(&fsvq->lock);
936 ret = -ENOTCONN;
937 goto out;
938 }
939
940 vq = fsvq->vq;
941 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
942 if (ret < 0) {
943 spin_unlock(&fsvq->lock);
944 goto out;
945 }
946
947 /* Request successfully sent. */
948 fpq = &fsvq->fud->pq;
949 spin_lock(&fpq->lock);
950 list_add_tail(&req->list, fpq->processing);
951 spin_unlock(&fpq->lock);
952 set_bit(FR_SENT, &req->flags);
953 /* matches barrier in request_wait_answer() */
954 smp_mb__after_atomic();
955
956 if (!in_flight)
957 inc_in_flight_req(fsvq);
958 notify = virtqueue_kick_prepare(vq);
959
960 spin_unlock(&fsvq->lock);
961
962 if (notify)
963 virtqueue_notify(vq);
964
965 out:
966 if (ret < 0 && req->argbuf) {
967 kfree(req->argbuf);
968 req->argbuf = NULL;
969 }
970 if (sgs != stack_sgs) {
971 kfree(sgs);
972 kfree(sg);
973 }
974
975 return ret;
976 }
977
virtio_fs_wake_pending_and_unlock(struct fuse_iqueue * fiq)978 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
979 __releases(fiq->lock)
980 {
981 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
982 struct virtio_fs *fs;
983 struct fuse_req *req;
984 struct virtio_fs_vq *fsvq;
985 int ret;
986
987 WARN_ON(list_empty(&fiq->pending));
988 req = list_last_entry(&fiq->pending, struct fuse_req, list);
989 clear_bit(FR_PENDING, &req->flags);
990 list_del_init(&req->list);
991 WARN_ON(!list_empty(&fiq->pending));
992 spin_unlock(&fiq->lock);
993
994 fs = fiq->priv;
995
996 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
997 __func__, req->in.h.opcode, req->in.h.unique,
998 req->in.h.nodeid, req->in.h.len,
999 fuse_len_args(req->args->out_numargs, req->args->out_args));
1000
1001 fsvq = &fs->vqs[queue_id];
1002 ret = virtio_fs_enqueue_req(fsvq, req, false);
1003 if (ret < 0) {
1004 if (ret == -ENOMEM || ret == -ENOSPC) {
1005 /*
1006 * Virtqueue full. Retry submission from worker
1007 * context as we might be holding fc->bg_lock.
1008 */
1009 spin_lock(&fsvq->lock);
1010 list_add_tail(&req->list, &fsvq->queued_reqs);
1011 inc_in_flight_req(fsvq);
1012 schedule_delayed_work(&fsvq->dispatch_work,
1013 msecs_to_jiffies(1));
1014 spin_unlock(&fsvq->lock);
1015 return;
1016 }
1017 req->out.h.error = ret;
1018 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
1019
1020 /* Can't end request in submission context. Use a worker */
1021 spin_lock(&fsvq->lock);
1022 list_add_tail(&req->list, &fsvq->end_reqs);
1023 schedule_delayed_work(&fsvq->dispatch_work, 0);
1024 spin_unlock(&fsvq->lock);
1025 return;
1026 }
1027 }
1028
1029 const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
1030 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock,
1031 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock,
1032 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock,
1033 .release = virtio_fs_fiq_release,
1034 };
1035
virtio_fs_fill_super(struct super_block * sb)1036 static int virtio_fs_fill_super(struct super_block *sb)
1037 {
1038 struct fuse_conn *fc = get_fuse_conn_super(sb);
1039 struct virtio_fs *fs = fc->iq.priv;
1040 unsigned int i;
1041 int err;
1042 struct fuse_fs_context ctx = {
1043 .rootmode = S_IFDIR,
1044 .default_permissions = 1,
1045 .allow_other = 1,
1046 .max_read = UINT_MAX,
1047 .blksize = 512,
1048 .destroy = true,
1049 .no_control = true,
1050 .no_force_umount = true,
1051 .no_mount_options = true,
1052 };
1053
1054 mutex_lock(&virtio_fs_mutex);
1055
1056 /* After holding mutex, make sure virtiofs device is still there.
1057 * Though we are holding a reference to it, drive ->remove might
1058 * still have cleaned up virtual queues. In that case bail out.
1059 */
1060 err = -EINVAL;
1061 if (list_empty(&fs->list)) {
1062 pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1063 goto err;
1064 }
1065
1066 err = -ENOMEM;
1067 /* Allocate fuse_dev for hiprio and notification queues */
1068 for (i = 0; i < VQ_REQUEST; i++) {
1069 struct virtio_fs_vq *fsvq = &fs->vqs[i];
1070
1071 fsvq->fud = fuse_dev_alloc();
1072 if (!fsvq->fud)
1073 goto err_free_fuse_devs;
1074 }
1075
1076 ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
1077 err = fuse_fill_super_common(sb, &ctx);
1078 if (err < 0)
1079 goto err_free_fuse_devs;
1080
1081 fc = fs->vqs[VQ_REQUEST].fud->fc;
1082
1083 for (i = 0; i < fs->nvqs; i++) {
1084 struct virtio_fs_vq *fsvq = &fs->vqs[i];
1085
1086 if (i == VQ_REQUEST)
1087 continue; /* already initialized */
1088 fuse_dev_install(fsvq->fud, fc);
1089 }
1090
1091 /* Previous unmount will stop all queues. Start these again */
1092 virtio_fs_start_all_queues(fs);
1093 fuse_send_init(fc);
1094 mutex_unlock(&virtio_fs_mutex);
1095 return 0;
1096
1097 err_free_fuse_devs:
1098 virtio_fs_free_devs(fs);
1099 err:
1100 mutex_unlock(&virtio_fs_mutex);
1101 return err;
1102 }
1103
virtio_kill_sb(struct super_block * sb)1104 static void virtio_kill_sb(struct super_block *sb)
1105 {
1106 struct fuse_conn *fc = get_fuse_conn_super(sb);
1107 struct virtio_fs *vfs;
1108 struct virtio_fs_vq *fsvq;
1109
1110 /* If mount failed, we can still be called without any fc */
1111 if (!fc)
1112 return fuse_kill_sb_anon(sb);
1113
1114 vfs = fc->iq.priv;
1115 fsvq = &vfs->vqs[VQ_HIPRIO];
1116
1117 /* Stop forget queue. Soon destroy will be sent */
1118 spin_lock(&fsvq->lock);
1119 fsvq->connected = false;
1120 spin_unlock(&fsvq->lock);
1121 virtio_fs_drain_all_queues(vfs);
1122
1123 fuse_kill_sb_anon(sb);
1124
1125 /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
1126 * and drain one more time and free fuse devices. Freeing fuse
1127 * devices will drop their reference on fuse_conn and that in
1128 * turn will drop its reference on virtio_fs object.
1129 */
1130 virtio_fs_stop_all_queues(vfs);
1131 virtio_fs_drain_all_queues(vfs);
1132 virtio_fs_free_devs(vfs);
1133 }
1134
virtio_fs_test_super(struct super_block * sb,struct fs_context * fsc)1135 static int virtio_fs_test_super(struct super_block *sb,
1136 struct fs_context *fsc)
1137 {
1138 struct fuse_conn *fc = fsc->s_fs_info;
1139
1140 return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
1141 }
1142
virtio_fs_set_super(struct super_block * sb,struct fs_context * fsc)1143 static int virtio_fs_set_super(struct super_block *sb,
1144 struct fs_context *fsc)
1145 {
1146 int err;
1147
1148 err = get_anon_bdev(&sb->s_dev);
1149 if (!err)
1150 fuse_conn_get(fsc->s_fs_info);
1151
1152 return err;
1153 }
1154
virtio_fs_get_tree(struct fs_context * fsc)1155 static int virtio_fs_get_tree(struct fs_context *fsc)
1156 {
1157 struct virtio_fs *fs;
1158 struct super_block *sb;
1159 struct fuse_conn *fc;
1160 int err;
1161
1162 /* This gets a reference on virtio_fs object. This ptr gets installed
1163 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
1164 * to drop the reference to this object.
1165 */
1166 fs = virtio_fs_find_instance(fsc->source);
1167 if (!fs) {
1168 pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1169 return -EINVAL;
1170 }
1171
1172 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1173 if (!fc) {
1174 mutex_lock(&virtio_fs_mutex);
1175 virtio_fs_put(fs);
1176 mutex_unlock(&virtio_fs_mutex);
1177 return -ENOMEM;
1178 }
1179
1180 fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
1181 fs);
1182 fc->release = fuse_free_conn;
1183 fc->delete_stale = true;
1184
1185 fsc->s_fs_info = fc;
1186 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
1187 fuse_conn_put(fc);
1188 if (IS_ERR(sb))
1189 return PTR_ERR(sb);
1190
1191 if (!sb->s_root) {
1192 err = virtio_fs_fill_super(sb);
1193 if (err) {
1194 deactivate_locked_super(sb);
1195 return err;
1196 }
1197
1198 sb->s_flags |= SB_ACTIVE;
1199 }
1200
1201 WARN_ON(fsc->root);
1202 fsc->root = dget(sb->s_root);
1203 return 0;
1204 }
1205
1206 static const struct fs_context_operations virtio_fs_context_ops = {
1207 .get_tree = virtio_fs_get_tree,
1208 };
1209
virtio_fs_init_fs_context(struct fs_context * fsc)1210 static int virtio_fs_init_fs_context(struct fs_context *fsc)
1211 {
1212 fsc->ops = &virtio_fs_context_ops;
1213 return 0;
1214 }
1215
1216 static struct file_system_type virtio_fs_type = {
1217 .owner = THIS_MODULE,
1218 .name = "virtiofs",
1219 .init_fs_context = virtio_fs_init_fs_context,
1220 .kill_sb = virtio_kill_sb,
1221 };
1222
virtio_fs_init(void)1223 static int __init virtio_fs_init(void)
1224 {
1225 int ret;
1226
1227 ret = register_virtio_driver(&virtio_fs_driver);
1228 if (ret < 0)
1229 return ret;
1230
1231 ret = register_filesystem(&virtio_fs_type);
1232 if (ret < 0) {
1233 unregister_virtio_driver(&virtio_fs_driver);
1234 return ret;
1235 }
1236
1237 return 0;
1238 }
1239 module_init(virtio_fs_init);
1240
virtio_fs_exit(void)1241 static void __exit virtio_fs_exit(void)
1242 {
1243 unregister_filesystem(&virtio_fs_type);
1244 unregister_virtio_driver(&virtio_fs_driver);
1245 }
1246 module_exit(virtio_fs_exit);
1247
1248 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
1249 MODULE_DESCRIPTION("Virtio Filesystem");
1250 MODULE_LICENSE("GPL");
1251 MODULE_ALIAS_FS(KBUILD_MODNAME);
1252 MODULE_DEVICE_TABLE(virtio, id_table);
1253