1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
5 */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <xen/xen.h>
15
16 #ifdef DEBUG
17 /* For development, we want to crash whenever the ring is screwed. */
18 #define BAD_RING(_vq, fmt, args...) \
19 do { \
20 dev_err(&(_vq)->vq.vdev->dev, \
21 "%s:"fmt, (_vq)->vq.name, ##args); \
22 BUG(); \
23 } while (0)
24 /* Caller is supposed to guarantee no reentry. */
25 #define START_USE(_vq) \
26 do { \
27 if ((_vq)->in_use) \
28 panic("%s:in_use = %i\n", \
29 (_vq)->vq.name, (_vq)->in_use); \
30 (_vq)->in_use = __LINE__; \
31 } while (0)
32 #define END_USE(_vq) \
33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
34 #define LAST_ADD_TIME_UPDATE(_vq) \
35 do { \
36 ktime_t now = ktime_get(); \
37 \
38 /* No kick or get, with .1 second between? Warn. */ \
39 if ((_vq)->last_add_time_valid) \
40 WARN_ON(ktime_to_ms(ktime_sub(now, \
41 (_vq)->last_add_time)) > 100); \
42 (_vq)->last_add_time = now; \
43 (_vq)->last_add_time_valid = true; \
44 } while (0)
45 #define LAST_ADD_TIME_CHECK(_vq) \
46 do { \
47 if ((_vq)->last_add_time_valid) { \
48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
49 (_vq)->last_add_time)) > 100); \
50 } \
51 } while (0)
52 #define LAST_ADD_TIME_INVALID(_vq) \
53 ((_vq)->last_add_time_valid = false)
54 #else
55 #define BAD_RING(_vq, fmt, args...) \
56 do { \
57 dev_err(&_vq->vq.vdev->dev, \
58 "%s:"fmt, (_vq)->vq.name, ##args); \
59 (_vq)->broken = true; \
60 } while (0)
61 #define START_USE(vq)
62 #define END_USE(vq)
63 #define LAST_ADD_TIME_UPDATE(vq)
64 #define LAST_ADD_TIME_CHECK(vq)
65 #define LAST_ADD_TIME_INVALID(vq)
66 #endif
67
68 struct vring_desc_state_split {
69 void *data; /* Data for callback. */
70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
71 };
72
73 struct vring_desc_state_packed {
74 void *data; /* Data for callback. */
75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
76 u16 num; /* Descriptor list length. */
77 u16 last; /* The last desc state in a list. */
78 };
79
80 struct vring_desc_extra {
81 dma_addr_t addr; /* Buffer DMA addr. */
82 u32 len; /* Buffer length. */
83 u16 flags; /* Descriptor flags. */
84 u16 next; /* The next desc state in a list. */
85 };
86
87 struct vring_virtqueue {
88 struct virtqueue vq;
89
90 /* Is this a packed ring? */
91 bool packed_ring;
92
93 /* Is DMA API used? */
94 bool use_dma_api;
95
96 /* Can we use weak barriers? */
97 bool weak_barriers;
98
99 /* Other side has made a mess, don't try any more. */
100 bool broken;
101
102 /* Host supports indirect buffers */
103 bool indirect;
104
105 /* Host publishes avail event idx */
106 bool event;
107
108 /* Head of free buffer list. */
109 unsigned int free_head;
110 /* Number we've added since last sync. */
111 unsigned int num_added;
112
113 /* Last used index we've seen. */
114 u16 last_used_idx;
115
116 union {
117 /* Available for split ring */
118 struct {
119 /* Actual memory layout for this queue. */
120 struct vring vring;
121
122 /* Last written value to avail->flags */
123 u16 avail_flags_shadow;
124
125 /*
126 * Last written value to avail->idx in
127 * guest byte order.
128 */
129 u16 avail_idx_shadow;
130
131 /* Per-descriptor state. */
132 struct vring_desc_state_split *desc_state;
133 struct vring_desc_extra *desc_extra;
134
135 /* DMA address and size information */
136 dma_addr_t queue_dma_addr;
137 size_t queue_size_in_bytes;
138 } split;
139
140 /* Available for packed ring */
141 struct {
142 /* Actual memory layout for this queue. */
143 struct {
144 unsigned int num;
145 struct vring_packed_desc *desc;
146 struct vring_packed_desc_event *driver;
147 struct vring_packed_desc_event *device;
148 } vring;
149
150 /* Driver ring wrap counter. */
151 bool avail_wrap_counter;
152
153 /* Device ring wrap counter. */
154 bool used_wrap_counter;
155
156 /* Avail used flags. */
157 u16 avail_used_flags;
158
159 /* Index of the next avail descriptor. */
160 u16 next_avail_idx;
161
162 /*
163 * Last written value to driver->flags in
164 * guest byte order.
165 */
166 u16 event_flags_shadow;
167
168 /* Per-descriptor state. */
169 struct vring_desc_state_packed *desc_state;
170 struct vring_desc_extra *desc_extra;
171
172 /* DMA address and size information */
173 dma_addr_t ring_dma_addr;
174 dma_addr_t driver_event_dma_addr;
175 dma_addr_t device_event_dma_addr;
176 size_t ring_size_in_bytes;
177 size_t event_size_in_bytes;
178 } packed;
179 };
180
181 /* How to notify other side. FIXME: commonalize hcalls! */
182 bool (*notify)(struct virtqueue *vq);
183
184 /* DMA, allocation, and size information */
185 bool we_own_ring;
186
187 #ifdef DEBUG
188 /* They're supposed to lock for us. */
189 unsigned int in_use;
190
191 /* Figure out if their kicks are too delayed. */
192 bool last_add_time_valid;
193 ktime_t last_add_time;
194 #endif
195 };
196
197
198 /*
199 * Helpers.
200 */
201
202 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
203
204 static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
205 unsigned int total_sg)
206 {
207 struct vring_virtqueue *vq = to_vvq(_vq);
208
209 /*
210 * If the host supports indirect descriptor tables, and we have multiple
211 * buffers, then go indirect. FIXME: tune this threshold
212 */
213 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
214 }
215
216 /*
217 * Modern virtio devices have feature bits to specify whether they need a
218 * quirk and bypass the IOMMU. If not there, just use the DMA API.
219 *
220 * If there, the interaction between virtio and DMA API is messy.
221 *
222 * On most systems with virtio, physical addresses match bus addresses,
223 * and it doesn't particularly matter whether we use the DMA API.
224 *
225 * On some systems, including Xen and any system with a physical device
226 * that speaks virtio behind a physical IOMMU, we must use the DMA API
227 * for virtio DMA to work at all.
228 *
229 * On other systems, including SPARC and PPC64, virtio-pci devices are
230 * enumerated as though they are behind an IOMMU, but the virtio host
231 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
232 * there or somehow map everything as the identity.
233 *
234 * For the time being, we preserve historic behavior and bypass the DMA
235 * API.
236 *
237 * TODO: install a per-device DMA ops structure that does the right thing
238 * taking into account all the above quirks, and use the DMA API
239 * unconditionally on data path.
240 */
241
242 static bool vring_use_dma_api(struct virtio_device *vdev)
243 {
244 if (!virtio_has_dma_quirk(vdev))
245 return true;
246
247 /* Otherwise, we are left to guess. */
248 /*
249 * In theory, it's possible to have a buggy QEMU-supplied
250 * emulated Q35 IOMMU and Xen enabled at the same time. On
251 * such a configuration, virtio has never worked and will
252 * not work without an even larger kludge. Instead, enable
253 * the DMA API if we're a Xen guest, which at least allows
254 * all of the sensible Xen configurations to work correctly.
255 */
256 if (xen_domain())
257 return true;
258
259 return false;
260 }
261
262 size_t virtio_max_dma_size(struct virtio_device *vdev)
263 {
264 size_t max_segment_size = SIZE_MAX;
265
266 if (vring_use_dma_api(vdev))
267 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
268
269 return max_segment_size;
270 }
271 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
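
/*
 * A minimal usage sketch (not part of the upstream file): a driver clamping
 * the size of the buffers it queues to what a single DMA segment can cover.
 * The helper name and the 1 MiB preference are made up for illustration.
 */
static inline size_t example_clamp_seg_size(struct virtio_device *vdev)
{
	size_t preferred = 1024 * 1024;	/* hypothetical driver preference */

	/* Never exceed what the DMA layer can map as one segment. */
	return min(preferred, virtio_max_dma_size(vdev));
}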
272
273 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
274 dma_addr_t *dma_handle, gfp_t flag)
275 {
276 if (vring_use_dma_api(vdev)) {
277 return dma_alloc_coherent(vdev->dev.parent, size,
278 dma_handle, flag);
279 } else {
280 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
281
282 if (queue) {
283 phys_addr_t phys_addr = virt_to_phys(queue);
284 *dma_handle = (dma_addr_t)phys_addr;
285
286 /*
287 * Sanity check: make sure we didn't truncate
288 * the address. The only arches I can find that
289 * have 64-bit phys_addr_t but 32-bit dma_addr_t
290 * are certain non-highmem MIPS and x86
291 * configurations, but these configurations
292 * should never allocate physical pages above 32
293 * bits, so this is fine. Just in case, throw a
294 * warning and abort if we end up with an
295 * unrepresentable address.
296 */
297 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
298 free_pages_exact(queue, PAGE_ALIGN(size));
299 return NULL;
300 }
301 }
302 return queue;
303 }
304 }
305
306 static void vring_free_queue(struct virtio_device *vdev, size_t size,
307 void *queue, dma_addr_t dma_handle)
308 {
309 if (vring_use_dma_api(vdev))
310 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
311 else
312 free_pages_exact(queue, PAGE_ALIGN(size));
313 }
314
315 /*
316 * The DMA ops on various arches are rather gnarly right now, and
317 * making all of the arch DMA ops work on the vring device itself
318 * is a mess. For now, we use the parent device for DMA ops.
319 */
320 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
321 {
322 return vq->vq.vdev->dev.parent;
323 }
324
325 /* Map one sg entry. */
326 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
327 struct scatterlist *sg,
328 enum dma_data_direction direction)
329 {
330 if (!vq->use_dma_api)
331 return (dma_addr_t)sg_phys(sg);
332
333 /*
334 * We can't use dma_map_sg, because we don't use scatterlists in
335 * the way it expects (we don't guarantee that the scatterlist
336 * will exist for the lifetime of the mapping).
337 */
338 return dma_map_page(vring_dma_dev(vq),
339 sg_page(sg), sg->offset, sg->length,
340 direction);
341 }
342
343 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
344 void *cpu_addr, size_t size,
345 enum dma_data_direction direction)
346 {
347 if (!vq->use_dma_api)
348 return (dma_addr_t)virt_to_phys(cpu_addr);
349
350 return dma_map_single(vring_dma_dev(vq),
351 cpu_addr, size, direction);
352 }
353
354 static int vring_mapping_error(const struct vring_virtqueue *vq,
355 dma_addr_t addr)
356 {
357 if (!vq->use_dma_api)
358 return 0;
359
360 return dma_mapping_error(vring_dma_dev(vq), addr);
361 }
362
363
364 /*
365 * Split ring specific functions - *_split().
366 */
367
368 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
369 struct vring_desc *desc)
370 {
371 u16 flags;
372
373 if (!vq->use_dma_api)
374 return;
375
376 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
377
378 if (flags & VRING_DESC_F_INDIRECT) {
379 dma_unmap_single(vring_dma_dev(vq),
380 virtio64_to_cpu(vq->vq.vdev, desc->addr),
381 virtio32_to_cpu(vq->vq.vdev, desc->len),
382 (flags & VRING_DESC_F_WRITE) ?
383 DMA_FROM_DEVICE : DMA_TO_DEVICE);
384 } else {
385 dma_unmap_page(vring_dma_dev(vq),
386 virtio64_to_cpu(vq->vq.vdev, desc->addr),
387 virtio32_to_cpu(vq->vq.vdev, desc->len),
388 (flags & VRING_DESC_F_WRITE) ?
389 DMA_FROM_DEVICE : DMA_TO_DEVICE);
390 }
391 }
392
393 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
394 unsigned int i)
395 {
396 struct vring_desc_extra *extra = vq->split.desc_extra;
397 u16 flags;
398
399 if (!vq->use_dma_api)
400 goto out;
401
402 flags = extra[i].flags;
403
404 if (flags & VRING_DESC_F_INDIRECT) {
405 dma_unmap_single(vring_dma_dev(vq),
406 extra[i].addr,
407 extra[i].len,
408 (flags & VRING_DESC_F_WRITE) ?
409 DMA_FROM_DEVICE : DMA_TO_DEVICE);
410 } else {
411 dma_unmap_page(vring_dma_dev(vq),
412 extra[i].addr,
413 extra[i].len,
414 (flags & VRING_DESC_F_WRITE) ?
415 DMA_FROM_DEVICE : DMA_TO_DEVICE);
416 }
417
418 out:
419 return extra[i].next;
420 }
421
422 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
423 unsigned int total_sg,
424 gfp_t gfp)
425 {
426 struct vring_desc *desc;
427 unsigned int i;
428
429 /*
430 * We require lowmem mappings for the descriptors because
431 * otherwise virt_to_phys will give us bogus addresses in the
432 * virtqueue.
433 */
434 gfp &= ~__GFP_HIGHMEM;
435
436 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
437 if (!desc)
438 return NULL;
439
440 for (i = 0; i < total_sg; i++)
441 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
442 return desc;
443 }
444
445 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
446 struct vring_desc *desc,
447 unsigned int i,
448 dma_addr_t addr,
449 unsigned int len,
450 u16 flags,
451 bool indirect)
452 {
453 struct vring_virtqueue *vring = to_vvq(vq);
454 struct vring_desc_extra *extra = vring->split.desc_extra;
455 u16 next;
456
457 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
458 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
459 desc[i].len = cpu_to_virtio32(vq->vdev, len);
460
461 if (!indirect) {
462 next = extra[i].next;
463 desc[i].next = cpu_to_virtio16(vq->vdev, next);
464
465 extra[i].addr = addr;
466 extra[i].len = len;
467 extra[i].flags = flags;
468 } else
469 next = virtio16_to_cpu(vq->vdev, desc[i].next);
470
471 return next;
472 }
473
474 static inline int virtqueue_add_split(struct virtqueue *_vq,
475 struct scatterlist *sgs[],
476 unsigned int total_sg,
477 unsigned int out_sgs,
478 unsigned int in_sgs,
479 void *data,
480 void *ctx,
481 gfp_t gfp)
482 {
483 struct vring_virtqueue *vq = to_vvq(_vq);
484 struct scatterlist *sg;
485 struct vring_desc *desc;
486 unsigned int i, n, avail, descs_used, prev, err_idx;
487 int head;
488 bool indirect;
489
490 START_USE(vq);
491
492 BUG_ON(data == NULL);
493 BUG_ON(ctx && vq->indirect);
494
495 if (unlikely(vq->broken)) {
496 END_USE(vq);
497 return -EIO;
498 }
499
500 LAST_ADD_TIME_UPDATE(vq);
501
502 BUG_ON(total_sg == 0);
503
504 head = vq->free_head;
505
506 if (virtqueue_use_indirect(_vq, total_sg))
507 desc = alloc_indirect_split(_vq, total_sg, gfp);
508 else {
509 desc = NULL;
510 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
511 }
512
513 if (desc) {
514 /* Use a single buffer which doesn't continue */
515 indirect = true;
516 /* Set up rest to use this indirect table. */
517 i = 0;
518 descs_used = 1;
519 } else {
520 indirect = false;
521 desc = vq->split.vring.desc;
522 i = head;
523 descs_used = total_sg;
524 }
525
526 if (vq->vq.num_free < descs_used) {
527 pr_debug("Can't add buf len %i - avail = %i\n",
528 descs_used, vq->vq.num_free);
529 /* FIXME: for historical reasons, we force a notify here if
530 * there are outgoing parts to the buffer. Presumably the
531 * host should service the ring ASAP. */
532 if (out_sgs)
533 vq->notify(&vq->vq);
534 if (indirect)
535 kfree(desc);
536 END_USE(vq);
537 return -ENOSPC;
538 }
539
540 for (n = 0; n < out_sgs; n++) {
541 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
542 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
543 if (vring_mapping_error(vq, addr))
544 goto unmap_release;
545
546 prev = i;
547 /* Note that we trust indirect descriptor
548 * table since it uses a streaming DMA mapping.
549 */
550 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
551 VRING_DESC_F_NEXT,
552 indirect);
553 }
554 }
555 for (; n < (out_sgs + in_sgs); n++) {
556 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
557 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
558 if (vring_mapping_error(vq, addr))
559 goto unmap_release;
560
561 prev = i;
562 /* Note that we trust indirect descriptor
563 * table since it uses a streaming DMA mapping.
564 */
565 i = virtqueue_add_desc_split(_vq, desc, i, addr,
566 sg->length,
567 VRING_DESC_F_NEXT |
568 VRING_DESC_F_WRITE,
569 indirect);
570 }
571 }
572 /* Last one doesn't continue. */
573 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
574 if (!indirect && vq->use_dma_api)
575 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
576 ~VRING_DESC_F_NEXT;
577
578 if (indirect) {
579 /* Now that the indirect table is filled in, map it. */
580 dma_addr_t addr = vring_map_single(
581 vq, desc, total_sg * sizeof(struct vring_desc),
582 DMA_TO_DEVICE);
583 if (vring_mapping_error(vq, addr))
584 goto unmap_release;
585
586 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
587 head, addr,
588 total_sg * sizeof(struct vring_desc),
589 VRING_DESC_F_INDIRECT,
590 false);
591 }
592
593 /* We're using some buffers from the free list. */
594 vq->vq.num_free -= descs_used;
595
596 /* Update free pointer */
597 if (indirect)
598 vq->free_head = vq->split.desc_extra[head].next;
599 else
600 vq->free_head = i;
601
602 /* Store token and indirect buffer state. */
603 vq->split.desc_state[head].data = data;
604 if (indirect)
605 vq->split.desc_state[head].indir_desc = desc;
606 else
607 vq->split.desc_state[head].indir_desc = ctx;
608
609 /* Put entry in available array (but don't update avail->idx until they
610 * do sync). */
611 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
612 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
613
614 /* Descriptors and available array need to be set before we expose the
615 * new available array entries. */
616 virtio_wmb(vq->weak_barriers);
617 vq->split.avail_idx_shadow++;
618 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
619 vq->split.avail_idx_shadow);
620 vq->num_added++;
621
622 pr_debug("Added buffer head %i to %p\n", head, vq);
623 END_USE(vq);
624
625 /* This is very unlikely, but theoretically possible. Kick
626 * just in case. */
627 if (unlikely(vq->num_added == (1 << 16) - 1))
628 virtqueue_kick(_vq);
629
630 return 0;
631
632 unmap_release:
633 err_idx = i;
634
635 if (indirect)
636 i = 0;
637 else
638 i = head;
639
640 for (n = 0; n < total_sg; n++) {
641 if (i == err_idx)
642 break;
643 if (indirect) {
644 vring_unmap_one_split_indirect(vq, &desc[i]);
645 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
646 } else
647 i = vring_unmap_one_split(vq, i);
648 }
649
650 if (indirect)
651 kfree(desc);
652
653 END_USE(vq);
654 return -ENOMEM;
655 }
656
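/*
 * Decide whether the device actually needs a kick.  Without
 * VIRTIO_RING_F_EVENT_IDX this only depends on VRING_USED_F_NO_NOTIFY;
 * with it, the device publishes the avail index it wants to be notified
 * at, and vring_need_event() (include/uapi/linux/virtio_ring.h) reports
 * whether that index falls inside the window of entries added since the
 * last kick (e.g. old = 10, new = 14, avail event = 12 => kick needed).
 */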
657 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
658 {
659 struct vring_virtqueue *vq = to_vvq(_vq);
660 u16 new, old;
661 bool needs_kick;
662
663 START_USE(vq);
664 /* We need to expose available array entries before checking avail
665 * event. */
666 virtio_mb(vq->weak_barriers);
667
668 old = vq->split.avail_idx_shadow - vq->num_added;
669 new = vq->split.avail_idx_shadow;
670 vq->num_added = 0;
671
672 LAST_ADD_TIME_CHECK(vq);
673 LAST_ADD_TIME_INVALID(vq);
674
675 if (vq->event) {
676 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
677 vring_avail_event(&vq->split.vring)),
678 new, old);
679 } else {
680 needs_kick = !(vq->split.vring.used->flags &
681 cpu_to_virtio16(_vq->vdev,
682 VRING_USED_F_NO_NOTIFY));
683 }
684 END_USE(vq);
685 return needs_kick;
686 }
687
688 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
689 void **ctx)
690 {
691 unsigned int i, j;
692 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
693
694 /* Clear data ptr. */
695 vq->split.desc_state[head].data = NULL;
696
697 /* Put back on free list: unmap first-level descriptors and find end */
698 i = head;
699
700 while (vq->split.vring.desc[i].flags & nextflag) {
701 vring_unmap_one_split(vq, i);
702 i = vq->split.desc_extra[i].next;
703 vq->vq.num_free++;
704 }
705
706 vring_unmap_one_split(vq, i);
707 vq->split.desc_extra[i].next = vq->free_head;
708 vq->free_head = head;
709
710 /* Plus final descriptor */
711 vq->vq.num_free++;
712
713 if (vq->indirect) {
714 struct vring_desc *indir_desc =
715 vq->split.desc_state[head].indir_desc;
716 u32 len;
717
718 /* Free the indirect table, if any, now that it's unmapped. */
719 if (!indir_desc)
720 return;
721
722 len = vq->split.desc_extra[head].len;
723
724 BUG_ON(!(vq->split.desc_extra[head].flags &
725 VRING_DESC_F_INDIRECT));
726 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
727
728 for (j = 0; j < len / sizeof(struct vring_desc); j++)
729 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
730
731 kfree(indir_desc);
732 vq->split.desc_state[head].indir_desc = NULL;
733 } else if (ctx) {
734 *ctx = vq->split.desc_state[head].indir_desc;
735 }
736 }
737
738 static inline bool more_used_split(const struct vring_virtqueue *vq)
739 {
740 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
741 vq->split.vring.used->idx);
742 }
743
744 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
745 unsigned int *len,
746 void **ctx)
747 {
748 struct vring_virtqueue *vq = to_vvq(_vq);
749 void *ret;
750 unsigned int i;
751 u16 last_used;
752
753 START_USE(vq);
754
755 if (unlikely(vq->broken)) {
756 END_USE(vq);
757 return NULL;
758 }
759
760 if (!more_used_split(vq)) {
761 pr_debug("No more buffers in queue\n");
762 END_USE(vq);
763 return NULL;
764 }
765
766 /* Only get used array entries after they have been exposed by host. */
767 virtio_rmb(vq->weak_barriers);
768
769 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
770 i = virtio32_to_cpu(_vq->vdev,
771 vq->split.vring.used->ring[last_used].id);
772 *len = virtio32_to_cpu(_vq->vdev,
773 vq->split.vring.used->ring[last_used].len);
774
775 if (unlikely(i >= vq->split.vring.num)) {
776 BAD_RING(vq, "id %u out of range\n", i);
777 return NULL;
778 }
779 if (unlikely(!vq->split.desc_state[i].data)) {
780 BAD_RING(vq, "id %u is not a head!\n", i);
781 return NULL;
782 }
783
784 /* detach_buf_split clears data, so grab it now. */
785 ret = vq->split.desc_state[i].data;
786 detach_buf_split(vq, i, ctx);
787 vq->last_used_idx++;
788 /* If we expect an interrupt for the next entry, tell host
789 * by writing event index and flush out the write before
790 * the read in the next get_buf call. */
791 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
792 virtio_store_mb(vq->weak_barriers,
793 &vring_used_event(&vq->split.vring),
794 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
795
796 LAST_ADD_TIME_INVALID(vq);
797
798 END_USE(vq);
799 return ret;
800 }
801
802 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
803 {
804 struct vring_virtqueue *vq = to_vvq(_vq);
805
806 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
807 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
808 if (!vq->event)
809 vq->split.vring.avail->flags =
810 cpu_to_virtio16(_vq->vdev,
811 vq->split.avail_flags_shadow);
812 }
813 }
814
815 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
816 {
817 struct vring_virtqueue *vq = to_vvq(_vq);
818 u16 last_used_idx;
819
820 START_USE(vq);
821
822 /* We optimistically turn back on interrupts, then check if there was
823 * more to do. */
824 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
825 * either clear the flags bit or point the event index at the next
826 * entry. Always do both to keep code simple. */
827 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
828 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
829 if (!vq->event)
830 vq->split.vring.avail->flags =
831 cpu_to_virtio16(_vq->vdev,
832 vq->split.avail_flags_shadow);
833 }
834 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
835 last_used_idx = vq->last_used_idx);
836 END_USE(vq);
837 return last_used_idx;
838 }
839
840 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
841 {
842 struct vring_virtqueue *vq = to_vvq(_vq);
843
844 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
845 vq->split.vring.used->idx);
846 }
847
848 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
849 {
850 struct vring_virtqueue *vq = to_vvq(_vq);
851 u16 bufs;
852
853 START_USE(vq);
854
855 /* We optimistically turn back on interrupts, then check if there was
856 * more to do. */
857 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
858 * either clear the flags bit or point the event index at the next
859 * entry. Always update the event index to keep code simple. */
860 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
861 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
862 if (!vq->event)
863 vq->split.vring.avail->flags =
864 cpu_to_virtio16(_vq->vdev,
865 vq->split.avail_flags_shadow);
866 }
867 /* TODO: tune this threshold */
868 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
869
870 virtio_store_mb(vq->weak_barriers,
871 &vring_used_event(&vq->split.vring),
872 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
873
874 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
875 - vq->last_used_idx) > bufs)) {
876 END_USE(vq);
877 return false;
878 }
879
880 END_USE(vq);
881 return true;
882 }
883
884 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
885 {
886 struct vring_virtqueue *vq = to_vvq(_vq);
887 unsigned int i;
888 void *buf;
889
890 START_USE(vq);
891
892 for (i = 0; i < vq->split.vring.num; i++) {
893 if (!vq->split.desc_state[i].data)
894 continue;
895 /* detach_buf_split clears data, so grab it now. */
896 buf = vq->split.desc_state[i].data;
897 detach_buf_split(vq, i, NULL);
898 vq->split.avail_idx_shadow--;
899 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
900 vq->split.avail_idx_shadow);
901 END_USE(vq);
902 return buf;
903 }
904 /* That should have freed everything. */
905 BUG_ON(vq->vq.num_free != vq->split.vring.num);
906
907 END_USE(vq);
908 return NULL;
909 }
910
911 static struct virtqueue *vring_create_virtqueue_split(
912 unsigned int index,
913 unsigned int num,
914 unsigned int vring_align,
915 struct virtio_device *vdev,
916 bool weak_barriers,
917 bool may_reduce_num,
918 bool context,
919 bool (*notify)(struct virtqueue *),
920 void (*callback)(struct virtqueue *),
921 const char *name)
922 {
923 struct virtqueue *vq;
924 void *queue = NULL;
925 dma_addr_t dma_addr;
926 size_t queue_size_in_bytes;
927 struct vring vring;
928
929 /* We assume num is a power of 2. */
930 if (num & (num - 1)) {
931 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
932 return NULL;
933 }
934
935 /* TODO: allocate each queue chunk individually */
936 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
937 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
938 &dma_addr,
939 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
940 if (queue)
941 break;
942 if (!may_reduce_num)
943 return NULL;
944 }
945
946 if (!num)
947 return NULL;
948
949 if (!queue) {
950 /* Try to get a single page. You are my only hope! */
951 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
952 &dma_addr, GFP_KERNEL|__GFP_ZERO);
953 }
954 if (!queue)
955 return NULL;
956
957 queue_size_in_bytes = vring_size(num, vring_align);
958 vring_init(&vring, num, queue, vring_align);
959
960 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
961 notify, callback, name);
962 if (!vq) {
963 vring_free_queue(vdev, queue_size_in_bytes, queue,
964 dma_addr);
965 return NULL;
966 }
967
968 to_vvq(vq)->split.queue_dma_addr = dma_addr;
969 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
970 to_vvq(vq)->we_own_ring = true;
971
972 return vq;
973 }
974
975
976 /*
977 * Packed ring specific functions - *_packed().
978 */
979
980 static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
981 struct vring_desc_extra *state)
982 {
983 u16 flags;
984
985 if (!vq->use_dma_api)
986 return;
987
988 flags = state->flags;
989
990 if (flags & VRING_DESC_F_INDIRECT) {
991 dma_unmap_single(vring_dma_dev(vq),
992 state->addr, state->len,
993 (flags & VRING_DESC_F_WRITE) ?
994 DMA_FROM_DEVICE : DMA_TO_DEVICE);
995 } else {
996 dma_unmap_page(vring_dma_dev(vq),
997 state->addr, state->len,
998 (flags & VRING_DESC_F_WRITE) ?
999 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1000 }
1001 }
1002
1003 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1004 struct vring_packed_desc *desc)
1005 {
1006 u16 flags;
1007
1008 if (!vq->use_dma_api)
1009 return;
1010
1011 flags = le16_to_cpu(desc->flags);
1012
1013 if (flags & VRING_DESC_F_INDIRECT) {
1014 dma_unmap_single(vring_dma_dev(vq),
1015 le64_to_cpu(desc->addr),
1016 le32_to_cpu(desc->len),
1017 (flags & VRING_DESC_F_WRITE) ?
1018 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1019 } else {
1020 dma_unmap_page(vring_dma_dev(vq),
1021 le64_to_cpu(desc->addr),
1022 le32_to_cpu(desc->len),
1023 (flags & VRING_DESC_F_WRITE) ?
1024 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1025 }
1026 }
1027
1028 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1029 gfp_t gfp)
1030 {
1031 struct vring_packed_desc *desc;
1032
1033 /*
1034 * We require lowmem mappings for the descriptors because
1035 * otherwise virt_to_phys will give us bogus addresses in the
1036 * virtqueue.
1037 */
1038 gfp &= ~__GFP_HIGHMEM;
1039
1040 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1041
1042 return desc;
1043 }
1044
1045 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1046 struct scatterlist *sgs[],
1047 unsigned int total_sg,
1048 unsigned int out_sgs,
1049 unsigned int in_sgs,
1050 void *data,
1051 gfp_t gfp)
1052 {
1053 struct vring_packed_desc *desc;
1054 struct scatterlist *sg;
1055 unsigned int i, n, err_idx;
1056 u16 head, id;
1057 dma_addr_t addr;
1058
1059 head = vq->packed.next_avail_idx;
1060 desc = alloc_indirect_packed(total_sg, gfp);
1061 if (!desc)
1062 return -ENOMEM;
1063
1064 if (unlikely(vq->vq.num_free < 1)) {
1065 pr_debug("Can't add buf len 1 - avail = 0\n");
1066 kfree(desc);
1067 END_USE(vq);
1068 return -ENOSPC;
1069 }
1070
1071 i = 0;
1072 id = vq->free_head;
1073 BUG_ON(id == vq->packed.vring.num);
1074
1075 for (n = 0; n < out_sgs + in_sgs; n++) {
1076 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1077 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1078 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1079 if (vring_mapping_error(vq, addr))
1080 goto unmap_release;
1081
1082 desc[i].flags = cpu_to_le16(n < out_sgs ?
1083 0 : VRING_DESC_F_WRITE);
1084 desc[i].addr = cpu_to_le64(addr);
1085 desc[i].len = cpu_to_le32(sg->length);
1086 i++;
1087 }
1088 }
1089
1090 /* Now that the indirect table is filled in, map it. */
1091 addr = vring_map_single(vq, desc,
1092 total_sg * sizeof(struct vring_packed_desc),
1093 DMA_TO_DEVICE);
1094 if (vring_mapping_error(vq, addr))
1095 goto unmap_release;
1096
1097 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1098 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1099 sizeof(struct vring_packed_desc));
1100 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1101
1102 if (vq->use_dma_api) {
1103 vq->packed.desc_extra[id].addr = addr;
1104 vq->packed.desc_extra[id].len = total_sg *
1105 sizeof(struct vring_packed_desc);
1106 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1107 vq->packed.avail_used_flags;
1108 }
1109
1110 /*
1111 * A driver MUST NOT make the first descriptor in the list
1112 * available before all subsequent descriptors comprising
1113 * the list are made available.
1114 */
1115 virtio_wmb(vq->weak_barriers);
1116 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1117 vq->packed.avail_used_flags);
1118
1119 /* We're using some buffers from the free list. */
1120 vq->vq.num_free -= 1;
1121
1122 /* Update free pointer */
1123 n = head + 1;
1124 if (n >= vq->packed.vring.num) {
1125 n = 0;
1126 vq->packed.avail_wrap_counter ^= 1;
1127 vq->packed.avail_used_flags ^=
1128 1 << VRING_PACKED_DESC_F_AVAIL |
1129 1 << VRING_PACKED_DESC_F_USED;
1130 }
1131 vq->packed.next_avail_idx = n;
1132 vq->free_head = vq->packed.desc_extra[id].next;
1133
1134 /* Store token and indirect buffer state. */
1135 vq->packed.desc_state[id].num = 1;
1136 vq->packed.desc_state[id].data = data;
1137 vq->packed.desc_state[id].indir_desc = desc;
1138 vq->packed.desc_state[id].last = id;
1139
1140 vq->num_added += 1;
1141
1142 pr_debug("Added buffer head %i to %p\n", head, vq);
1143 END_USE(vq);
1144
1145 return 0;
1146
1147 unmap_release:
1148 err_idx = i;
1149
1150 for (i = 0; i < err_idx; i++)
1151 vring_unmap_desc_packed(vq, &desc[i]);
1152
1153 kfree(desc);
1154
1155 END_USE(vq);
1156 return -ENOMEM;
1157 }
1158
1159 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1160 struct scatterlist *sgs[],
1161 unsigned int total_sg,
1162 unsigned int out_sgs,
1163 unsigned int in_sgs,
1164 void *data,
1165 void *ctx,
1166 gfp_t gfp)
1167 {
1168 struct vring_virtqueue *vq = to_vvq(_vq);
1169 struct vring_packed_desc *desc;
1170 struct scatterlist *sg;
1171 unsigned int i, n, c, descs_used, err_idx;
1172 __le16 head_flags, flags;
1173 u16 head, id, prev, curr, avail_used_flags;
1174 int err;
1175
1176 START_USE(vq);
1177
1178 BUG_ON(data == NULL);
1179 BUG_ON(ctx && vq->indirect);
1180
1181 if (unlikely(vq->broken)) {
1182 END_USE(vq);
1183 return -EIO;
1184 }
1185
1186 LAST_ADD_TIME_UPDATE(vq);
1187
1188 BUG_ON(total_sg == 0);
1189
1190 if (virtqueue_use_indirect(_vq, total_sg)) {
1191 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1192 in_sgs, data, gfp);
1193 if (err != -ENOMEM) {
1194 END_USE(vq);
1195 return err;
1196 }
1197
1198 /* fall back on direct */
1199 }
1200
1201 head = vq->packed.next_avail_idx;
1202 avail_used_flags = vq->packed.avail_used_flags;
1203
1204 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1205
1206 desc = vq->packed.vring.desc;
1207 i = head;
1208 descs_used = total_sg;
1209
1210 if (unlikely(vq->vq.num_free < descs_used)) {
1211 pr_debug("Can't add buf len %i - avail = %i\n",
1212 descs_used, vq->vq.num_free);
1213 END_USE(vq);
1214 return -ENOSPC;
1215 }
1216
1217 id = vq->free_head;
1218 BUG_ON(id == vq->packed.vring.num);
1219
1220 curr = id;
1221 c = 0;
1222 for (n = 0; n < out_sgs + in_sgs; n++) {
1223 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1224 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1225 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1226 if (vring_mapping_error(vq, addr))
1227 goto unmap_release;
1228
1229 flags = cpu_to_le16(vq->packed.avail_used_flags |
1230 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1231 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1232 if (i == head)
1233 head_flags = flags;
1234 else
1235 desc[i].flags = flags;
1236
1237 desc[i].addr = cpu_to_le64(addr);
1238 desc[i].len = cpu_to_le32(sg->length);
1239 desc[i].id = cpu_to_le16(id);
1240
1241 if (unlikely(vq->use_dma_api)) {
1242 vq->packed.desc_extra[curr].addr = addr;
1243 vq->packed.desc_extra[curr].len = sg->length;
1244 vq->packed.desc_extra[curr].flags =
1245 le16_to_cpu(flags);
1246 }
1247 prev = curr;
1248 curr = vq->packed.desc_extra[curr].next;
1249
1250 if ((unlikely(++i >= vq->packed.vring.num))) {
1251 i = 0;
1252 vq->packed.avail_used_flags ^=
1253 1 << VRING_PACKED_DESC_F_AVAIL |
1254 1 << VRING_PACKED_DESC_F_USED;
1255 }
1256 }
1257 }
1258
1259 if (i <= head)
1260 vq->packed.avail_wrap_counter ^= 1;
1261
1262 /* We're using some buffers from the free list. */
1263 vq->vq.num_free -= descs_used;
1264
1265 /* Update free pointer */
1266 vq->packed.next_avail_idx = i;
1267 vq->free_head = curr;
1268
1269 /* Store token. */
1270 vq->packed.desc_state[id].num = descs_used;
1271 vq->packed.desc_state[id].data = data;
1272 vq->packed.desc_state[id].indir_desc = ctx;
1273 vq->packed.desc_state[id].last = prev;
1274
1275 /*
1276 * A driver MUST NOT make the first descriptor in the list
1277 * available before all subsequent descriptors comprising
1278 * the list are made available.
1279 */
1280 virtio_wmb(vq->weak_barriers);
1281 vq->packed.vring.desc[head].flags = head_flags;
1282 vq->num_added += descs_used;
1283
1284 pr_debug("Added buffer head %i to %p\n", head, vq);
1285 END_USE(vq);
1286
1287 return 0;
1288
1289 unmap_release:
1290 err_idx = i;
1291 i = head;
1292 curr = vq->free_head;
1293
1294 vq->packed.avail_used_flags = avail_used_flags;
1295
1296 for (n = 0; n < total_sg; n++) {
1297 if (i == err_idx)
1298 break;
1299 vring_unmap_state_packed(vq,
1300 &vq->packed.desc_extra[curr]);
1301 curr = vq->packed.desc_extra[curr].next;
1302 i++;
1303 if (i >= vq->packed.vring.num)
1304 i = 0;
1305 }
1306
1307 END_USE(vq);
1308 return -EIO;
1309 }
1310
1311 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1312 {
1313 struct vring_virtqueue *vq = to_vvq(_vq);
1314 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1315 bool needs_kick;
1316 union {
1317 struct {
1318 __le16 off_wrap;
1319 __le16 flags;
1320 };
1321 u32 u32;
1322 } snapshot;
1323
1324 START_USE(vq);
1325
1326 /*
1327 * We need to expose the new flags value before checking notification
1328 * suppressions.
1329 */
1330 virtio_mb(vq->weak_barriers);
1331
1332 old = vq->packed.next_avail_idx - vq->num_added;
1333 new = vq->packed.next_avail_idx;
1334 vq->num_added = 0;
1335
1336 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1337 flags = le16_to_cpu(snapshot.flags);
1338
1339 LAST_ADD_TIME_CHECK(vq);
1340 LAST_ADD_TIME_INVALID(vq);
1341
1342 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1343 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1344 goto out;
1345 }
1346
1347 off_wrap = le16_to_cpu(snapshot.off_wrap);
1348
1349 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1350 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1351 if (wrap_counter != vq->packed.avail_wrap_counter)
1352 event_idx -= vq->packed.vring.num;
1353
1354 needs_kick = vring_need_event(event_idx, new, old);
1355 out:
1356 END_USE(vq);
1357 return needs_kick;
1358 }
1359
1360 static void detach_buf_packed(struct vring_virtqueue *vq,
1361 unsigned int id, void **ctx)
1362 {
1363 struct vring_desc_state_packed *state = NULL;
1364 struct vring_packed_desc *desc;
1365 unsigned int i, curr;
1366
1367 state = &vq->packed.desc_state[id];
1368
1369 /* Clear data ptr. */
1370 state->data = NULL;
1371
1372 vq->packed.desc_extra[state->last].next = vq->free_head;
1373 vq->free_head = id;
1374 vq->vq.num_free += state->num;
1375
1376 if (unlikely(vq->use_dma_api)) {
1377 curr = id;
1378 for (i = 0; i < state->num; i++) {
1379 vring_unmap_state_packed(vq,
1380 &vq->packed.desc_extra[curr]);
1381 curr = vq->packed.desc_extra[curr].next;
1382 }
1383 }
1384
1385 if (vq->indirect) {
1386 u32 len;
1387
1388 /* Free the indirect table, if any, now that it's unmapped. */
1389 desc = state->indir_desc;
1390 if (!desc)
1391 return;
1392
1393 if (vq->use_dma_api) {
1394 len = vq->packed.desc_extra[id].len;
1395 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1396 i++)
1397 vring_unmap_desc_packed(vq, &desc[i]);
1398 }
1399 kfree(desc);
1400 state->indir_desc = NULL;
1401 } else if (ctx) {
1402 *ctx = state->indir_desc;
1403 }
1404 }
1405
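/*
 * In a packed ring a descriptor slot has been used by the device when its
 * AVAIL and USED flag bits are equal to each other and to the wrap counter
 * we currently expect (vq->packed.used_wrap_counter); the helpers below
 * encode exactly that check.
 */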
1406 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1407 u16 idx, bool used_wrap_counter)
1408 {
1409 bool avail, used;
1410 u16 flags;
1411
1412 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1413 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1414 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1415
1416 return avail == used && used == used_wrap_counter;
1417 }
1418
1419 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1420 {
1421 return is_used_desc_packed(vq, vq->last_used_idx,
1422 vq->packed.used_wrap_counter);
1423 }
1424
1425 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1426 unsigned int *len,
1427 void **ctx)
1428 {
1429 struct vring_virtqueue *vq = to_vvq(_vq);
1430 u16 last_used, id;
1431 void *ret;
1432
1433 START_USE(vq);
1434
1435 if (unlikely(vq->broken)) {
1436 END_USE(vq);
1437 return NULL;
1438 }
1439
1440 if (!more_used_packed(vq)) {
1441 pr_debug("No more buffers in queue\n");
1442 END_USE(vq);
1443 return NULL;
1444 }
1445
1446 /* Only get used elements after they have been exposed by host. */
1447 virtio_rmb(vq->weak_barriers);
1448
1449 last_used = vq->last_used_idx;
1450 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1451 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1452
1453 if (unlikely(id >= vq->packed.vring.num)) {
1454 BAD_RING(vq, "id %u out of range\n", id);
1455 return NULL;
1456 }
1457 if (unlikely(!vq->packed.desc_state[id].data)) {
1458 BAD_RING(vq, "id %u is not a head!\n", id);
1459 return NULL;
1460 }
1461
1462 /* detach_buf_packed clears data, so grab it now. */
1463 ret = vq->packed.desc_state[id].data;
1464 detach_buf_packed(vq, id, ctx);
1465
1466 vq->last_used_idx += vq->packed.desc_state[id].num;
1467 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1468 vq->last_used_idx -= vq->packed.vring.num;
1469 vq->packed.used_wrap_counter ^= 1;
1470 }
1471
1472 /*
1473 * If we expect an interrupt for the next entry, tell host
1474 * by writing event index and flush out the write before
1475 * the read in the next get_buf call.
1476 */
1477 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1478 virtio_store_mb(vq->weak_barriers,
1479 &vq->packed.vring.driver->off_wrap,
1480 cpu_to_le16(vq->last_used_idx |
1481 (vq->packed.used_wrap_counter <<
1482 VRING_PACKED_EVENT_F_WRAP_CTR)));
1483
1484 LAST_ADD_TIME_INVALID(vq);
1485
1486 END_USE(vq);
1487 return ret;
1488 }
1489
1490 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1491 {
1492 struct vring_virtqueue *vq = to_vvq(_vq);
1493
1494 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1495 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1496 vq->packed.vring.driver->flags =
1497 cpu_to_le16(vq->packed.event_flags_shadow);
1498 }
1499 }
1500
1501 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1502 {
1503 struct vring_virtqueue *vq = to_vvq(_vq);
1504
1505 START_USE(vq);
1506
1507 /*
1508 * We optimistically turn back on interrupts, then check if there was
1509 * more to do.
1510 */
1511
1512 if (vq->event) {
1513 vq->packed.vring.driver->off_wrap =
1514 cpu_to_le16(vq->last_used_idx |
1515 (vq->packed.used_wrap_counter <<
1516 VRING_PACKED_EVENT_F_WRAP_CTR));
1517 /*
1518 * We need to update event offset and event wrap
1519 * counter first before updating event flags.
1520 */
1521 virtio_wmb(vq->weak_barriers);
1522 }
1523
1524 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1525 vq->packed.event_flags_shadow = vq->event ?
1526 VRING_PACKED_EVENT_FLAG_DESC :
1527 VRING_PACKED_EVENT_FLAG_ENABLE;
1528 vq->packed.vring.driver->flags =
1529 cpu_to_le16(vq->packed.event_flags_shadow);
1530 }
1531
1532 END_USE(vq);
1533 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1534 VRING_PACKED_EVENT_F_WRAP_CTR);
1535 }
1536
1537 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1538 {
1539 struct vring_virtqueue *vq = to_vvq(_vq);
1540 bool wrap_counter;
1541 u16 used_idx;
1542
1543 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1544 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1545
1546 return is_used_desc_packed(vq, used_idx, wrap_counter);
1547 }
1548
1549 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1550 {
1551 struct vring_virtqueue *vq = to_vvq(_vq);
1552 u16 used_idx, wrap_counter;
1553 u16 bufs;
1554
1555 START_USE(vq);
1556
1557 /*
1558 * We optimistically turn back on interrupts, then check if there was
1559 * more to do.
1560 */
1561
1562 if (vq->event) {
1563 /* TODO: tune this threshold */
1564 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1565 wrap_counter = vq->packed.used_wrap_counter;
1566
1567 used_idx = vq->last_used_idx + bufs;
1568 if (used_idx >= vq->packed.vring.num) {
1569 used_idx -= vq->packed.vring.num;
1570 wrap_counter ^= 1;
1571 }
1572
1573 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1574 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1575
1576 /*
1577 * We need to update event offset and event wrap
1578 * counter first before updating event flags.
1579 */
1580 virtio_wmb(vq->weak_barriers);
1581 }
1582
1583 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1584 vq->packed.event_flags_shadow = vq->event ?
1585 VRING_PACKED_EVENT_FLAG_DESC :
1586 VRING_PACKED_EVENT_FLAG_ENABLE;
1587 vq->packed.vring.driver->flags =
1588 cpu_to_le16(vq->packed.event_flags_shadow);
1589 }
1590
1591 /*
1592 * We need to update event suppression structure first
1593 * before re-checking for more used buffers.
1594 */
1595 virtio_mb(vq->weak_barriers);
1596
1597 if (is_used_desc_packed(vq,
1598 vq->last_used_idx,
1599 vq->packed.used_wrap_counter)) {
1600 END_USE(vq);
1601 return false;
1602 }
1603
1604 END_USE(vq);
1605 return true;
1606 }
1607
1608 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1609 {
1610 struct vring_virtqueue *vq = to_vvq(_vq);
1611 unsigned int i;
1612 void *buf;
1613
1614 START_USE(vq);
1615
1616 for (i = 0; i < vq->packed.vring.num; i++) {
1617 if (!vq->packed.desc_state[i].data)
1618 continue;
1619 /* detach_buf clears data, so grab it now. */
1620 buf = vq->packed.desc_state[i].data;
1621 detach_buf_packed(vq, i, NULL);
1622 END_USE(vq);
1623 return buf;
1624 }
1625 /* That should have freed everything. */
1626 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1627
1628 END_USE(vq);
1629 return NULL;
1630 }
1631
1632 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1633 unsigned int num)
1634 {
1635 struct vring_desc_extra *desc_extra;
1636 unsigned int i;
1637
1638 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1639 GFP_KERNEL);
1640 if (!desc_extra)
1641 return NULL;
1642
1643 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1644
1645 for (i = 0; i < num - 1; i++)
1646 desc_extra[i].next = i + 1;
1647
1648 return desc_extra;
1649 }
1650
1651 static struct virtqueue *vring_create_virtqueue_packed(
1652 unsigned int index,
1653 unsigned int num,
1654 unsigned int vring_align,
1655 struct virtio_device *vdev,
1656 bool weak_barriers,
1657 bool may_reduce_num,
1658 bool context,
1659 bool (*notify)(struct virtqueue *),
1660 void (*callback)(struct virtqueue *),
1661 const char *name)
1662 {
1663 struct vring_virtqueue *vq;
1664 struct vring_packed_desc *ring;
1665 struct vring_packed_desc_event *driver, *device;
1666 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1667 size_t ring_size_in_bytes, event_size_in_bytes;
1668
1669 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1670
1671 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1672 &ring_dma_addr,
1673 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1674 if (!ring)
1675 goto err_ring;
1676
1677 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1678
1679 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1680 &driver_event_dma_addr,
1681 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1682 if (!driver)
1683 goto err_driver;
1684
1685 device = vring_alloc_queue(vdev, event_size_in_bytes,
1686 &device_event_dma_addr,
1687 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1688 if (!device)
1689 goto err_device;
1690
1691 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1692 if (!vq)
1693 goto err_vq;
1694
1695 vq->vq.callback = callback;
1696 vq->vq.vdev = vdev;
1697 vq->vq.name = name;
1698 vq->vq.num_free = num;
1699 vq->vq.index = index;
1700 vq->we_own_ring = true;
1701 vq->notify = notify;
1702 vq->weak_barriers = weak_barriers;
1703 vq->broken = false;
1704 vq->last_used_idx = 0;
1705 vq->num_added = 0;
1706 vq->packed_ring = true;
1707 vq->use_dma_api = vring_use_dma_api(vdev);
1708 #ifdef DEBUG
1709 vq->in_use = false;
1710 vq->last_add_time_valid = false;
1711 #endif
1712
1713 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1714 !context;
1715 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1716
1717 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1718 vq->weak_barriers = false;
1719
1720 vq->packed.ring_dma_addr = ring_dma_addr;
1721 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1722 vq->packed.device_event_dma_addr = device_event_dma_addr;
1723
1724 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1725 vq->packed.event_size_in_bytes = event_size_in_bytes;
1726
1727 vq->packed.vring.num = num;
1728 vq->packed.vring.desc = ring;
1729 vq->packed.vring.driver = driver;
1730 vq->packed.vring.device = device;
1731
1732 vq->packed.next_avail_idx = 0;
1733 vq->packed.avail_wrap_counter = 1;
1734 vq->packed.used_wrap_counter = 1;
1735 vq->packed.event_flags_shadow = 0;
1736 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1737
1738 vq->packed.desc_state = kmalloc_array(num,
1739 sizeof(struct vring_desc_state_packed),
1740 GFP_KERNEL);
1741 if (!vq->packed.desc_state)
1742 goto err_desc_state;
1743
1744 memset(vq->packed.desc_state, 0,
1745 num * sizeof(struct vring_desc_state_packed));
1746
1747 /* Put everything in free lists. */
1748 vq->free_head = 0;
1749
1750 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1751 if (!vq->packed.desc_extra)
1752 goto err_desc_extra;
1753
1754 /* No callback? Tell other side not to bother us. */
1755 if (!callback) {
1756 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1757 vq->packed.vring.driver->flags =
1758 cpu_to_le16(vq->packed.event_flags_shadow);
1759 }
1760
1761 spin_lock(&vdev->vqs_list_lock);
1762 list_add_tail(&vq->vq.list, &vdev->vqs);
1763 spin_unlock(&vdev->vqs_list_lock);
1764 return &vq->vq;
1765
1766 err_desc_extra:
1767 kfree(vq->packed.desc_state);
1768 err_desc_state:
1769 kfree(vq);
1770 err_vq:
1771 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1772 err_device:
1773 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1774 err_driver:
1775 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1776 err_ring:
1777 return NULL;
1778 }
1779
1780
1781 /*
1782 * Generic functions and exported symbols.
1783 */
1784
1785 static inline int virtqueue_add(struct virtqueue *_vq,
1786 struct scatterlist *sgs[],
1787 unsigned int total_sg,
1788 unsigned int out_sgs,
1789 unsigned int in_sgs,
1790 void *data,
1791 void *ctx,
1792 gfp_t gfp)
1793 {
1794 struct vring_virtqueue *vq = to_vvq(_vq);
1795
1796 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1797 out_sgs, in_sgs, data, ctx, gfp) :
1798 virtqueue_add_split(_vq, sgs, total_sg,
1799 out_sgs, in_sgs, data, ctx, gfp);
1800 }
1801
1802 /**
1803 * virtqueue_add_sgs - expose buffers to other end
1804 * @_vq: the struct virtqueue we're talking about.
1805 * @sgs: array of terminated scatterlists.
1806 * @out_sgs: the number of scatterlists readable by other side
1807 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1808 * @data: the token identifying the buffer.
1809 * @gfp: how to do memory allocations (if necessary).
1810 *
1811 * Caller must ensure we don't call this with other virtqueue operations
1812 * at the same time (except where noted).
1813 *
1814 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1815 */
1816 int virtqueue_add_sgs(struct virtqueue *_vq,
1817 struct scatterlist *sgs[],
1818 unsigned int out_sgs,
1819 unsigned int in_sgs,
1820 void *data,
1821 gfp_t gfp)
1822 {
1823 unsigned int i, total_sg = 0;
1824
1825 /* Count them first. */
1826 for (i = 0; i < out_sgs + in_sgs; i++) {
1827 struct scatterlist *sg;
1828
1829 for (sg = sgs[i]; sg; sg = sg_next(sg))
1830 total_sg++;
1831 }
1832 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1833 data, NULL, gfp);
1834 }
1835 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
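
/*
 * A minimal usage sketch (not part of the upstream file): queueing one
 * device-readable header followed by one device-writable status byte with
 * virtqueue_add_sgs().  struct example_req and its layout are hypothetical;
 * only the calling convention is the point.  @req must point at DMA-able
 * (e.g. kmalloc'ed) memory.
 */
struct example_req {
	u8 hdr[16];	/* filled in by the driver, read by the device */
	u8 status;	/* written back by the device */
};

static inline int example_add_req(struct virtqueue *vq, struct example_req *req)
{
	struct scatterlist hdr, status;
	struct scatterlist *sgs[] = { &hdr, &status };

	sg_init_one(&hdr, req->hdr, sizeof(req->hdr));
	sg_init_one(&status, &req->status, sizeof(req->status));

	/* out_sgs entries come first in @sgs, in_sgs entries follow. */
	return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}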
1836
1837 /**
1838 * virtqueue_add_outbuf - expose output buffers to other end
1839 * @vq: the struct virtqueue we're talking about.
1840 * @sg: scatterlist (must be well-formed and terminated!)
1841 * @num: the number of entries in @sg readable by other side
1842 * @data: the token identifying the buffer.
1843 * @gfp: how to do memory allocations (if necessary).
1844 *
1845 * Caller must ensure we don't call this with other virtqueue operations
1846 * at the same time (except where noted).
1847 *
1848 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1849 */
1850 int virtqueue_add_outbuf(struct virtqueue *vq,
1851 struct scatterlist *sg, unsigned int num,
1852 void *data,
1853 gfp_t gfp)
1854 {
1855 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1856 }
1857 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
1858
1859 /**
1860 * virtqueue_add_inbuf - expose input buffers to other end
1861 * @vq: the struct virtqueue we're talking about.
1862 * @sg: scatterlist (must be well-formed and terminated!)
1863 * @num: the number of entries in @sg writable by other side
1864 * @data: the token identifying the buffer.
1865 * @gfp: how to do memory allocations (if necessary).
1866 *
1867 * Caller must ensure we don't call this with other virtqueue operations
1868 * at the same time (except where noted).
1869 *
1870 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1871 */
1872 int virtqueue_add_inbuf(struct virtqueue *vq,
1873 struct scatterlist *sg, unsigned int num,
1874 void *data,
1875 gfp_t gfp)
1876 {
1877 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1878 }
1879 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
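/*
 * Editor-added sketch: posting a single device-writable buffer, e.g. an
 * RX buffer.  The buffer pointer doubles as the token so it can be
 * recovered from virtqueue_get_buf() later; my_post_rx_buffer() is a
 * hypothetical name.
 */
static int my_post_rx_buffer(struct virtqueue *vq, void *buf, unsigned int size)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, size);
	/* One writable entry: the device fills @buf in. */
	return virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
}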
1880
1881 /**
1882 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1883 * @vq: the struct virtqueue we're talking about.
1884 * @sg: scatterlist (must be well-formed and terminated!)
1885 * @num: the number of entries in @sg writable by other side
1886 * @data: the token identifying the buffer.
1887 * @ctx: extra context for the token
1888 * @gfp: how to do memory allocations (if necessary).
1889 *
1890 * Caller must ensure we don't call this with other virtqueue operations
1891 * at the same time (except where noted).
1892 *
1893 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1894 */
1895 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1896 struct scatterlist *sg, unsigned int num,
1897 void *data,
1898 void *ctx,
1899 gfp_t gfp)
1900 {
1901 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1902 }
1903 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1904
1905 /**
1906 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1907 * @_vq: the struct virtqueue
1908 *
1909 * Instead of virtqueue_kick(), you can do:
1910 * if (virtqueue_kick_prepare(vq))
1911 * virtqueue_notify(vq);
1912 *
1913 * This is sometimes useful because virtqueue_kick_prepare() needs
1914 * to be serialized, but the actual virtqueue_notify() call does not.
1915 */
1916 bool virtqueue_kick_prepare(struct virtqueue *_vq)
1917 {
1918 struct vring_virtqueue *vq = to_vvq(_vq);
1919
1920 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1921 virtqueue_kick_prepare_split(_vq);
1922 }
1923 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
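/*
 * Editor-added sketch of the split-kick pattern described above: the
 * serialized half runs under the driver's own lock, the notification
 * happens after the lock is dropped.  @lock is a hypothetical per-queue
 * driver lock.
 */
static void my_submit_and_kick(struct virtqueue *vq, spinlock_t *lock)
{
	bool needs_kick;

	spin_lock(lock);
	/* ... one or more virtqueue_add_*() calls go here ... */
	needs_kick = virtqueue_kick_prepare(vq);	/* must stay serialized */
	spin_unlock(lock);

	if (needs_kick)
		virtqueue_notify(vq);			/* needs no serialization */
}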
1924
1925 /**
1926 * virtqueue_notify - second half of split virtqueue_kick call.
1927 * @_vq: the struct virtqueue
1928 *
1929 * This does not need to be serialized.
1930 *
1931 * Returns false if host notify failed or queue is broken, otherwise true.
1932 */
1933 bool virtqueue_notify(struct virtqueue *_vq)
1934 {
1935 struct vring_virtqueue *vq = to_vvq(_vq);
1936
1937 if (unlikely(vq->broken))
1938 return false;
1939
1940 /* Prod other side to tell it about changes. */
1941 if (!vq->notify(_vq)) {
1942 vq->broken = true;
1943 return false;
1944 }
1945 return true;
1946 }
1947 EXPORT_SYMBOL_GPL(virtqueue_notify);
1948
1949 /**
1950 * virtqueue_kick - update after add_buf
1951 * @vq: the struct virtqueue
1952 *
1953 * After one or more virtqueue_add_* calls, invoke this to kick
1954 * the other side.
1955 *
1956 * Caller must ensure we don't call this with other virtqueue
1957 * operations at the same time (except where noted).
1958 *
1959 * Returns false if kick failed, otherwise true.
1960 */
1961 bool virtqueue_kick(struct virtqueue *vq)
1962 {
1963 if (virtqueue_kick_prepare(vq))
1964 return virtqueue_notify(vq);
1965 return true;
1966 }
1967 EXPORT_SYMBOL_GPL(virtqueue_kick);
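/*
 * Editor-added sketch: the common add-then-kick sequence for a single
 * device-readable buffer (e.g. a TX path).  my_send_buffer() is a
 * hypothetical name; a false return from virtqueue_kick() is treated as
 * an I/O error here.
 */
static int my_send_buffer(struct virtqueue *vq, void *buf, unsigned int size)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, size);
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;

	if (!virtqueue_kick(vq))
		return -EIO;	/* notify failed or the queue is broken */
	return 0;
}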
1968
1969 /**
1970 * virtqueue_get_buf - get the next used buffer
1971 * @_vq: the struct virtqueue we're talking about.
1972 * @len: the length written into the buffer
1973 * @ctx: extra context for the token
1974 *
1975 * If the device wrote data into the buffer, @len will be set to the
1976 * amount written. This means you don't need to clear the buffer
1977 * beforehand to ensure there's no data leakage in the case of short
1978 * writes.
1979 *
1980 * Caller must ensure we don't call this with other virtqueue
1981 * operations at the same time (except where noted).
1982 *
1983 * Returns NULL if there are no used buffers, or the "data" token
1984 * handed to virtqueue_add_*().
1985 */
1986 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1987 void **ctx)
1988 {
1989 struct vring_virtqueue *vq = to_vvq(_vq);
1990
1991 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1992 virtqueue_get_buf_ctx_split(_vq, len, ctx);
1993 }
1994 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1995
1996 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1997 {
1998 return virtqueue_get_buf_ctx(_vq, len, NULL);
1999 }
2000 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
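/*
 * Editor-added sketch: draining completed buffers, typically from the
 * virtqueue callback.  The returned token is whatever was passed as
 * @data to virtqueue_add_*(); @len is how much the device wrote.
 * my_vq_callback() is a hypothetical name.
 */
static void my_vq_callback(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	while ((token = virtqueue_get_buf(vq, &len)) != NULL)
		pr_debug("buffer %p done, device wrote %u bytes\n", token, len);
}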
2001 /**
2002 * virtqueue_disable_cb - disable callbacks
2003 * @_vq: the struct virtqueue we're talking about.
2004 *
2005 * Note that this is not necessarily synchronous, hence unreliable and only
2006 * useful as an optimization.
2007 *
2008 * Unlike other operations, this need not be serialized.
2009 */
2010 void virtqueue_disable_cb(struct virtqueue *_vq)
2011 {
2012 struct vring_virtqueue *vq = to_vvq(_vq);
2013
2014 if (vq->packed_ring)
2015 virtqueue_disable_cb_packed(_vq);
2016 else
2017 virtqueue_disable_cb_split(_vq);
2018 }
2019 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2020
2021 /**
2022 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2023 * @_vq: the struct virtqueue we're talking about.
2024 *
2025 * This re-enables callbacks; it returns current queue state
2026 * in an opaque unsigned value. This value should later be tested by
2027 * virtqueue_poll(), to detect a possible race between the driver checking for
2028 * more work, and enabling callbacks.
2029 *
2030 * Caller must ensure we don't call this with other virtqueue
2031 * operations at the same time (except where noted).
2032 */
2033 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2034 {
2035 struct vring_virtqueue *vq = to_vvq(_vq);
2036
2037 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2038 virtqueue_enable_cb_prepare_split(_vq);
2039 }
2040 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2041
2042 /**
2043 * virtqueue_poll - query pending used buffers
2044 * @_vq: the struct virtqueue we're talking about.
2045 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2046 *
2047 * Returns "true" if there are pending used buffers in the queue.
2048 *
2049 * This does not need to be serialized.
2050 */
2051 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
2052 {
2053 struct vring_virtqueue *vq = to_vvq(_vq);
2054
2055 if (unlikely(vq->broken))
2056 return false;
2057
2058 virtio_mb(vq->weak_barriers);
2059 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2060 virtqueue_poll_split(_vq, last_used_idx);
2061 }
2062 EXPORT_SYMBOL_GPL(virtqueue_poll);
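/*
 * Editor-added sketch of the prepare/poll pattern described above, e.g.
 * for NAPI-style polling: re-enable callbacks, then use the opaque value
 * to detect buffers that raced in before the enable took effect.
 * my_try_enable_cb() is a hypothetical name.
 */
static bool my_try_enable_cb(struct virtqueue *vq)
{
	unsigned int opaque = virtqueue_enable_cb_prepare(vq);

	if (virtqueue_poll(vq, opaque)) {
		/* Work slipped in; keep callbacks off and keep polling. */
		virtqueue_disable_cb(vq);
		return false;
	}
	return true;	/* callbacks enabled, nothing pending */
}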
2063
2064 /**
2065 * virtqueue_enable_cb - restart callbacks after disable_cb.
2066 * @_vq: the struct virtqueue we're talking about.
2067 *
2068 * This re-enables callbacks; it returns "false" if there are pending
2069 * buffers in the queue, to detect a possible race between the driver
2070 * checking for more work, and enabling callbacks.
2071 *
2072 * Caller must ensure we don't call this with other virtqueue
2073 * operations at the same time (except where noted).
2074 */
2075 bool virtqueue_enable_cb(struct virtqueue *_vq)
2076 {
2077 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
2078
2079 return !virtqueue_poll(_vq, last_used_idx);
2080 }
2081 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
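/*
 * Editor-added sketch of the usual race-free completion loop: disable
 * callbacks while draining, and only stop once virtqueue_enable_cb()
 * confirms nothing arrived between the last get_buf and the re-enable.
 * my_drain_used() is a hypothetical name.
 */
static void my_drain_used(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	do {
		virtqueue_disable_cb(vq);
		while ((token = virtqueue_get_buf(vq, &len)) != NULL)
			pr_debug("completed %p (%u bytes)\n", token, len);
	} while (!virtqueue_enable_cb(vq));
}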
2082
2083 /**
2084 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2085 * @_vq: the struct virtqueue we're talking about.
2086 *
2087 * This re-enables callbacks but hints to the other side to delay
2088 * interrupts until most of the available buffers have been processed;
2089 * it returns "false" if there are many pending buffers in the queue,
2090 * to detect a possible race between the driver checking for more work,
2091 * and enabling callbacks.
2092 *
2093 * Caller must ensure we don't call this with other virtqueue
2094 * operations at the same time (except where noted).
2095 */
2096 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2097 {
2098 struct vring_virtqueue *vq = to_vvq(_vq);
2099
2100 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2101 virtqueue_enable_cb_delayed_split(_vq);
2102 }
2103 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2104
2105 /**
2106 * virtqueue_detach_unused_buf - detach first unused buffer
2107 * @_vq: the struct virtqueue we're talking about.
2108 *
2109 * Returns NULL or the "data" token handed to virtqueue_add_*().
2110 * This is not valid on an active queue; it is useful only for device
2111 * shutdown.
2112 */
2113 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2114 {
2115 struct vring_virtqueue *vq = to_vvq(_vq);
2116
2117 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2118 virtqueue_detach_unused_buf_split(_vq);
2119 }
2120 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
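/*
 * Editor-added shutdown sketch: after the device has been reset, reclaim
 * the tokens of buffers the device never consumed.  kfree() stands in
 * for whatever the driver normally does with its tokens; my_free_unused()
 * is a hypothetical name.
 */
static void my_free_unused(struct virtqueue *vq)
{
	void *token;

	while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(token);
}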
2121
2122 static inline bool more_used(const struct vring_virtqueue *vq)
2123 {
2124 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2125 }
2126
2127 irqreturn_t vring_interrupt(int irq, void *_vq)
2128 {
2129 struct vring_virtqueue *vq = to_vvq(_vq);
2130
2131 if (!more_used(vq)) {
2132 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2133 return IRQ_NONE;
2134 }
2135
2136 if (unlikely(vq->broken))
2137 return IRQ_HANDLED;
2138
2139 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2140 if (vq->vq.callback)
2141 vq->vq.callback(&vq->vq);
2142
2143 return IRQ_HANDLED;
2144 }
2145 EXPORT_SYMBOL_GPL(vring_interrupt);
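/*
 * Editor-added transport-side sketch: a transport typically installs
 * vring_interrupt() as the IRQ handler with the virtqueue as dev_id.
 * @irq, the device name and my_transport_request_irq() are hypothetical;
 * <linux/interrupt.h> is assumed to be available.
 */
static int my_transport_request_irq(unsigned int irq, struct virtqueue *vq)
{
	return request_irq(irq, vring_interrupt, IRQF_SHARED,
			   "my-virtio-vq", vq);
}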
2146
2147 /* Only available for split ring */
2148 struct virtqueue *__vring_new_virtqueue(unsigned int index,
2149 struct vring vring,
2150 struct virtio_device *vdev,
2151 bool weak_barriers,
2152 bool context,
2153 bool (*notify)(struct virtqueue *),
2154 void (*callback)(struct virtqueue *),
2155 const char *name)
2156 {
2157 struct vring_virtqueue *vq;
2158
2159 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2160 return NULL;
2161
2162 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2163 if (!vq)
2164 return NULL;
2165
2166 vq->packed_ring = false;
2167 vq->vq.callback = callback;
2168 vq->vq.vdev = vdev;
2169 vq->vq.name = name;
2170 vq->vq.num_free = vring.num;
2171 vq->vq.index = index;
2172 vq->we_own_ring = false;
2173 vq->notify = notify;
2174 vq->weak_barriers = weak_barriers;
2175 vq->broken = false;
2176 vq->last_used_idx = 0;
2177 vq->num_added = 0;
2178 vq->use_dma_api = vring_use_dma_api(vdev);
2179 #ifdef DEBUG
2180 vq->in_use = false;
2181 vq->last_add_time_valid = false;
2182 #endif
2183
2184 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2185 !context;
2186 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2187
2188 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2189 vq->weak_barriers = false;
2190
2191 vq->split.queue_dma_addr = 0;
2192 vq->split.queue_size_in_bytes = 0;
2193
2194 vq->split.vring = vring;
2195 vq->split.avail_flags_shadow = 0;
2196 vq->split.avail_idx_shadow = 0;
2197
2198 /* No callback? Tell other side not to bother us. */
2199 if (!callback) {
2200 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2201 if (!vq->event)
2202 vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2203 vq->split.avail_flags_shadow);
2204 }
2205
2206 vq->split.desc_state = kmalloc_array(vring.num,
2207 sizeof(struct vring_desc_state_split), GFP_KERNEL);
2208 if (!vq->split.desc_state)
2209 goto err_state;
2210
2211 vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2212 if (!vq->split.desc_extra)
2213 goto err_extra;
2214
2215 /* Put everything in free lists. */
2216 vq->free_head = 0;
2217 memset(vq->split.desc_state, 0, vring.num *
2218 sizeof(struct vring_desc_state_split));
2219
2220 spin_lock(&vdev->vqs_list_lock);
2221 list_add_tail(&vq->vq.list, &vdev->vqs);
2222 spin_unlock(&vdev->vqs_list_lock);
2223 return &vq->vq;
2224
2225 err_extra:
2226 kfree(vq->split.desc_state);
2227 err_state:
2228 kfree(vq);
2229 return NULL;
2230 }
2231 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2232
2233 struct virtqueue *vring_create_virtqueue(
2234 unsigned int index,
2235 unsigned int num,
2236 unsigned int vring_align,
2237 struct virtio_device *vdev,
2238 bool weak_barriers,
2239 bool may_reduce_num,
2240 bool context,
2241 bool (*notify)(struct virtqueue *),
2242 void (*callback)(struct virtqueue *),
2243 const char *name)
2244 {
2245
2246 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2247 return vring_create_virtqueue_packed(index, num, vring_align,
2248 vdev, weak_barriers, may_reduce_num,
2249 context, notify, callback, name);
2250
2251 return vring_create_virtqueue_split(index, num, vring_align,
2252 vdev, weak_barriers, may_reduce_num,
2253 context, notify, callback, name);
2254 }
2255 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
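/*
 * Editor-added transport-side sketch: creating a ring-backed queue.  The
 * notify hook is transport specific (a doorbell write in a real
 * transport; a stub here), and the queue size, alignment and my_* names
 * are hypothetical choices for illustration only.
 */
static bool my_notify(struct virtqueue *vq)
{
	/* A real transport would poke its doorbell register here. */
	return true;
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
				     unsigned int index,
				     void (*callback)(struct virtqueue *),
				     const char *name)
{
	return vring_create_virtqueue(index, 256, SMP_CACHE_BYTES, vdev,
				      true,	/* weak_barriers */
				      true,	/* may_reduce_num */
				      false,	/* no per-buffer context */
				      my_notify, callback, name);
}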
2256
2257 /* Only available for split ring */
2258 struct virtqueue *vring_new_virtqueue(unsigned int index,
2259 unsigned int num,
2260 unsigned int vring_align,
2261 struct virtio_device *vdev,
2262 bool weak_barriers,
2263 bool context,
2264 void *pages,
2265 bool (*notify)(struct virtqueue *vq),
2266 void (*callback)(struct virtqueue *vq),
2267 const char *name)
2268 {
2269 struct vring vring;
2270
2271 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2272 return NULL;
2273
2274 vring_init(&vring, num, pages, vring_align);
2275 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2276 notify, callback, name);
2277 }
2278 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2279
2280 void vring_del_virtqueue(struct virtqueue *_vq)
2281 {
2282 struct vring_virtqueue *vq = to_vvq(_vq);
2283
2284 if (vq->we_own_ring) {
2285 if (vq->packed_ring) {
2286 vring_free_queue(vq->vq.vdev,
2287 vq->packed.ring_size_in_bytes,
2288 vq->packed.vring.desc,
2289 vq->packed.ring_dma_addr);
2290
2291 vring_free_queue(vq->vq.vdev,
2292 vq->packed.event_size_in_bytes,
2293 vq->packed.vring.driver,
2294 vq->packed.driver_event_dma_addr);
2295
2296 vring_free_queue(vq->vq.vdev,
2297 vq->packed.event_size_in_bytes,
2298 vq->packed.vring.device,
2299 vq->packed.device_event_dma_addr);
2300
2301 kfree(vq->packed.desc_state);
2302 kfree(vq->packed.desc_extra);
2303 } else {
2304 vring_free_queue(vq->vq.vdev,
2305 vq->split.queue_size_in_bytes,
2306 vq->split.vring.desc,
2307 vq->split.queue_dma_addr);
2308 }
2309 }
2310 if (!vq->packed_ring) {
2311 kfree(vq->split.desc_state);
2312 kfree(vq->split.desc_extra);
2313 }
2314 spin_lock(&vq->vq.vdev->vqs_list_lock);
2315 list_del(&_vq->list);
2316 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2317 kfree(vq);
2318 }
2319 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2320
2321 /* Manipulates transport-specific feature bits. */
2322 void vring_transport_features(struct virtio_device *vdev)
2323 {
2324 unsigned int i;
2325
2326 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2327 switch (i) {
2328 case VIRTIO_RING_F_INDIRECT_DESC:
2329 break;
2330 case VIRTIO_RING_F_EVENT_IDX:
2331 break;
2332 case VIRTIO_F_VERSION_1:
2333 break;
2334 case VIRTIO_F_ACCESS_PLATFORM:
2335 break;
2336 case VIRTIO_F_RING_PACKED:
2337 break;
2338 case VIRTIO_F_ORDER_PLATFORM:
2339 break;
2340 default:
2341 /* We don't understand this bit. */
2342 __virtio_clear_bit(vdev, i);
2343 }
2344 }
2345 }
2346 EXPORT_SYMBOL_GPL(vring_transport_features);
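/*
 * Editor-added transport-side sketch: a transport's finalize_features
 * hook usually calls vring_transport_features() so that transport
 * feature bits the ring code does not understand are cleared before the
 * negotiated set is written back to the device.  my_finalize_features()
 * is a hypothetical name.
 */
static int my_finalize_features(struct virtio_device *vdev)
{
	/* Let the ring code drop unknown transport feature bits. */
	vring_transport_features(vdev);
	return 0;
}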
2347
2348 /**
2349 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2350 * @_vq: the struct virtqueue containing the vring of interest.
2351 *
2352 * Returns the size of the vring. This is mainly used for boasting to
2353 * userspace. Unlike other operations, this need not be serialized.
2354 */
2355 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2356 {
2357
2358 struct vring_virtqueue *vq = to_vvq(_vq);
2359
2360 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2361 }
2362 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2363
2364 bool virtqueue_is_broken(struct virtqueue *_vq)
2365 {
2366 struct vring_virtqueue *vq = to_vvq(_vq);
2367
2368 return READ_ONCE(vq->broken);
2369 }
2370 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2371
2372 /*
2373 * This should prevent the device from being used, allowing drivers to
2374 * recover. You may need to grab appropriate locks to flush.
2375 */
2376 void virtio_break_device(struct virtio_device *dev)
2377 {
2378 struct virtqueue *_vq;
2379
2380 spin_lock(&dev->vqs_list_lock);
2381 list_for_each_entry(_vq, &dev->vqs, list) {
2382 struct vring_virtqueue *vq = to_vvq(_vq);
2383
2384 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2385 WRITE_ONCE(vq->broken, true);
2386 }
2387 spin_unlock(&dev->vqs_list_lock);
2388 }
2389 EXPORT_SYMBOL_GPL(virtio_break_device);
2390
2391 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2392 {
2393 struct vring_virtqueue *vq = to_vvq(_vq);
2394
2395 BUG_ON(!vq->we_own_ring);
2396
2397 if (vq->packed_ring)
2398 return vq->packed.ring_dma_addr;
2399
2400 return vq->split.queue_dma_addr;
2401 }
2402 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2403
2404 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2405 {
2406 struct vring_virtqueue *vq = to_vvq(_vq);
2407
2408 BUG_ON(!vq->we_own_ring);
2409
2410 if (vq->packed_ring)
2411 return vq->packed.driver_event_dma_addr;
2412
2413 return vq->split.queue_dma_addr +
2414 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2415 }
2416 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2417
2418 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2419 {
2420 struct vring_virtqueue *vq = to_vvq(_vq);
2421
2422 BUG_ON(!vq->we_own_ring);
2423
2424 if (vq->packed_ring)
2425 return vq->packed.device_event_dma_addr;
2426
2427 return vq->split.queue_dma_addr +
2428 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2429 }
2430 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2431
2432 /* Only available for split ring */
2433 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2434 {
2435 return &to_vvq(vq)->split.vring;
2436 }
2437 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2438
2439 MODULE_LICENSE("GPL");
2440