// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128

static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
                                               bool cancel_timeout);

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
        const struct vsock_transport *t = vsock_core_get_transport(vsk);

        if (WARN_ON(!t))
                return NULL;

        return container_of(t, struct virtio_transport, transport);
}

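/* Decide whether a pending sendmsg() can use MSG_ZEROCOPY. The iov must
 * start at offset 0 and fit entirely within one packet, and the transport
 * may veto via its optional 'can_msgzerocopy' callback, which is passed
 * the number of buffers needed (+1 for the packet header).
 */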
static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
                                       struct virtio_vsock_pkt_info *info,
                                       size_t pkt_len)
{
        struct iov_iter *iov_iter;

        if (!info->msg)
                return false;

        iov_iter = &info->msg->msg_iter;

        if (iov_iter->iov_offset)
                return false;

        /* We can't send the whole iov. */
        if (iov_iter->count > pkt_len)
                return false;

        /* Check that the transport can send data in zerocopy mode. */
        t_ops = virtio_transport_get_ops(info->vsk);

        if (t_ops->can_msgzerocopy) {
                int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);

                /* +1 is for packet header. */
                return t_ops->can_msgzerocopy(pages_to_send + 1);
        }

        return true;
}

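/* Bind a zerocopy completion structure to 'skb'. If the caller already
 * provided one via 'msg->msg_ubuf' we just take a reference; otherwise a
 * new ubuf_info is allocated and its 'zerocopy' flag records whether the
 * data pages were actually pinned or were copied after all.
 */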
static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
                                           struct sk_buff *skb,
                                           struct msghdr *msg,
                                           bool zerocopy)
{
        struct ubuf_info *uarg;

        if (msg->msg_ubuf) {
                uarg = msg->msg_ubuf;
                net_zcopy_get(uarg);
        } else {
                struct iov_iter *iter = &msg->msg_iter;
                struct ubuf_info_msgzc *uarg_zc;

                uarg = msg_zerocopy_realloc(sk_vsock(vsk),
                                            iter->count,
                                            NULL);
                if (!uarg)
                        return -1;

                uarg_zc = uarg_to_msgzc(uarg);
                uarg_zc->zerocopy = zerocopy ? 1 : 0;
        }

        skb_zcopy_init(skb, uarg);

        return 0;
}

static int virtio_transport_fill_skb(struct sk_buff *skb,
                                     struct virtio_vsock_pkt_info *info,
                                     size_t len,
                                     bool zcopy)
{
        if (zcopy)
                return __zerocopy_sg_from_iter(info->msg, NULL, skb,
                                               &info->msg->msg_iter,
                                               len);

        virtio_vsock_skb_put(skb, len);
        return skb_copy_datagram_from_iter(skb, 0, &info->msg->msg_iter, len);
}

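/* Fill in the virtio_vsock packet header stored in the skb headroom. All
 * fields are little-endian on the wire. 'buf_alloc' and 'fwd_cnt' are
 * zeroed here and filled in with fresh credit values just before
 * transmission, in virtio_transport_inc_tx_pkt().
 */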
static void virtio_transport_init_hdr(struct sk_buff *skb,
                                      struct virtio_vsock_pkt_info *info,
                                      size_t payload_len,
                                      u32 src_cid,
                                      u32 src_port,
                                      u32 dst_cid,
                                      u32 dst_port)
{
        struct virtio_vsock_hdr *hdr;

        hdr = virtio_vsock_hdr(skb);
        hdr->type = cpu_to_le16(info->type);
        hdr->op = cpu_to_le16(info->op);
        hdr->src_cid = cpu_to_le64(src_cid);
        hdr->dst_cid = cpu_to_le64(dst_cid);
        hdr->src_port = cpu_to_le32(src_port);
        hdr->dst_port = cpu_to_le32(dst_port);
        hdr->flags = cpu_to_le32(info->flags);
        hdr->len = cpu_to_le32(payload_len);
        hdr->buf_alloc = cpu_to_le32(0);
        hdr->fwd_cnt = cpu_to_le32(0);
}

static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
                                                void *dst,
                                                size_t len)
{
        struct iov_iter iov_iter = { 0 };
        struct kvec kvec;
        size_t to_copy;

        kvec.iov_base = dst;
        kvec.iov_len = len;

        iov_iter.iter_type = ITER_KVEC;
        iov_iter.kvec = &kvec;
        iov_iter.nr_segs = 1;

        to_copy = min_t(size_t, len, skb->len);

        skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                               &iov_iter, to_copy);
}

/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
        struct virtio_vsock_hdr *pkt_hdr;
        struct sk_buff *pkt = opaque;
        struct af_vsockmon_hdr *hdr;
        struct sk_buff *skb;
        size_t payload_len;

        /* A packet could be split to fit the RX buffer, so we can retrieve
         * the payload length from the header and the buffer pointer taking
         * care of the offset in the original packet.
         */
        pkt_hdr = virtio_vsock_hdr(pkt);
        payload_len = pkt->len;

        skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
                        GFP_ATOMIC);
        if (!skb)
                return NULL;

        hdr = skb_put(skb, sizeof(*hdr));

        /* pkt->hdr is little-endian so no need to byteswap here */
        hdr->src_cid = pkt_hdr->src_cid;
        hdr->src_port = pkt_hdr->src_port;
        hdr->dst_cid = pkt_hdr->dst_cid;
        hdr->dst_port = pkt_hdr->dst_port;

        hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
        hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
        memset(hdr->reserved, 0, sizeof(hdr->reserved));

        switch (le16_to_cpu(pkt_hdr->op)) {
        case VIRTIO_VSOCK_OP_REQUEST:
        case VIRTIO_VSOCK_OP_RESPONSE:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
                break;
        case VIRTIO_VSOCK_OP_RST:
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
                break;
        case VIRTIO_VSOCK_OP_RW:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
                break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
        case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
                break;
        default:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
                break;
        }

        skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));

        if (payload_len) {
                if (skb_is_nonlinear(pkt)) {
                        void *data = skb_put(skb, payload_len);

                        virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
                } else {
                        skb_put_data(skb, pkt->data, payload_len);
                }
        }

        return skb;
}

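/* Deliver a copy of the packet to any vsockmon taps for packet capture.
 * The 'tap_delivered' flag makes this idempotent in case the same skb
 * passes through this path more than once.
 */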
void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
        if (virtio_vsock_skb_tap_delivered(skb))
                return;

        vsock_deliver_tap(virtio_transport_build_skb, skb);
        virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);

static u16 virtio_transport_get_type(struct sock *sk)
{
        if (sk->sk_type == SOCK_STREAM)
                return VIRTIO_VSOCK_TYPE_STREAM;
        else
                return VIRTIO_VSOCK_TYPE_SEQPACKET;
}

/* Returns new sk_buff on success, otherwise returns NULL. */
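/* For zerocopy skbs only the header is allocated here and the payload
 * pages are attached later by virtio_transport_fill_skb(); otherwise the
 * payload is copied into the linear area right after the header.
 */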
static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
                                                  size_t payload_len,
                                                  bool zcopy,
                                                  u32 src_cid,
                                                  u32 src_port,
                                                  u32 dst_cid,
                                                  u32 dst_port)
{
        struct vsock_sock *vsk;
        struct sk_buff *skb;
        size_t skb_len;

        skb_len = VIRTIO_VSOCK_SKB_HEADROOM;

        if (!zcopy)
                skb_len += payload_len;

        skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
        if (!skb)
                return NULL;

        virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
                                  dst_cid, dst_port);

        vsk = info->vsk;

        /* If 'vsk' != NULL then payload is always present, so we
         * will never call '__zerocopy_sg_from_iter()' below without
         * setting skb owner in 'skb_set_owner_w()'. The only case
         * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
         * without payload.
         */
        WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);

        /* Set owner here, because '__zerocopy_sg_from_iter()' uses
         * the skb's owner without checking it when updating
         * 'sk_wmem_alloc'.
         */
        if (vsk)
                skb_set_owner_w(skb, sk_vsock(vsk));

        if (info->msg && payload_len > 0) {
                int err;

                err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
                if (err)
                        goto out;

                if (msg_data_left(info->msg) == 0 &&
                    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
                        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

                        hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);

                        if (info->msg->msg_flags & MSG_EOR)
                                hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
                }
        }

        if (info->reply)
                virtio_vsock_skb_set_reply(skb);

        trace_virtio_transport_alloc_pkt(src_cid, src_port,
                                         dst_cid, dst_port,
                                         payload_len,
                                         info->type,
                                         info->op,
                                         info->flags,
                                         zcopy);

        return skb;
out:
        kfree_skb(skb);
        return NULL;
}

/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 */
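/* The payload is split into packets of at most 'max_skb_len' bytes, each
 * allocated and submitted separately, and only the credit actually granted
 * by virtio_transport_get_credit() is sent; any part of the buffer for
 * which no skb could be sent has its credit returned via
 * virtio_transport_put_credit().
 */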
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
                                          struct virtio_vsock_pkt_info *info)
{
        /* ANDROID:
         *
         * Older host kernels (including the 5.10-based images used by
         * Cuttlefish) only support linear SKBs on the RX path.
         * Consequently, if we transmit a VIRTIO_VSOCK_MAX_PKT_BUF_SIZE
         * packet, the host allocation can fail and the packet will be
         * silently dropped.
         *
         * As a nasty workaround, limit the entire SKB to ~28KiB, which
         * allows for 4KiB of SKB wiggle room whilst keeping the
         * allocation below PAGE_ALLOC_COSTLY_ORDER.
         *
         * This can be removed when all supported host kernels have
         * support for non-linear RX buffers introduced by Change-Id
         * I4212a8daf9f19b5bbffc06ce93338c823de7bb19.
         */
        u32 max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
                                SKB_WITH_OVERHEAD(SZ_32K - VIRTIO_VSOCK_SKB_HEADROOM) - SZ_4K);
        u32 src_cid, src_port, dst_cid, dst_port;
        const struct virtio_transport *t_ops;
        struct virtio_vsock_sock *vvs;
        u32 pkt_len = info->pkt_len;
        bool can_zcopy = false;
        u32 rest_len;
        int ret;

        info->type = virtio_transport_get_type(sk_vsock(vsk));

        t_ops = virtio_transport_get_ops(vsk);
        if (unlikely(!t_ops))
                return -EFAULT;

        src_cid = t_ops->transport.get_local_cid();
        src_port = vsk->local_addr.svm_port;
        if (!info->remote_cid) {
                dst_cid = vsk->remote_addr.svm_cid;
                dst_port = vsk->remote_addr.svm_port;
        } else {
                dst_cid = info->remote_cid;
                dst_port = info->remote_port;
        }

        vvs = vsk->trans;

        /* virtio_transport_get_credit might return less than pkt_len credit */
        pkt_len = virtio_transport_get_credit(vvs, pkt_len);

        /* Do not send zero length OP_RW pkt */
        if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
                return pkt_len;

        if (info->msg) {
                /* If zerocopy is not enabled by 'setsockopt()', we behave as
                 * if the MSG_ZEROCOPY flag were not set.
                 */
                if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
                        info->msg->msg_flags &= ~MSG_ZEROCOPY;

                if (info->msg->msg_flags & MSG_ZEROCOPY)
                        can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);

                if (can_zcopy)
                        max_skb_len = min_t(u32, max_skb_len,
                                            (MAX_SKB_FRAGS * PAGE_SIZE));
        }

        rest_len = pkt_len;

        do {
                struct sk_buff *skb;
                size_t skb_len;

                skb_len = min(max_skb_len, rest_len);

                skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
                                                 src_cid, src_port,
                                                 dst_cid, dst_port);
                if (!skb) {
                        ret = -ENOMEM;
                        break;
                }

                /* We process the buffer part by part, allocating an skb on
                 * each iteration. If this is the last skb for this buffer
                 * and MSG_ZEROCOPY mode is in use - we must allocate
                 * completion for the current syscall.
                 */
                if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
                    skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
                        if (virtio_transport_init_zcopy_skb(vsk, skb,
                                                            info->msg,
                                                            can_zcopy)) {
                                kfree_skb(skb);
                                ret = -ENOMEM;
                                break;
                        }
                }

                virtio_transport_inc_tx_pkt(vvs, skb);

                ret = t_ops->send_pkt(skb);
                if (ret < 0)
                        break;

                /* Both virtio and vhost 'send_pkt()' return 'skb_len',
                 * but for reliability use 'ret' instead of 'skb_len'.
                 * Also if a partial send somehow happens (i.e. 'ret' !=
                 * 'skb_len'), we break this loop, but still account the
                 * returned value in 'virtio_transport_put_credit()'.
                 */
                rest_len -= ret;

                if (WARN_ONCE(ret != skb_len,
                              "'send_pkt()' returns %i, but %zu expected\n",
                              ret, skb_len))
                        break;
        } while (rest_len);

        virtio_transport_put_credit(vvs, rest_len);

        /* Return number of bytes, if any data has been sent. */
        if (rest_len != pkt_len)
                ret = pkt_len - rest_len;

        return ret;
}

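/* Receive-side accounting: 'buf_alloc' is our advertised buffer size,
 * 'buf_used' the space currently consumed by queued packets, 'rx_bytes'
 * the bytes readable by userspace and 'fwd_cnt' the running total of bytes
 * forwarded to userspace, which is advertised back to the peer as credit.
 */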
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
                                        u32 len)
{
        if (vvs->buf_used + len > vvs->buf_alloc)
                return false;

        vvs->rx_bytes += len;
        vvs->buf_used += len;
        return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
                                        u32 bytes_read, u32 bytes_dequeued)
{
        vvs->rx_bytes -= bytes_read;
        vvs->buf_used -= bytes_dequeued;
        vvs->fwd_cnt += bytes_dequeued;
}

void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

        spin_lock_bh(&vvs->rx_lock);
        vvs->last_fwd_cnt = vvs->fwd_cnt;
        hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
        hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
        spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
{
        struct sock *s = skb->sk;

        if (s && skb->len) {
                struct vsock_sock *vs = vsock_sk(s);
                struct virtio_vsock_sock *vvs;

                vvs = vs->trans;

                spin_lock_bh(&vvs->tx_lock);
                vvs->bytes_unsent -= skb->len;
                spin_unlock_bh(&vvs->tx_lock);
        }

        if (consume)
                consume_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);

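/* Take up to 'credit' bytes of the peer's remaining receive space, which
 * is 'peer_buf_alloc - (tx_cnt - peer_fwd_cnt)'. For example (values
 * purely illustrative), with peer_buf_alloc = 64 KiB, tx_cnt = 60 KiB and
 * peer_fwd_cnt = 8 KiB, at most 12 KiB can be sent without overrunning the
 * peer's buffer.
 */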
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        u32 ret;

        if (!credit)
                return 0;

        spin_lock_bh(&vvs->tx_lock);
        ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (ret > credit)
                ret = credit;
        vvs->tx_cnt += ret;
        vvs->bytes_unsent += ret;
        spin_unlock_bh(&vvs->tx_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        if (!credit)
                return;

        spin_lock_bh(&vvs->tx_lock);
        vvs->tx_cnt -= credit;
        vvs->bytes_unsent -= credit;
        spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);

static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}

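/* MSG_PEEK path: copy data to userspace without consuming it, so no
 * accounting is updated. rx_lock is dropped around each copy because
 * skb_copy_datagram_iter() may sleep; the socket lock held by the caller
 * keeps the queue stable in the meantime.
 */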
static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sk_buff *skb;
        size_t total = 0;
        int err;

        spin_lock_bh(&vvs->rx_lock);

        skb_queue_walk(&vvs->rx_queue, skb) {
                size_t bytes;

                bytes = len - total;
                if (bytes > skb->len)
                        bytes = skb->len;

                spin_unlock_bh(&vvs->rx_lock);

                /* sk_lock is held by caller so no one else can dequeue.
                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                 */
                err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                             &msg->msg_iter, bytes);
                if (err)
                        goto out;

                total += bytes;

                spin_lock_bh(&vvs->rx_lock);

                if (total == len)
                        break;
        }

        spin_unlock_bh(&vvs->rx_lock);

        return total;

out:
        if (total)
                err = total;
        return err;
}

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sk_buff *skb;
        u32 fwd_cnt_delta;
        bool low_rx_bytes;
        int err = -EFAULT;
        size_t total = 0;
        u32 free_space;

        spin_lock_bh(&vvs->rx_lock);

        if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes,
                      "rx_queue is empty, but rx_bytes is non-zero\n")) {
                spin_unlock_bh(&vvs->rx_lock);
                return err;
        }

        while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
                size_t bytes, dequeued = 0;

                skb = skb_peek(&vvs->rx_queue);

                bytes = min_t(size_t, len - total,
                              skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);

                /* sk_lock is held by caller so no one else can dequeue.
                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                 */
                spin_unlock_bh(&vvs->rx_lock);

                err = skb_copy_datagram_iter(skb,
                                             VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                             &msg->msg_iter, bytes);
                if (err)
                        goto out;

                spin_lock_bh(&vvs->rx_lock);

                total += bytes;

                VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;

                if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
                        dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
                        __skb_unlink(skb, &vvs->rx_queue);
                        consume_skb(skb);
                }

                virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
        }

        fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
        free_space = vvs->buf_alloc - fwd_cnt_delta;
        low_rx_bytes = (vvs->rx_bytes <
                        sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));

        spin_unlock_bh(&vvs->rx_lock);

        /* To reduce the number of credit update messages,
         * don't update credits as long as lots of space is available.
         * Note: the limit chosen here is arbitrary. Setting the limit
         * too high causes extra messages. Too low causes transmitter
         * stalls. As stalls are in theory more expensive than extra
         * messages, we set the limit to a high value. TODO: experiment
         * with different values. Also send a credit update message when
         * the number of bytes in the rx queue is not enough to wake up
         * the reader.
         */
        if (fwd_cnt_delta &&
            (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
                virtio_transport_send_credit_update(vsk);

        return total;

out:
        if (total)
                err = total;
        return err;
}

static ssize_t
virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
                                   struct msghdr *msg)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sk_buff *skb;
        size_t total, len;

        spin_lock_bh(&vvs->rx_lock);

        if (!vvs->msg_count) {
                spin_unlock_bh(&vvs->rx_lock);
                return 0;
        }

        total = 0;
        len = msg_data_left(msg);

        skb_queue_walk(&vvs->rx_queue, skb) {
                struct virtio_vsock_hdr *hdr;

                if (total < len) {
                        size_t bytes;
                        int err;

                        bytes = len - total;
                        if (bytes > skb->len)
                                bytes = skb->len;

                        spin_unlock_bh(&vvs->rx_lock);

                        /* sk_lock is held by caller so no one else can dequeue.
                         * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                         */
                        err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                                     &msg->msg_iter, bytes);
                        if (err)
                                return err;

                        spin_lock_bh(&vvs->rx_lock);
                }

                total += skb->len;
                hdr = virtio_vsock_hdr(skb);

                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
                        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
                                msg->msg_flags |= MSG_EOR;

                        break;
                }
        }

        spin_unlock_bh(&vvs->rx_lock);

        return total;
}

static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
                                                 struct msghdr *msg,
                                                 int flags)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        int dequeued_len = 0;
        size_t user_buf_len = msg_data_left(msg);
        bool msg_ready = false;
        struct sk_buff *skb;

        spin_lock_bh(&vvs->rx_lock);

        if (vvs->msg_count == 0) {
                spin_unlock_bh(&vvs->rx_lock);
                return 0;
        }

        while (!msg_ready) {
                struct virtio_vsock_hdr *hdr;
                size_t pkt_len;

                skb = __skb_dequeue(&vvs->rx_queue);
                if (!skb)
                        break;
                hdr = virtio_vsock_hdr(skb);
                pkt_len = (size_t)le32_to_cpu(hdr->len);

                if (dequeued_len >= 0) {
                        size_t bytes_to_copy;

                        bytes_to_copy = min(user_buf_len, pkt_len);

                        if (bytes_to_copy) {
                                int err;

                                /* sk_lock is held by caller so no one else can dequeue.
                                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                                 */
                                spin_unlock_bh(&vvs->rx_lock);

                                err = skb_copy_datagram_iter(skb, 0,
                                                             &msg->msg_iter,
                                                             bytes_to_copy);
                                if (err) {
                                        /* Copy of message failed. The rest of
                                         * the fragments will be freed without
                                         * being copied.
                                         */
                                        dequeued_len = err;
                                } else {
                                        user_buf_len -= bytes_to_copy;
                                }

                                spin_lock_bh(&vvs->rx_lock);
                        }

                        if (dequeued_len >= 0)
                                dequeued_len += pkt_len;
                }

                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
                        msg_ready = true;
                        vvs->msg_count--;

                        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
                                msg->msg_flags |= MSG_EOR;
                }

                virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
                kfree_skb(skb);
        }

        spin_unlock_bh(&vvs->rx_lock);

        virtio_transport_send_credit_update(vsk);

        return dequeued_len;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len, int flags)
{
        if (flags & MSG_PEEK)
                return virtio_transport_stream_do_peek(vsk, msg, len);
        else
                return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);

ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   int flags)
{
        if (flags & MSG_PEEK)
                return virtio_transport_seqpacket_do_peek(vsk, msg);
        else
                return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);

int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        spin_lock_bh(&vvs->tx_lock);

        if (len > vvs->peer_buf_alloc) {
                spin_unlock_bh(&vvs->tx_lock);
                return -EMSGSIZE;
        }

        spin_unlock_bh(&vvs->tx_lock);

        return virtio_transport_stream_enqueue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);

int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
                               struct msghdr *msg,
                               size_t len, int flags)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->rx_lock);
        bytes = vvs->rx_bytes;
        spin_unlock_bh(&vvs->rx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        u32 msg_count;

        spin_lock_bh(&vvs->rx_lock);
        msg_count = vvs->msg_count;
        spin_unlock_bh(&vvs->rx_lock);

        return msg_count;
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (bytes < 0)
                bytes = 0;

        return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->tx_lock);
        bytes = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

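/* Allocate the per-socket transport state. A child socket created by a
 * listener inherits the parent's view of the peer's buffer size, and the
 * local buffer size is clamped to VIRTIO_VSOCK_MAX_BUF_SIZE before being
 * advertised as 'buf_alloc'.
 */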
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
                                    struct vsock_sock *psk)
{
        struct virtio_vsock_sock *vvs;

        vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
        if (!vvs)
                return -ENOMEM;

        vsk->trans = vvs;
        vvs->vsk = vsk;
        if (psk && psk->trans) {
                struct virtio_vsock_sock *ptrans = psk->trans;

                vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
        }

        if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
                vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = vsk->buffer_size;

        spin_lock_init(&vvs->rx_lock);
        spin_lock_init(&vvs->tx_lock);
        skb_queue_head_init(&vvs->rx_queue);

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
                *val = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = *val;

        virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);

int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
                                size_t target,
                                bool *data_ready_now)
{
        *data_ready_now = vsock_stream_has_data(vsk) >= target;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
                                 size_t target,
                                 bool *space_avail_now)
{
        s64 free_space;

        free_space = vsock_stream_has_space(vsk);
        if (free_space > 0)
                *space_avail_now = true;
        else if (free_space == 0)
                *space_avail_now = false;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
                                      size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
                                           size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
                                             size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
                                              size_t target, ssize_t copied, bool data_read,
                                              struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
                                      struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
                                           struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
                                             struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
                                              ssize_t written, struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
        return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
                                struct sockaddr_vm *addr)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
        return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);

int virtio_transport_connect(struct vsock_sock *vsk)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_REQUEST,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);

int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_SHUTDOWN,
                .flags = (mode & RCV_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
                         (mode & SEND_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
                               struct sockaddr_vm *remote_addr,
                               struct msghdr *msg,
                               size_t dgram_len)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RW,
                .msg = msg,
                .pkt_len = len,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        virtio_transport_cancel_close_work(vsk, true);

        kfree(vvs);
        vsk->trans = NULL;
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        size_t ret;

        spin_lock_bh(&vvs->tx_lock);
        ret = vvs->bytes_unsent;
        spin_unlock_bh(&vvs->tx_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);

static int virtio_transport_reset(struct vsock_sock *vsk,
                                  struct sk_buff *skb)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .reply = !!skb,
                .vsk = vsk,
        };

        /* Send RST only if the original pkt is not a RST pkt */
        if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
                                          struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .type = le16_to_cpu(hdr->type),
                .reply = true,
        };
        struct sk_buff *reply;

        /* Send RST only if the original pkt is not a RST pkt */
        if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        if (!t)
                return -ENOTCONN;

        reply = virtio_transport_alloc_skb(&info, 0, false,
                                           le64_to_cpu(hdr->dst_cid),
                                           le32_to_cpu(hdr->dst_port),
                                           le64_to_cpu(hdr->src_cid),
                                           le32_to_cpu(hdr->src_port));
        if (!reply)
                return -ENOMEM;

        return t->send_pkt(reply);
}

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        /* We don't need to take rx_lock, as the socket is closing and we are
         * removing it.
         */
        __skb_queue_purge(&vvs->rx_queue);
        vsock_remove_sock(vsk);
}

static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
        if (timeout) {
                DEFINE_WAIT_FUNC(wait, woken_wake_function);

                add_wait_queue(sk_sleep(sk), &wait);

                do {
                        if (sk_wait_event(sk, &timeout,
                                          sock_flag(sk, SOCK_DONE), &wait))
                                break;
                } while (!signal_pending(current) && timeout);

                remove_wait_queue(sk_sleep(sk), &wait);
        }
}

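/* Tear down a previously scheduled close timeout. When 'cancel_timeout'
 * is set the delayed work is cancelled first, and the socket is only
 * removed (and the work's reference dropped) if the work had not already
 * run.
 */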
static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
                                               bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        if (vsk->close_work_scheduled &&
            (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
                vsk->close_work_scheduled = false;

                virtio_transport_remove_sock(vsk);

                /* Release refcnt obtained when we scheduled the timeout */
                sock_put(sk);
        }
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
                                      bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        sock_set_flag(sk, SOCK_DONE);
        vsk->peer_shutdown = SHUTDOWN_MASK;
        if (vsock_stream_has_data(vsk) <= 0)
                sk->sk_state = TCP_CLOSING;
        sk->sk_state_change(sk);

        virtio_transport_cancel_close_work(vsk, cancel_timeout);
}

static void virtio_transport_close_timeout(struct work_struct *work)
{
        struct vsock_sock *vsk =
                container_of(work, struct vsock_sock, close_work.work);
        struct sock *sk = sk_vsock(vsk);

        sock_hold(sk);
        lock_sock(sk);

        if (!sock_flag(sk, SOCK_DONE)) {
                (void)virtio_transport_reset(vsk, NULL);

                virtio_transport_do_close(vsk, false);
        }

        vsk->close_work_scheduled = false;

        release_sock(sk);
        sock_put(sk);
}

/* User context, vsk->sk is locked */
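/* Returns true if the socket can be removed immediately, false if an
 * orderly shutdown is in flight: in that case a SHUTDOWN packet has been
 * sent, we optionally linger for SOCK_DONE, and a delayed work is armed to
 * force a reset after VSOCK_CLOSE_TIMEOUT.
 */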
static bool virtio_transport_close(struct vsock_sock *vsk)
{
        struct sock *sk = &vsk->sk;

        if (!(sk->sk_state == TCP_ESTABLISHED ||
              sk->sk_state == TCP_CLOSING))
                return true;

        /* Already received SHUTDOWN from peer, reply with RST */
        if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
                (void)virtio_transport_reset(vsk, NULL);
                return true;
        }

        if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
                (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

        if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
                virtio_transport_wait_close(sk, sk->sk_lingertime);

        if (sock_flag(sk, SOCK_DONE))
                return true;

        sock_hold(sk);
        INIT_DELAYED_WORK(&vsk->close_work,
                          virtio_transport_close_timeout);
        vsk->close_work_scheduled = true;
        schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
        return false;
}

void virtio_transport_release(struct vsock_sock *vsk)
{
        struct sock *sk = &vsk->sk;
        bool remove_sock = true;

        if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
                remove_sock = virtio_transport_close(vsk);

        if (remove_sock) {
                sock_set_flag(sk, SOCK_DONE);
                virtio_transport_remove_sock(vsk);
        }
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

static int
virtio_transport_recv_connecting(struct sock *sk,
                                 struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        int skerr;
        int err;

        switch (le16_to_cpu(hdr->op)) {
        case VIRTIO_VSOCK_OP_RESPONSE:
                sk->sk_state = TCP_ESTABLISHED;
                sk->sk_socket->state = SS_CONNECTED;
                vsock_insert_connected(vsk);
                sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_INVALID:
                break;
        case VIRTIO_VSOCK_OP_RST:
                skerr = ECONNRESET;
                err = 0;
                goto destroy;
        default:
                skerr = EPROTO;
                err = -EINVAL;
                goto destroy;
        }
        return 0;

destroy:
        virtio_transport_reset(vsk, skb);
        sk->sk_state = TCP_CLOSE;
        sk->sk_err = skerr;
        sk_error_report(sk);
        return err;
}

static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
                              struct sk_buff *skb)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool can_enqueue, free_pkt = false;
        struct virtio_vsock_hdr *hdr;
        u32 len;

        hdr = virtio_vsock_hdr(skb);
        len = le32_to_cpu(hdr->len);

        spin_lock_bh(&vvs->rx_lock);

        can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
        if (!can_enqueue) {
                free_pkt = true;
                goto out;
        }

        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
                vvs->msg_count++;

        /* Try to copy small packets into the buffer of the last packet
         * queued, to avoid wasting memory queueing the entire buffer with
         * a small payload.
         */
        if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
                struct virtio_vsock_hdr *last_hdr;
                struct sk_buff *last_skb;

                last_skb = skb_peek_tail(&vvs->rx_queue);
                last_hdr = virtio_vsock_hdr(last_skb);

                /* If there is space in the last packet queued, we copy the
                 * new packet in its buffer. We avoid this if the last packet
                 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is the
                 * delimiter of a SEQPACKET message, so 'pkt' is the first
                 * packet of a new message.
                 */
                if (skb->len < skb_tailroom(last_skb) &&
                    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
                        memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
                        free_pkt = true;
                        last_hdr->flags |= hdr->flags;
                        le32_add_cpu(&last_hdr->len, len);
                        goto out;
                }
        }

        __skb_queue_tail(&vvs->rx_queue, skb);

out:
        spin_unlock_bh(&vvs->rx_lock);
        if (free_pkt)
                kfree_skb(skb);
}

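/* Handle a packet for an established connection: RW packets are queued for
 * readers, credit messages update flow control, SHUTDOWN updates
 * 'peer_shutdown' (possibly closing the socket), and RST closes it
 * immediately.
 */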
static int
virtio_transport_recv_connected(struct sock *sk,
                                struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        int err = 0;

        switch (le16_to_cpu(hdr->op)) {
        case VIRTIO_VSOCK_OP_RW:
                virtio_transport_recv_enqueue(vsk, skb);
                vsock_data_ready(sk);
                return err;
        case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
                virtio_transport_send_credit_update(vsk);
                break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
                sk->sk_write_space(sk);
                break;
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
                        vsk->peer_shutdown |= RCV_SHUTDOWN;
                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
                        vsk->peer_shutdown |= SEND_SHUTDOWN;
                if (vsk->peer_shutdown == SHUTDOWN_MASK) {
                        if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
                                (void)virtio_transport_reset(vsk, NULL);
                                virtio_transport_do_close(vsk, true);
                        }
                        /* Remove this socket anyway because the remote peer sent
                         * the shutdown. This way a new connection will succeed
                         * if the remote peer uses the same source port,
                         * even if the old socket is still unreleased, but now disconnected.
                         */
                        vsock_remove_sock(vsk);
                }
                if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
                        sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_RST:
                virtio_transport_do_close(vsk, true);
                break;
        default:
                err = -EINVAL;
                break;
        }

        kfree_skb(skb);
        return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
                                    struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);

        if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
                virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
                               struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RESPONSE,
                .remote_cid = le64_to_cpu(hdr->src_cid),
                .remote_port = le32_to_cpu(hdr->src_port),
                .reply = true,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}

static bool virtio_transport_space_update(struct sock *sk,
                                          struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool space_available;

        /* Listener sockets are not associated with any transport, so we are
         * not able to take the state to see if there is space available in the
         * remote peer, but since they are only used to receive requests, we
         * can assume that there is always space available in the other peer.
         */
        if (!vvs)
                return true;

        /* buf_alloc and fwd_cnt are always included in the hdr */
        spin_lock_bh(&vvs->tx_lock);
        vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
        vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
        space_available = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);
        return space_available;
}

/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
                             struct virtio_transport *t)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        struct vsock_sock *vchild;
        struct sock *child;
        int ret;

        if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
                virtio_transport_reset_no_sock(t, skb);
                return -EINVAL;
        }

        if (sk_acceptq_is_full(sk)) {
                virtio_transport_reset_no_sock(t, skb);
                return -ENOMEM;
        }

        /* __vsock_release() might have already flushed accept_queue.
         * Subsequent enqueues would lead to a memory leak.
         */
        if (sk->sk_shutdown == SHUTDOWN_MASK) {
                virtio_transport_reset_no_sock(t, skb);
                return -ESHUTDOWN;
        }

        child = vsock_create_connected(sk);
        if (!child) {
                virtio_transport_reset_no_sock(t, skb);
                return -ENOMEM;
        }

        sk_acceptq_added(sk);

        lock_sock_nested(child, SINGLE_DEPTH_NESTING);

        child->sk_state = TCP_ESTABLISHED;

        vchild = vsock_sk(child);
        vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
                        le32_to_cpu(hdr->dst_port));
        vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
                        le32_to_cpu(hdr->src_port));

        ret = vsock_assign_transport(vchild, vsk);
        /* The transport assigned (looking at remote_addr) must be the same
         * one on which we received the request.
         */
        if (ret || vchild->transport != &t->transport) {
                release_sock(child);
                virtio_transport_reset_no_sock(t, skb);
                sock_put(child);
                return ret;
        }

        if (virtio_transport_space_update(child, skb))
                child->sk_write_space(child);

        vsock_insert_connected(vchild);
        vsock_enqueue_accept(sk, child);
        virtio_transport_send_response(vchild, skb);

        release_sock(child);

        sk->sk_data_ready(sk);
        return 0;
}

static bool virtio_transport_valid_type(u16 type)
{
        return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
               (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}

/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
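/* The destination socket is looked up first in the connected table by
 * (src, dst) address pair and then in the bound table, and the packet is
 * then dispatched on sk_state; packets that match no socket, or a socket
 * of the wrong type or transport, are answered with a reset.
 */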
void virtio_transport_recv_pkt(struct virtio_transport *t,
                               struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct sockaddr_vm src, dst;
        struct vsock_sock *vsk;
        struct sock *sk;
        bool space_available;

        vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
                        le32_to_cpu(hdr->src_port));
        vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
                        le32_to_cpu(hdr->dst_port));

        trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
                                        dst.svm_cid, dst.svm_port,
                                        le32_to_cpu(hdr->len),
                                        le16_to_cpu(hdr->type),
                                        le16_to_cpu(hdr->op),
                                        le32_to_cpu(hdr->flags),
                                        le32_to_cpu(hdr->buf_alloc),
                                        le32_to_cpu(hdr->fwd_cnt));

        if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
                (void)virtio_transport_reset_no_sock(t, skb);
                goto free_pkt;
        }

        /* The socket must be in the connected or bound table,
         * otherwise send a reset back.
         */
        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk) {
                sk = vsock_find_bound_socket(&dst);
                if (!sk) {
                        (void)virtio_transport_reset_no_sock(t, skb);
                        goto free_pkt;
                }
        }

        if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
                (void)virtio_transport_reset_no_sock(t, skb);
                sock_put(sk);
                goto free_pkt;
        }

        if (!skb_set_owner_sk_safe(skb, sk)) {
                WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
                goto free_pkt;
        }

        vsk = vsock_sk(sk);

        lock_sock(sk);

        /* Check if sk has been closed or assigned to another transport before
         * lock_sock (note: listener sockets are not assigned to any transport)
         */
        if (sock_flag(sk, SOCK_DONE) ||
            (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
                (void)virtio_transport_reset_no_sock(t, skb);
                release_sock(sk);
                sock_put(sk);
                goto free_pkt;
        }

        space_available = virtio_transport_space_update(sk, skb);

        /* Update CID in case it has changed after a transport reset event */
        if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
                vsk->local_addr.svm_cid = dst.svm_cid;

        if (space_available)
                sk->sk_write_space(sk);

        switch (sk->sk_state) {
        case TCP_LISTEN:
                virtio_transport_recv_listen(sk, skb, t);
                kfree_skb(skb);
                break;
        case TCP_SYN_SENT:
                virtio_transport_recv_connecting(sk, skb);
                kfree_skb(skb);
                break;
        case TCP_ESTABLISHED:
                virtio_transport_recv_connected(sk, skb);
                break;
        case TCP_CLOSING:
                virtio_transport_recv_disconnecting(sk, skb);
                kfree_skb(skb);
                break;
        default:
                (void)virtio_transport_reset_no_sock(t, skb);
                kfree_skb(skb);
                break;
        }

        release_sock(sk);

        /* Release refcnt obtained when we fetched this socket out of the
         * bound or connected list.
         */
        sock_put(sk);
        return;

free_pkt:
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

/* Remove skbs found in a queue that have a vsk that matches.
 *
 * Each skb is freed.
 *
 * Returns the count of skbs that were reply packets.
 */
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
        struct sk_buff_head freeme;
        struct sk_buff *skb, *tmp;
        int cnt = 0;

        skb_queue_head_init(&freeme);

        spin_lock_bh(&queue->lock);
        skb_queue_walk_safe(queue, skb, tmp) {
                if (vsock_sk(skb->sk) != vsk)
                        continue;

                __skb_unlink(skb, queue);
                __skb_queue_tail(&freeme, skb);

                if (virtio_vsock_skb_reply(skb))
                        cnt++;
        }
        spin_unlock_bh(&queue->lock);

        __skb_queue_purge(&freeme);

        return cnt;
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sock *sk = sk_vsock(vsk);
        struct virtio_vsock_hdr *hdr;
        struct sk_buff *skb;
        u32 pkt_len;
        int off = 0;
        int err;

        spin_lock_bh(&vvs->rx_lock);
        /* Use __skb_recv_datagram() for race-free handling of the receive. It
         * works for types other than dgrams.
         */
        skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
        if (!skb) {
                spin_unlock_bh(&vvs->rx_lock);
                return err;
        }

        hdr = virtio_vsock_hdr(skb);
        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
                vvs->msg_count--;

        pkt_len = le32_to_cpu(hdr->len);
        virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
        spin_unlock_bh(&vvs->rx_lock);

        virtio_transport_send_credit_update(vsk);

        return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool send_update;

        spin_lock_bh(&vvs->rx_lock);

        /* If the number of available bytes is less than the new SO_RCVLOWAT
         * value, kick the sender to send more data, because the sender may
         * sleep in its 'send()' syscall waiting for enough space at our
         * side. Also don't send a credit update when the peer already knows
         * the actual value - such a transmission would be useless.
         */
        send_update = (vvs->rx_bytes < val) &&
                      (vvs->fwd_cnt != vvs->last_fwd_cnt);

        spin_unlock_bh(&vvs->rx_lock);

        if (send_update) {
                int err;

                err = virtio_transport_send_credit_update(vsk);
                if (err < 0)
                        return err;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");