/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian  La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff    :       Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

/*
 * Wait for a packet..
 */
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
{
	int error;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (!skb_queue_empty(&sk->sk_receive_queue))
		goto out;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk->sk_sleep, &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}


/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as IPX, AX.25 and AppleTalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so the caller
 *	needs to unlock the socket in that case (usually by calling
 *	skb_free_datagram)
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	*  8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting are specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
				    int *peeked, int *err)
{
	struct sk_buff *skb;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		unsigned long cpu_flags;

		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb) {
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, &sk->sk_receive_queue);
		}
		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);

		if (skb)
			return skb;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_packet(sk, err, &timeo));

	return NULL;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
				  int noblock, int *err)
{
	int peeked;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, err);
}

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
}
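
/*
 * Usage sketch: a minimal recvmsg() for a datagram protocol, showing how
 * skb_recv_datagram(), skb_copy_datagram_iovec() and skb_free_datagram()
 * are meant to fit together. The "myproto" names and MYPROTO_HLEN header
 * length are hypothetical; real callers such as udp_recvmsg() follow the
 * same shape but also fill in msg_name and handle retries.
 *
 *	static int myproto_recvmsg(struct kiocb *iocb, struct socket *sock,
 *				   struct msghdr *msg, size_t len, int flags)
 *	{
 *		struct sock *sk = sock->sk;
 *		struct sk_buff *skb;
 *		int copied, err;
 *
 *		skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 *		if (!skb)
 *			return err;
 *
 *		copied = skb->len - MYPROTO_HLEN;
 *		if (copied > len) {
 *			copied = len;
 *			msg->msg_flags |= MSG_TRUNC;
 *		}
 *
 *		err = skb_copy_datagram_iovec(skb, MYPROTO_HLEN,
 *					      msg->msg_iov, copied);
 *		skb_free_datagram(sk, skb);
 *		return err ? err : copied;
 *	}
 */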

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	skb_free_datagram(sk, skb);
	return err;
}

EXPORT_SYMBOL(skb_kill_datagram);
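
/*
 * Usage sketch: skb_kill_datagram() is the error-path counterpart to
 * skb_free_datagram(). If copying a received datagram to user space fails,
 * the skb must be dropped even when the caller used MSG_PEEK, otherwise the
 * bad packet would be handed back again on the next read. Same hypothetical
 * "myproto" names as in the recvmsg() sketch above:
 *
 *		err = skb_copy_datagram_iovec(skb, MYPROTO_HLEN,
 *					      msg->msg_iov, copied);
 *		if (err) {
 *			skb_kill_datagram(sk, skb, flags);
 *			return err;
 *		}
 */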

/**
 *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Note: the iovec is modified during the copy.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			WARN_ON(start > offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_datagram_iovec(list,
							    offset - start,
							    to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

/**
 *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: io vector to copy from
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is modified during the copy.
 */
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 struct iovec *from, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovec(skb->data + offset, from, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_fromiovec(vaddr + frag->page_offset +
					       offset - start, from, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			WARN_ON(start > offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_datagram_from_iovec(list,
								 offset - start,
								 from, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
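
/*
 * Usage sketch: the transmit-side counterpart. A sendmsg() implementation
 * can allocate an skb and fill its data area straight from the user's
 * iovec. The names below ("myproto", MYPROTO_HLEN) are hypothetical and
 * error handling is trimmed to the essentials:
 *
 *		skb = sock_alloc_send_skb(sk, MYPROTO_HLEN + len,
 *					  msg->msg_flags & MSG_DONTWAIT, &err);
 *		if (!skb)
 *			return err;
 *
 *		skb_reserve(skb, MYPROTO_HLEN);
 *		skb_put(skb, len);
 *		err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, len);
 *		if (err) {
 *			kfree_skb(skb);
 *			return err;
 *		}
 */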

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int pos = 0;
	int i, copy = start - offset;

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
							frag->page_offset +
							offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			WARN_ON(start > offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				__wsum csum2 = 0;
				if (copy > len)
					copy = len;
				if (skb_copy_and_csum_datagram(list,
							       offset - start,
							       to, copy,
							       &csum2))
					goto fault;
				*csump = csum_block_add(*csump, csum2, pos);
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete_head(skb, skb->len);
}
EXPORT_SYMBOL(__skb_checksum_complete);
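
/*
 * Usage sketch: __skb_checksum_complete() returns 0 when the full packet
 * checksum verifies and the non-zero folded sum otherwise, so a receive
 * path that has not yet validated hardware checksums can use it as a final
 * software check and drop the datagram on failure (whether the caller then
 * retries or returns an error is protocol policy):
 *
 *		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
 *		    __skb_checksum_complete(skb)) {
 *			skb_kill_datagram(sk, skb, flags);
 *			return -EAGAIN;
 *		}
 */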

/**
 *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@iov: io vector
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy. Beware, in this case iovec
 *			   can be modified!
 */
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pretty silly, look at memcpy_toiovec, though 8)
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
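
/*
 * Usage sketch: the usual receive-path split, as done by udp_recvmsg() and
 * friends. If the checksum has already been verified, copy the payload with
 * plain skb_copy_datagram_iovec(); otherwise fold the verification into the
 * copy so the data is only walked once. MYPROTO_HLEN is again hypothetical:
 *
 *		if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 *			err = skb_copy_datagram_iovec(skb, MYPROTO_HLEN,
 *						      msg->msg_iov, copied);
 *		else
 *			err = skb_copy_and_csum_datagram_iovec(skb,
 *							       MYPROTO_HLEN,
 *							       msg->msg_iov);
 */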

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

EXPORT_SYMBOL(datagram_poll);
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
EXPORT_SYMBOL(skb_copy_datagram_iovec);
EXPORT_SYMBOL(skb_free_datagram);
EXPORT_SYMBOL(skb_recv_datagram);
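
/*
 * Usage sketch: datagram_poll() is normally wired straight into a
 * protocol's proto_ops next to that protocol's own handlers. Several
 * datagram protocols (AppleTalk, IPX) point .poll at it directly, while
 * UDP wraps it in udp_poll(). The "myproto" names are hypothetical:
 *
 *	static const struct proto_ops myproto_dgram_ops = {
 *		.family		= PF_MYPROTO,
 *		.owner		= THIS_MODULE,
 *		.poll		= datagram_poll,
 *		.recvmsg	= myproto_recvmsg,
 *		.sendmsg	= myproto_sendmsg,
 *		(remaining handlers omitted)
 *	};
 */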