/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
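
/*
 * For illustration, a minimal userspace sketch of binding in the abstract
 * namespace described above; the name "example" and its length are made up
 * for this sketch and do not come from this file:
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(void)
 *	{
 *		struct sockaddr_un sun;
 *		socklen_t len;
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		// sun_path[0] == '\0' selects the abstract namespace;
 *		// the remaining bytes are the (not NUL-terminated) name.
 *		memcpy(sun.sun_path + 1, "example", 7);
 *		len = offsetof(struct sockaddr_un, sun_path) + 1 + 7;
 *		return bind(fd, (struct sockaddr *)&sun, len);
 *	}
 */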

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock
 *    each socket's state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] does not exist as such.  However in kernel
		 * space we are guaranteed that it is a valid memory location
		 * in our kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
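
/*
 * A hedged userspace illustration of the length rule unix_mkname() applies
 * to filesystem names: the address is sized to the used part of sun_path.
 * The helper name and the fact that offsetof(struct sockaddr_un, sun_path)
 * equals the sizeof(short) used above are assumptions of this sketch:
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	socklen_t unix_addr_len(struct sockaddr_un *sun, const char *path)
 *	{
 *		memset(sun, 0, sizeof(*sun));
 *		sun->sun_family = AF_UNIX;
 *		strncpy(sun->sun_path, path, sizeof(sun->sun_path) - 1);
 *		// family bytes + path + trailing NUL, matching the value
 *		// unix_mkname() computes for a pathname address.
 *		return offsetof(struct sockaddr_un, sun_path) +
 *		       strlen(sun->sun_path) + 1;
 *	}
 */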

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_real_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * that poll and sendmsg need to test. The dgram recv code will do a
 * wake up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
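
/*
 * A rough userspace sketch of the second writeability condition described
 * above: a connected datagram client polling for POLLOUT will also be woken
 * when the (possibly shared) server receive queue drains. The fd is assumed
 * to be a connected SOCK_DGRAM AF_UNIX socket:
 *
 *	#include <poll.h>
 *
 *	int wait_writable(int fd)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *		// Blocks until a send would not fail with EAGAIN for
 *		// either reason: our own wmem or the peer's full queue.
 *		return poll(&pfd, 1, -1);
 *	}
 */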

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
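
/*
 * For context, a small userspace sketch of the SO_PEEK_OFF knob this
 * implements: once a peek offset is set, MSG_PEEK reads advance through
 * the queued data instead of rereading from the start. A sketch only;
 * error handling is elided:
 *
 *	#include <sys/socket.h>
 *
 *	void peek_from_start(int fd, char *buf, size_t len)
 *	{
 *		int off = 0;
 *
 *		// Each subsequent MSG_PEEK consumes the peek offset,
 *		// so it sees the next chunk of still-unread data.
 *		setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *		recv(fd, buf, len, MSG_PEEK);
 *	}
 */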


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
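
/*
 * Userspace view of autobind, as a hedged sketch: binding with only
 * sun_family set (addr_len == sizeof(sa_family_t)) makes the kernel pick
 * an abstract name of five hex digits, which getsockname() then reports.
 * The function name below is made up for the example:
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	void autobind_demo(int fd)
 *	{
 *		struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *		socklen_t len = sizeof(sa_family_t);
 *
 *		bind(fd, (struct sockaddr *)&sun, len);
 *		len = sizeof(sun);
 *		getsockname(fd, (struct sockaddr *)&sun, &len);
 *		// sun.sun_path[0] is '\0', followed by e.g. "00001"
 *		printf("autobound to \\0%.5s\n", sun.sun_path + 1);
 *	}
 */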

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { NULL, NULL };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
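
/*
 * The AF_UNSPEC branch above dissolves an existing datagram association,
 * per 1003.1g. A hedged userspace sketch of "disconnecting" a connected
 * SOCK_DGRAM socket (the helper name is made up for the example):
 *
 *	#include <sys/socket.h>
 *
 *	int dgram_disconnect(int fd)
 *	{
 *		struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *		// Afterwards, sendto() with an explicit address works
 *		// again, while send() fails with ENOTCONN.
 *		return connect(fd, &sa, sizeof(sa));
 *	}
 */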

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all, allocate resources.
	   If we were to do it after the state is locked,
	   we would have to recheck everything again anyway.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	atomic_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
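
/*
 * Note the backlog behaviour implemented above (see the Arcangeli entry in
 * the header comment): when the listener's queue is full, a blocking
 * connect(2) waits in unix_wait_for_peer(), while a non-blocking one fails
 * with EAGAIN rather than TCP's EINPROGRESS. A hedged userspace sketch,
 * with the helper name made up for the example:
 *
 *	#include <errno.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int try_connect(int fd, const struct sockaddr_un *sun, socklen_t len)
 *	{
 *		if (connect(fd, (const struct sockaddr *)sun, len) == 0)
 *			return 0;
 *		// With O_NONBLOCK and a full backlog we get EAGAIN here;
 *		// the caller may simply retry later.
 *		return errno == EAGAIN ? 1 : -1;
 *	}
 */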

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
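
/*
 * The classic userspace counterpart of unix_socketpair(), shown as a short
 * sketch; both ends are wired back to back and carry peer credentials
 * (per the Malcolm Beattie entry in the header comment):
 *
 *	#include <sys/socket.h>
 *
 *	int make_pair(int sv[2])
 *	{
 *		// SOCK_DGRAM pairs skip the ESTABLISHED state above,
 *		// but the two peers are still joined to each other.
 *		return socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	}
 */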

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeueing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
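
/*
 * For reference, a hedged userspace sketch of the MSG_PEEK case discussed
 * above: peeking a message that carries SCM_RIGHTS installs duplicated fds
 * without consuming the message. Buffer sizes and the helper name are
 * arbitrary choices for the example:
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	int peek_one_fd(int sock)
 *	{
 *		char data[128], ctrl[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
 *		};
 *		struct cmsghdr *cmsg;
 *		int fd = -1;
 *
 *		if (recvmsg(sock, &msg, MSG_PEEK) < 0)
 *			return -1;
 *		cmsg = CMSG_FIRSTHDR(&msg);
 *		if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_RIGHTS)
 *			memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 *		return fd;	// the message itself is still queued
 *	}
 */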

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
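
/*
 * A hedged userspace sketch of the SOCK_PASSCRED side of this: once the
 * receiver enables SO_PASSCRED, each message arrives with an
 * SCM_CREDENTIALS cmsg carrying the sender's pid/uid/gid (struct ucred).
 * The helper name is made up for the example:
 *
 *	#define _GNU_SOURCE
 *	#include <sys/socket.h>
 *
 *	int enable_peer_creds(int fd)
 *	{
 *		int on = 1;
 *
 *		// After this, parse SCM_CREDENTIALS from recvmsg()'s
 *		// control buffer to obtain the sender's struct ucred.
 *		return setsockopt(fd, SOL_SOCKET, SO_PASSCRED,
 *				  &on, sizeof(on));
 *	}
 */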

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

/*
 *	Send AF_UNIX data.
 */

unix_dgram_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)1651 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1652 			      size_t len)
1653 {
1654 	struct sock *sk = sock->sk;
1655 	struct net *net = sock_net(sk);
1656 	struct unix_sock *u = unix_sk(sk);
1657 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1658 	struct sock *other = NULL;
1659 	int namelen = 0; /* fake GCC */
1660 	int err;
1661 	unsigned int hash;
1662 	struct sk_buff *skb;
1663 	long timeo;
1664 	struct scm_cookie scm;
1665 	int max_level;
1666 	int data_len = 0;
1667 	int sk_locked;
1668 
1669 	wait_for_unix_gc();
1670 	err = scm_send(sock, msg, &scm, false);
1671 	if (err < 0)
1672 		return err;
1673 
1674 	err = -EOPNOTSUPP;
1675 	if (msg->msg_flags&MSG_OOB)
1676 		goto out;
1677 
1678 	if (msg->msg_namelen) {
1679 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1680 		if (err < 0)
1681 			goto out;
1682 		namelen = err;
1683 	} else {
1684 		sunaddr = NULL;
1685 		err = -ENOTCONN;
1686 		other = unix_peer_get(sk);
1687 		if (!other)
1688 			goto out;
1689 	}
1690 
1691 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1692 	    && (err = unix_autobind(sock)) != 0)
1693 		goto out;
1694 
1695 	err = -EMSGSIZE;
1696 	if (len > sk->sk_sndbuf - 32)
1697 		goto out;
1698 
1699 	if (len > SKB_MAX_ALLOC) {
1700 		data_len = min_t(size_t,
1701 				 len - SKB_MAX_ALLOC,
1702 				 MAX_SKB_FRAGS * PAGE_SIZE);
1703 		data_len = PAGE_ALIGN(data_len);
1704 
1705 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1706 	}
1707 
1708 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1709 				   msg->msg_flags & MSG_DONTWAIT, &err,
1710 				   PAGE_ALLOC_COSTLY_ORDER);
1711 	if (skb == NULL)
1712 		goto out;
1713 
1714 	err = unix_scm_to_skb(&scm, skb, true);
1715 	if (err < 0)
1716 		goto out_free;
1717 	max_level = err + 1;
1718 
1719 	skb_put(skb, len - data_len);
1720 	skb->data_len = data_len;
1721 	skb->len = len;
1722 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1723 	if (err)
1724 		goto out_free;
1725 
1726 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1727 
1728 restart:
1729 	if (!other) {
1730 		err = -ECONNRESET;
1731 		if (sunaddr == NULL)
1732 			goto out_free;
1733 
1734 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1735 					hash, &err);
1736 		if (other == NULL)
1737 			goto out_free;
1738 	}
1739 
1740 	if (sk_filter(other, skb) < 0) {
1741 		/* Toss the packet but do not return any error to the sender */
1742 		err = len;
1743 		goto out_free;
1744 	}
1745 
1746 	sk_locked = 0;
1747 	unix_state_lock(other);
1748 restart_locked:
1749 	err = -EPERM;
1750 	if (!unix_may_send(sk, other))
1751 		goto out_unlock;
1752 
1753 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1754 		/*
1755 		 *	Check with 1003.1g - what should
1756 		 *	datagram error
1757 		 */
1758 		unix_state_unlock(other);
1759 		sock_put(other);
1760 
1761 		if (!sk_locked)
1762 			unix_state_lock(sk);
1763 
1764 		err = 0;
1765 		if (unix_peer(sk) == other) {
1766 			unix_peer(sk) = NULL;
1767 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1768 
1769 			unix_state_unlock(sk);
1770 
1771 			unix_dgram_disconnected(sk, other);
1772 			sock_put(other);
1773 			err = -ECONNREFUSED;
1774 		} else {
1775 			unix_state_unlock(sk);
1776 		}
1777 
1778 		other = NULL;
1779 		if (err)
1780 			goto out_free;
1781 		goto restart;
1782 	}
1783 
1784 	err = -EPIPE;
1785 	if (other->sk_shutdown & RCV_SHUTDOWN)
1786 		goto out_unlock;
1787 
1788 	if (sk->sk_type != SOCK_SEQPACKET) {
1789 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1790 		if (err)
1791 			goto out_unlock;
1792 	}
1793 
1794 	/* other == sk && unix_peer(other) != sk can happen if:
1795 	 * - unix_peer(sk) == NULL and the destination address is bound to sk
1796 	 * - unix_peer(sk) == sk at time of get, but disconnected before lock
1797 	 */
1798 	if (other != sk &&
1799 	    unlikely(unix_peer(other) != sk &&
1800 	    unix_recvq_full_lockless(other))) {
1801 		if (timeo) {
1802 			timeo = unix_wait_for_peer(other, timeo);
1803 
1804 			err = sock_intr_errno(timeo);
1805 			if (signal_pending(current))
1806 				goto out_free;
1807 
1808 			goto restart;
1809 		}
1810 
1811 		if (!sk_locked) {
1812 			unix_state_unlock(other);
1813 			unix_state_double_lock(sk, other);
1814 		}
1815 
1816 		if (unix_peer(sk) != other ||
1817 		    unix_dgram_peer_wake_me(sk, other)) {
1818 			err = -EAGAIN;
1819 			sk_locked = 1;
1820 			goto out_unlock;
1821 		}
1822 
1823 		if (!sk_locked) {
1824 			sk_locked = 1;
1825 			goto restart_locked;
1826 		}
1827 	}
1828 
1829 	if (unlikely(sk_locked))
1830 		unix_state_unlock(sk);
1831 
1832 	if (sock_flag(other, SOCK_RCVTSTAMP))
1833 		__net_timestamp(skb);
1834 	maybe_add_creds(skb, sock, other);
1835 	skb_queue_tail(&other->sk_receive_queue, skb);
1836 	if (max_level > unix_sk(other)->recursion_level)
1837 		unix_sk(other)->recursion_level = max_level;
1838 	unix_state_unlock(other);
1839 	other->sk_data_ready(other);
1840 	sock_put(other);
1841 	scm_destroy(&scm);
1842 	return len;
1843 
1844 out_unlock:
1845 	if (sk_locked)
1846 		unix_state_unlock(sk);
1847 	unix_state_unlock(other);
1848 out_free:
1849 	kfree_skb(skb);
1850 out:
1851 	if (other)
1852 		sock_put(other);
1853 	scm_destroy(&scm);
1854 	return err;
1855 }
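/*
 * Editor's note: a minimal userspace sketch (not part of this file) of the
 * two ways a send enters unix_dgram_sendmsg() above: with an explicit
 * destination (msg_namelen != 0, resolved via unix_find_other()), or on a
 * connected socket (msg_namelen == 0, peer taken from unix_peer_get()).
 * The path "/tmp/u.sock" is an arbitrary example.
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	strncpy(sun.sun_path, "/tmp/u.sock", sizeof(sun.sun_path) - 1);
 *
 *	// explicit destination: the unix_mkname()/hash lookup branch
 *	sendto(fd, "hi", 2, 0, (struct sockaddr *)&sun, sizeof(sun));
 *
 *	// connected: msg_namelen == 0, peer comes from unix_peer_get(sk)
 *	connect(fd, (struct sockaddr *)&sun, sizeof(sun));
 *	send(fd, "hi", 2, 0);
 */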
1856 
1857 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1858  * bytes, and a minimum of a full page.
1859  */
1860 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
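/*
 * Editor's note: a worked example of the macro above, assuming 4 KiB pages:
 * get_order(32768) == 3, so UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes.
 * With 64 KiB pages, get_order(32768) == 0 and the value is one full page
 * (65536), which is where the "minimum of a full page" rule kicks in.
 */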
1861 
1862 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1863 			       size_t len)
1864 {
1865 	struct sock *sk = sock->sk;
1866 	struct sock *other = NULL;
1867 	int err, size;
1868 	struct sk_buff *skb;
1869 	int sent = 0;
1870 	struct scm_cookie scm;
1871 	bool fds_sent = false;
1872 	int max_level;
1873 	int data_len;
1874 
1875 	wait_for_unix_gc();
1876 	err = scm_send(sock, msg, &scm, false);
1877 	if (err < 0)
1878 		return err;
1879 
1880 	err = -EOPNOTSUPP;
1881 	if (msg->msg_flags&MSG_OOB)
1882 		goto out_err;
1883 
1884 	if (msg->msg_namelen) {
1885 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1886 		goto out_err;
1887 	} else {
1888 		err = -ENOTCONN;
1889 		other = unix_peer(sk);
1890 		if (!other)
1891 			goto out_err;
1892 	}
1893 
1894 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1895 		goto pipe_err;
1896 
1897 	while (sent < len) {
1898 		size = len - sent;
1899 
1900 		/* Keep two messages in the pipe so it schedules better */
1901 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1902 
1903 		/* allow fallback to order-0 allocations */
1904 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1905 
1906 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1907 
1908 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1909 
1910 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1911 					   msg->msg_flags & MSG_DONTWAIT, &err,
1912 					   get_order(UNIX_SKB_FRAGS_SZ));
1913 		if (!skb)
1914 			goto out_err;
1915 
1916 		/* Only send the fds in the first buffer */
1917 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1918 		if (err < 0) {
1919 			kfree_skb(skb);
1920 			goto out_err;
1921 		}
1922 		max_level = err + 1;
1923 		fds_sent = true;
1924 
1925 		skb_put(skb, size - data_len);
1926 		skb->data_len = data_len;
1927 		skb->len = size;
1928 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1929 		if (err) {
1930 			kfree_skb(skb);
1931 			goto out_err;
1932 		}
1933 
1934 		unix_state_lock(other);
1935 
1936 		if (sock_flag(other, SOCK_DEAD) ||
1937 		    (other->sk_shutdown & RCV_SHUTDOWN))
1938 			goto pipe_err_free;
1939 
1940 		maybe_add_creds(skb, sock, other);
1941 		skb_queue_tail(&other->sk_receive_queue, skb);
1942 		if (max_level > unix_sk(other)->recursion_level)
1943 			unix_sk(other)->recursion_level = max_level;
1944 		unix_state_unlock(other);
1945 		other->sk_data_ready(other);
1946 		sent += size;
1947 	}
1948 
1949 	scm_destroy(&scm);
1950 
1951 	return sent;
1952 
1953 pipe_err_free:
1954 	unix_state_unlock(other);
1955 	kfree_skb(skb);
1956 pipe_err:
1957 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1958 		send_sig(SIGPIPE, current, 0);
1959 	err = -EPIPE;
1960 out_err:
1961 	scm_destroy(&scm);
1962 	return sent ? : err;
1963 }
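/*
 * Editor's note: a hedged userspace sketch (not part of this file) of the
 * pipe_err behaviour above: once the peer is gone, a stream send raises
 * SIGPIPE unless MSG_NOSIGNAL is passed, in which case -EPIPE is returned.
 *
 *	ssize_t n = send(fd, buf, len, MSG_NOSIGNAL);
 *	if (n < 0 && errno == EPIPE)
 *		; // peer closed or shut down for reading; no signal delivered
 */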
1964 
1965 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1966 				    int offset, size_t size, int flags)
1967 {
1968 	int err;
1969 	bool send_sigpipe = false;
1970 	bool init_scm = true;
1971 	struct scm_cookie scm;
1972 	struct sock *other, *sk = socket->sk;
1973 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1974 
1975 	if (flags & MSG_OOB)
1976 		return -EOPNOTSUPP;
1977 
1978 	other = unix_peer(sk);
1979 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1980 		return -ENOTCONN;
1981 
1982 	if (false) {
1983 alloc_skb:
1984 		unix_state_unlock(other);
1985 		mutex_unlock(&unix_sk(other)->iolock);
1986 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1987 					      &err, 0);
1988 		if (!newskb)
1989 			goto err;
1990 	}
1991 
1992 	/* we must acquire iolock as we modify already present
1993 	 * skbs in the sk_receive_queue and mess with skb->len
1994 	 */
1995 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1996 	if (err) {
1997 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1998 		goto err;
1999 	}
2000 
2001 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
2002 		err = -EPIPE;
2003 		send_sigpipe = true;
2004 		goto err_unlock;
2005 	}
2006 
2007 	unix_state_lock(other);
2008 
2009 	if (sock_flag(other, SOCK_DEAD) ||
2010 	    other->sk_shutdown & RCV_SHUTDOWN) {
2011 		err = -EPIPE;
2012 		send_sigpipe = true;
2013 		goto err_state_unlock;
2014 	}
2015 
2016 	if (init_scm) {
2017 		err = maybe_init_creds(&scm, socket, other);
2018 		if (err)
2019 			goto err_state_unlock;
2020 		init_scm = false;
2021 	}
2022 
2023 	skb = skb_peek_tail(&other->sk_receive_queue);
2024 	if (tail && tail == skb) {
2025 		skb = newskb;
2026 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2027 		if (newskb) {
2028 			skb = newskb;
2029 		} else {
2030 			tail = skb;
2031 			goto alloc_skb;
2032 		}
2033 	} else if (newskb) {
2034 		/* this is the fast path: an skb we can append to already
2035 		 * exists, so the speculatively allocated newskb is not
2036 		 * needed; consume_skb(NULL) would do no harm anyway
2037 		 */
2038 		consume_skb(newskb);
2039 		newskb = NULL;
2040 	}
2041 
2042 	if (skb_append_pagefrags(skb, page, offset, size)) {
2043 		tail = skb;
2044 		goto alloc_skb;
2045 	}
2046 
2047 	skb->len += size;
2048 	skb->data_len += size;
2049 	skb->truesize += size;
2050 	atomic_add(size, &sk->sk_wmem_alloc);
2051 
2052 	if (newskb) {
2053 		err = unix_scm_to_skb(&scm, skb, false);
2054 		if (err)
2055 			goto err_state_unlock;
2056 		spin_lock(&other->sk_receive_queue.lock);
2057 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2058 		spin_unlock(&other->sk_receive_queue.lock);
2059 	}
2060 
2061 	unix_state_unlock(other);
2062 	mutex_unlock(&unix_sk(other)->iolock);
2063 
2064 	other->sk_data_ready(other);
2065 	scm_destroy(&scm);
2066 	return size;
2067 
2068 err_state_unlock:
2069 	unix_state_unlock(other);
2070 err_unlock:
2071 	mutex_unlock(&unix_sk(other)->iolock);
2072 err:
2073 	kfree_skb(newskb);
2074 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2075 		send_sig(SIGPIPE, current, 0);
2076 	if (!init_scm)
2077 		scm_destroy(&scm);
2078 	return err;
2079 }
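/*
 * Editor's note: unix_stream_sendpage() is typically reached from
 * sendfile(2) or splice(2) with an AF_UNIX stream socket as the output.
 * A minimal sketch (the descriptor names are illustrative):
 *
 *	off_t off = 0;
 *	sendfile(sock_fd, file_fd, &off, count);  // pages land here
 */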
2080 
2081 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2082 				  size_t len)
2083 {
2084 	int err;
2085 	struct sock *sk = sock->sk;
2086 
2087 	err = sock_error(sk);
2088 	if (err)
2089 		return err;
2090 
2091 	if (sk->sk_state != TCP_ESTABLISHED)
2092 		return -ENOTCONN;
2093 
2094 	if (msg->msg_namelen)
2095 		msg->msg_namelen = 0;
2096 
2097 	return unix_dgram_sendmsg(sock, msg, len);
2098 }
2099 
2100 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2101 				  size_t size, int flags)
2102 {
2103 	struct sock *sk = sock->sk;
2104 
2105 	if (sk->sk_state != TCP_ESTABLISHED)
2106 		return -ENOTCONN;
2107 
2108 	return unix_dgram_recvmsg(sock, msg, size, flags);
2109 }
2110 
2111 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2112 {
2113 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2114 
2115 	if (addr) {
2116 		msg->msg_namelen = addr->len;
2117 		memcpy(msg->msg_name, addr->name, addr->len);
2118 	}
2119 }
2120 
2121 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2122 			      size_t size, int flags)
2123 {
2124 	struct scm_cookie scm;
2125 	struct sock *sk = sock->sk;
2126 	struct unix_sock *u = unix_sk(sk);
2127 	int noblock = flags & MSG_DONTWAIT;
2128 	struct sk_buff *skb;
2129 	int err;
2130 	int peeked, skip;
2131 
2132 	err = -EOPNOTSUPP;
2133 	if (flags&MSG_OOB)
2134 		goto out;
2135 
2136 	err = mutex_lock_interruptible(&u->iolock);
2137 	if (unlikely(err)) {
2138 		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2139 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2140 		 */
2141 		err = noblock ? -EAGAIN : -ERESTARTSYS;
2142 		goto out;
2143 	}
2144 
2145 	skip = sk_peek_offset(sk, flags);
2146 
2147 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2148 	if (!skb) {
2149 		unix_state_lock(sk);
2150 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2151 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2152 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2153 			err = 0;
2154 		unix_state_unlock(sk);
2155 		goto out_unlock;
2156 	}
2157 
2158 	wake_up_interruptible_sync_poll(&u->peer_wait,
2159 					POLLOUT | POLLWRNORM | POLLWRBAND);
2160 
2161 	if (msg->msg_name)
2162 		unix_copy_addr(msg, skb->sk);
2163 
2164 	if (size > skb->len - skip)
2165 		size = skb->len - skip;
2166 	else if (size < skb->len - skip)
2167 		msg->msg_flags |= MSG_TRUNC;
2168 
2169 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2170 	if (err)
2171 		goto out_free;
2172 
2173 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2174 		__sock_recv_timestamp(msg, sk, skb);
2175 
2176 	memset(&scm, 0, sizeof(scm));
2177 
2178 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2179 	unix_set_secdata(&scm, skb);
2180 
2181 	if (!(flags & MSG_PEEK)) {
2182 		if (UNIXCB(skb).fp)
2183 			unix_detach_fds(&scm, skb);
2184 
2185 		sk_peek_offset_bwd(sk, skb->len);
2186 	} else {
2187 		/* It is questionable: on PEEK we could:
2188 		   - not return fds at all - good, but too simple 8)
2189 		   - return fds, but not return them again on read (old
2190 		     strategy, apparently wrong)
2191 		   - clone fds (chosen for now, as it is the most universal
2192 		     solution)
2193 
2194 		   POSIX 1003.1g does not actually define this clearly
2195 		   at all. POSIX 1003.1g doesn't define a lot of things
2196 		   clearly, however!
2197 
2198 		*/
2199 
2200 		sk_peek_offset_fwd(sk, size);
2201 
2202 		if (UNIXCB(skb).fp)
2203 			unix_peek_fds(&scm, skb);
2204 	}
2205 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2206 
2207 	scm_recv(sock, msg, &scm, flags);
2208 
2209 out_free:
2210 	skb_free_datagram(sk, skb);
2211 out_unlock:
2212 	mutex_unlock(&u->iolock);
2213 out:
2214 	return err;
2215 }
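/*
 * Editor's note: the (flags & MSG_TRUNC) return value above lets userspace
 * learn the size of the next datagram without consuming it - a hedged
 * sketch:
 *
 *	char c;
 *	ssize_t full = recv(fd, &c, 1, MSG_PEEK | MSG_TRUNC);
 *	// 'full' is the complete datagram length; it is still queued
 */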
2216 
2217 /*
2218  *	Sleep until more data has arrived. But check for races.
2219  */
2220 static long unix_stream_data_wait(struct sock *sk, long timeo,
2221 				  struct sk_buff *last, unsigned int last_len,
2222 				  bool freezable)
2223 {
2224 	struct sk_buff *tail;
2225 	DEFINE_WAIT(wait);
2226 
2227 	unix_state_lock(sk);
2228 
2229 	for (;;) {
2230 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2231 
2232 		tail = skb_peek_tail(&sk->sk_receive_queue);
2233 		if (tail != last ||
2234 		    (tail && tail->len != last_len) ||
2235 		    sk->sk_err ||
2236 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2237 		    signal_pending(current) ||
2238 		    !timeo)
2239 			break;
2240 
2241 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2242 		unix_state_unlock(sk);
2243 		if (freezable)
2244 			timeo = freezable_schedule_timeout(timeo);
2245 		else
2246 			timeo = schedule_timeout(timeo);
2247 		unix_state_lock(sk);
2248 
2249 		if (sock_flag(sk, SOCK_DEAD))
2250 			break;
2251 
2252 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2253 	}
2254 
2255 	finish_wait(sk_sleep(sk), &wait);
2256 	unix_state_unlock(sk);
2257 	return timeo;
2258 }
2259 
2260 static unsigned int unix_skb_len(const struct sk_buff *skb)
2261 {
2262 	return skb->len - UNIXCB(skb).consumed;
2263 }
2264 
2265 struct unix_stream_read_state {
2266 	int (*recv_actor)(struct sk_buff *, int, int,
2267 			  struct unix_stream_read_state *);
2268 	struct socket *socket;
2269 	struct msghdr *msg;
2270 	struct pipe_inode_info *pipe;
2271 	size_t size;
2272 	int flags;
2273 	unsigned int splice_flags;
2274 };
2275 
2276 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2277 				    bool freezable)
2278 {
2279 	struct scm_cookie scm;
2280 	struct socket *sock = state->socket;
2281 	struct sock *sk = sock->sk;
2282 	struct unix_sock *u = unix_sk(sk);
2283 	int copied = 0;
2284 	int flags = state->flags;
2285 	int noblock = flags & MSG_DONTWAIT;
2286 	bool check_creds = false;
2287 	int target;
2288 	int err = 0;
2289 	long timeo;
2290 	int skip;
2291 	size_t size = state->size;
2292 	unsigned int last_len;
2293 
2294 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2295 		err = -EINVAL;
2296 		goto out;
2297 	}
2298 
2299 	if (unlikely(flags & MSG_OOB)) {
2300 		err = -EOPNOTSUPP;
2301 		goto out;
2302 	}
2303 
2304 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2305 	timeo = sock_rcvtimeo(sk, noblock);
2306 
2307 	memset(&scm, 0, sizeof(scm));
2308 
2309 	/* Lock the socket to prevent the queue from being reordered
2310 	 * while we sleep in memcpy_to_msg()
2311 	 */
2312 	mutex_lock(&u->iolock);
2313 
2314 	if (flags & MSG_PEEK)
2315 		skip = sk_peek_offset(sk, flags);
2316 	else
2317 		skip = 0;
2318 
2319 	do {
2320 		int chunk;
2321 		bool drop_skb;
2322 		struct sk_buff *skb, *last;
2323 
2324 		unix_state_lock(sk);
2325 		if (sock_flag(sk, SOCK_DEAD)) {
2326 			err = -ECONNRESET;
2327 			goto unlock;
2328 		}
2329 		last = skb = skb_peek(&sk->sk_receive_queue);
2330 		last_len = last ? last->len : 0;
2331 again:
2332 		if (skb == NULL) {
2333 			unix_sk(sk)->recursion_level = 0;
2334 			if (copied >= target)
2335 				goto unlock;
2336 
2337 			/*
2338 			 *	POSIX 1003.1g mandates this order.
2339 			 */
2340 
2341 			err = sock_error(sk);
2342 			if (err)
2343 				goto unlock;
2344 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2345 				goto unlock;
2346 
2347 			unix_state_unlock(sk);
2348 			if (!timeo) {
2349 				err = -EAGAIN;
2350 				break;
2351 			}
2352 
2353 			mutex_unlock(&u->iolock);
2354 
2355 			timeo = unix_stream_data_wait(sk, timeo, last,
2356 						      last_len, freezable);
2357 
2358 			if (signal_pending(current)) {
2359 				err = sock_intr_errno(timeo);
2360 				scm_destroy(&scm);
2361 				goto out;
2362 			}
2363 
2364 			mutex_lock(&u->iolock);
2365 			continue;
2366 unlock:
2367 			unix_state_unlock(sk);
2368 			break;
2369 		}
2370 
2371 		while (skip >= unix_skb_len(skb)) {
2372 			skip -= unix_skb_len(skb);
2373 			last = skb;
2374 			last_len = skb->len;
2375 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2376 			if (!skb)
2377 				goto again;
2378 		}
2379 
2380 		unix_state_unlock(sk);
2381 
2382 		if (check_creds) {
2383 			/* Never glue messages from different writers */
2384 			if (!unix_skb_scm_eq(skb, &scm))
2385 				break;
2386 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2387 			/* Copy credentials */
2388 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2389 			unix_set_secdata(&scm, skb);
2390 			check_creds = true;
2391 		}
2392 
2393 		/* Copy address just once */
2394 		if (state->msg && state->msg->msg_name) {
2395 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2396 					 state->msg->msg_name);
2397 			unix_copy_addr(state->msg, skb->sk);
2398 			sunaddr = NULL;
2399 		}
2400 
2401 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2402 		skb_get(skb);
2403 		chunk = state->recv_actor(skb, skip, chunk, state);
2404 		drop_skb = !unix_skb_len(skb);
2405 		/* skb is only safe to use if !drop_skb */
2406 		consume_skb(skb);
2407 		if (chunk < 0) {
2408 			if (copied == 0)
2409 				copied = -EFAULT;
2410 			break;
2411 		}
2412 		copied += chunk;
2413 		size -= chunk;
2414 
2415 		if (drop_skb) {
2416 			/* the skb was touched by a concurrent reader;
2417 			 * we should not expect anything from this skb
2418 			 * anymore and assume it invalid - we can be
2419 			 * sure it was dropped from the socket queue
2420 			 *
2421 			 * let's report a short read
2422 			 */
2423 			err = 0;
2424 			break;
2425 		}
2426 
2427 		/* Mark read part of skb as used */
2428 		if (!(flags & MSG_PEEK)) {
2429 			UNIXCB(skb).consumed += chunk;
2430 
2431 			sk_peek_offset_bwd(sk, chunk);
2432 
2433 			if (UNIXCB(skb).fp)
2434 				unix_detach_fds(&scm, skb);
2435 
2436 			if (unix_skb_len(skb))
2437 				break;
2438 
2439 			skb_unlink(skb, &sk->sk_receive_queue);
2440 			consume_skb(skb);
2441 
2442 			if (scm.fp)
2443 				break;
2444 		} else {
2445 			/* It is questionable, see note in unix_dgram_recvmsg.
2446 			 */
2447 			if (UNIXCB(skb).fp)
2448 				unix_peek_fds(&scm, skb);
2449 
2450 			sk_peek_offset_fwd(sk, chunk);
2451 
2452 			if (UNIXCB(skb).fp)
2453 				break;
2454 
2455 			skip = 0;
2456 			last = skb;
2457 			last_len = skb->len;
2458 			unix_state_lock(sk);
2459 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2460 			if (skb)
2461 				goto again;
2462 			unix_state_unlock(sk);
2463 			break;
2464 		}
2465 	} while (size);
2466 
2467 	mutex_unlock(&u->iolock);
2468 	if (state->msg)
2469 		scm_recv(sock, state->msg, &scm, flags);
2470 	else
2471 		scm_destroy(&scm);
2472 out:
2473 	return copied ? : err;
2474 }
2475 
2476 static int unix_stream_read_actor(struct sk_buff *skb,
2477 				  int skip, int chunk,
2478 				  struct unix_stream_read_state *state)
2479 {
2480 	int ret;
2481 
2482 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2483 				    state->msg, chunk);
2484 	return ret ?: chunk;
2485 }
2486 
2487 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2488 			       size_t size, int flags)
2489 {
2490 	struct unix_stream_read_state state = {
2491 		.recv_actor = unix_stream_read_actor,
2492 		.socket = sock,
2493 		.msg = msg,
2494 		.size = size,
2495 		.flags = flags
2496 	};
2497 
2498 	return unix_stream_read_generic(&state, true);
2499 }
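/*
 * Editor's note: the sk_peek_offset_fwd() handling above is what backs
 * SO_PEEK_OFF on stream sockets; once enabled, consecutive MSG_PEEK reads
 * advance through the queue instead of rereading the head. A sketch:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31
 */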
2500 
2501 static ssize_t skb_unix_socket_splice(struct sock *sk,
2502 				      struct pipe_inode_info *pipe,
2503 				      struct splice_pipe_desc *spd)
2504 {
2505 	int ret;
2506 	struct unix_sock *u = unix_sk(sk);
2507 
2508 	mutex_unlock(&u->iolock);
2509 	ret = splice_to_pipe(pipe, spd);
2510 	mutex_lock(&u->iolock);
2511 
2512 	return ret;
2513 }
2514 
2515 static int unix_stream_splice_actor(struct sk_buff *skb,
2516 				    int skip, int chunk,
2517 				    struct unix_stream_read_state *state)
2518 {
2519 	return skb_splice_bits(skb, state->socket->sk,
2520 			       UNIXCB(skb).consumed + skip,
2521 			       state->pipe, chunk, state->splice_flags,
2522 			       skb_unix_socket_splice);
2523 }
2524 
2525 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2526 				       struct pipe_inode_info *pipe,
2527 				       size_t size, unsigned int flags)
2528 {
2529 	struct unix_stream_read_state state = {
2530 		.recv_actor = unix_stream_splice_actor,
2531 		.socket = sock,
2532 		.pipe = pipe,
2533 		.size = size,
2534 		.splice_flags = flags,
2535 	};
2536 
2537 	if (unlikely(*ppos))
2538 		return -ESPIPE;
2539 
2540 	if (sock->file->f_flags & O_NONBLOCK ||
2541 	    flags & SPLICE_F_NONBLOCK)
2542 		state.flags = MSG_DONTWAIT;
2543 
2544 	return unix_stream_read_generic(&state, false);
2545 }
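/*
 * Editor's note: this read path serves splice(2) from an AF_UNIX stream
 * socket into a pipe; a hedged sketch (descriptors are illustrative):
 *
 *	int p[2];
 *	pipe(p);
 *	splice(sock_fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */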
2546 
2547 static int unix_shutdown(struct socket *sock, int mode)
2548 {
2549 	struct sock *sk = sock->sk;
2550 	struct sock *other;
2551 
2552 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2553 		return -EINVAL;
2554 	/* This maps:
2555 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2556 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2557 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2558 	 */
2559 	++mode;
2560 
2561 	unix_state_lock(sk);
2562 	sk->sk_shutdown |= mode;
2563 	other = unix_peer(sk);
2564 	if (other)
2565 		sock_hold(other);
2566 	unix_state_unlock(sk);
2567 	sk->sk_state_change(sk);
2568 
2569 	if (other &&
2570 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2571 
2572 		int peer_mode = 0;
2573 
2574 		if (mode&RCV_SHUTDOWN)
2575 			peer_mode |= SEND_SHUTDOWN;
2576 		if (mode&SEND_SHUTDOWN)
2577 			peer_mode |= RCV_SHUTDOWN;
2578 		unix_state_lock(other);
2579 		other->sk_shutdown |= peer_mode;
2580 		unix_state_unlock(other);
2581 		other->sk_state_change(other);
2582 		if (peer_mode == SHUTDOWN_MASK)
2583 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2584 		else if (peer_mode & RCV_SHUTDOWN)
2585 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2586 	}
2587 	if (other)
2588 		sock_put(other);
2589 
2590 	return 0;
2591 }
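/*
 * Editor's note: for connection-oriented sockets the shutdown mask is
 * mirrored onto the peer above, so a hedged userspace sketch:
 *
 *	shutdown(fd, SHUT_WR);	// sets SEND_SHUTDOWN here,
 *				// RCV_SHUTDOWN on the peer
 *	// the peer's read() now returns 0 (EOF) once its queue drains
 */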
2592 
2593 long unix_inq_len(struct sock *sk)
2594 {
2595 	struct sk_buff *skb;
2596 	long amount = 0;
2597 
2598 	if (sk->sk_state == TCP_LISTEN)
2599 		return -EINVAL;
2600 
2601 	spin_lock(&sk->sk_receive_queue.lock);
2602 	if (sk->sk_type == SOCK_STREAM ||
2603 	    sk->sk_type == SOCK_SEQPACKET) {
2604 		skb_queue_walk(&sk->sk_receive_queue, skb)
2605 			amount += unix_skb_len(skb);
2606 	} else {
2607 		skb = skb_peek(&sk->sk_receive_queue);
2608 		if (skb)
2609 			amount = skb->len;
2610 	}
2611 	spin_unlock(&sk->sk_receive_queue.lock);
2612 
2613 	return amount;
2614 }
2615 EXPORT_SYMBOL_GPL(unix_inq_len);
2616 
2617 long unix_outq_len(struct sock *sk)
2618 {
2619 	return sk_wmem_alloc_get(sk);
2620 }
2621 EXPORT_SYMBOL_GPL(unix_outq_len);
2622 
2623 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2624 {
2625 	struct sock *sk = sock->sk;
2626 	long amount = 0;
2627 	int err;
2628 
2629 	switch (cmd) {
2630 	case SIOCOUTQ:
2631 		amount = unix_outq_len(sk);
2632 		err = put_user(amount, (int __user *)arg);
2633 		break;
2634 	case SIOCINQ:
2635 		amount = unix_inq_len(sk);
2636 		if (amount < 0)
2637 			err = amount;
2638 		else
2639 			err = put_user(amount, (int __user *)arg);
2640 		break;
2641 	default:
2642 		err = -ENOIOCTLCMD;
2643 		break;
2644 	}
2645 	return err;
2646 }
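/*
 * Editor's note: these two ioctls are the userspace view of the helpers
 * above - a hedged sketch:
 *
 *	int n;
 *	ioctl(fd, SIOCINQ, &n);		// unread bytes queued on this socket
 *	ioctl(fd, SIOCOUTQ, &n);	// bytes sent but not yet freed
 */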
2647 
2648 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2649 {
2650 	struct sock *sk = sock->sk;
2651 	unsigned int mask;
2652 
2653 	sock_poll_wait(file, sk_sleep(sk), wait);
2654 	mask = 0;
2655 
2656 	/* exceptional events? */
2657 	if (sk->sk_err)
2658 		mask |= POLLERR;
2659 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2660 		mask |= POLLHUP;
2661 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2662 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2663 
2664 	/* readable? */
2665 	if (!skb_queue_empty(&sk->sk_receive_queue))
2666 		mask |= POLLIN | POLLRDNORM;
2667 
2668 	/* Connection-based need to check for termination and startup */
2669 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2670 	    sk->sk_state == TCP_CLOSE)
2671 		mask |= POLLHUP;
2672 
2673 	/*
2674 	 * We also report the socket as writable when the other side has
2675 	 * shut down the connection. This prevents stuck sockets.
2676 	 */
2677 	if (unix_writable(sk))
2678 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2679 
2680 	return mask;
2681 }
2682 
2683 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2684 				    poll_table *wait)
2685 {
2686 	struct sock *sk = sock->sk, *other;
2687 	unsigned int mask, writable;
2688 
2689 	sock_poll_wait(file, sk_sleep(sk), wait);
2690 	mask = 0;
2691 
2692 	/* exceptional events? */
2693 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2694 		mask |= POLLERR |
2695 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2696 
2697 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2698 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2699 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2700 		mask |= POLLHUP;
2701 
2702 	/* readable? */
2703 	if (!skb_queue_empty(&sk->sk_receive_queue))
2704 		mask |= POLLIN | POLLRDNORM;
2705 
2706 	/* Connection-based need to check for termination and startup */
2707 	if (sk->sk_type == SOCK_SEQPACKET) {
2708 		if (sk->sk_state == TCP_CLOSE)
2709 			mask |= POLLHUP;
2710 		/* connection hasn't started yet? */
2711 		if (sk->sk_state == TCP_SYN_SENT)
2712 			return mask;
2713 	}
2714 
2715 	/* No write status requested, avoid expensive OUT tests. */
2716 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2717 		return mask;
2718 
2719 	writable = unix_writable(sk);
2720 	if (writable) {
2721 		unix_state_lock(sk);
2722 
2723 		other = unix_peer(sk);
2724 		if (other && unix_peer(other) != sk &&
2725 		    unix_recvq_full_lockless(other) &&
2726 		    unix_dgram_peer_wake_me(sk, other))
2727 			writable = 0;
2728 
2729 		unix_state_unlock(sk);
2730 	}
2731 
2732 	if (writable)
2733 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2734 	else
2735 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2736 
2737 	return mask;
2738 }
2739 
2740 #ifdef CONFIG_PROC_FS
2741 
2742 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2743 
2744 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2745 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2746 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
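/*
 * Editor's note: a worked example of the encoding above, assuming 64-bit
 * BITS_PER_LONG and UNIX_HASH_BITS == 8: BUCKET_SPACE == 64 - 9 - 1 == 54,
 * so set_bucket_offset(3, 5) == (3UL << 54) | 5, from which get_bucket()
 * recovers 3 and get_offset() recovers 5.
 */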
2747 
2748 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2749 {
2750 	unsigned long offset = get_offset(*pos);
2751 	unsigned long bucket = get_bucket(*pos);
2752 	struct sock *sk;
2753 	unsigned long count = 0;
2754 
2755 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2756 		if (sock_net(sk) != seq_file_net(seq))
2757 			continue;
2758 		if (++count == offset)
2759 			break;
2760 	}
2761 
2762 	return sk;
2763 }
2764 
2765 static struct sock *unix_next_socket(struct seq_file *seq,
2766 				     struct sock *sk,
2767 				     loff_t *pos)
2768 {
2769 	unsigned long bucket;
2770 
2771 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2772 		sk = sk_next(sk);
2773 		if (!sk)
2774 			goto next_bucket;
2775 		if (sock_net(sk) == seq_file_net(seq))
2776 			return sk;
2777 	}
2778 
2779 	do {
2780 		sk = unix_from_bucket(seq, pos);
2781 		if (sk)
2782 			return sk;
2783 
2784 next_bucket:
2785 		bucket = get_bucket(*pos) + 1;
2786 		*pos = set_bucket_offset(bucket, 1);
2787 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2788 
2789 	return NULL;
2790 }
2791 
2792 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2793 	__acquires(unix_table_lock)
2794 {
2795 	spin_lock(&unix_table_lock);
2796 
2797 	if (!*pos)
2798 		return SEQ_START_TOKEN;
2799 
2800 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2801 		return NULL;
2802 
2803 	return unix_next_socket(seq, NULL, pos);
2804 }
2805 
2806 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2807 {
2808 	++*pos;
2809 	return unix_next_socket(seq, v, pos);
2810 }
2811 
2812 static void unix_seq_stop(struct seq_file *seq, void *v)
2813 	__releases(unix_table_lock)
2814 {
2815 	spin_unlock(&unix_table_lock);
2816 }
2817 
2818 static int unix_seq_show(struct seq_file *seq, void *v)
2819 {
2820 
2821 	if (v == SEQ_START_TOKEN)
2822 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2823 			 "Inode Path\n");
2824 	else {
2825 		struct sock *s = v;
2826 		struct unix_sock *u = unix_sk(s);
2827 		unix_state_lock(s);
2828 
2829 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2830 			s,
2831 			atomic_read(&s->sk_refcnt),
2832 			0,
2833 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2834 			s->sk_type,
2835 			s->sk_socket ?
2836 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2837 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2838 			sock_i_ino(s));
2839 
2840 		if (u->addr) {	/* under unix_table_lock here */
2841 			int i, len;
2842 			seq_putc(seq, ' ');
2843 
2844 			i = 0;
2845 			len = u->addr->len - sizeof(short);
2846 			if (!UNIX_ABSTRACT(s))
2847 				len--;
2848 			else {
2849 				seq_putc(seq, '@');
2850 				i++;
2851 			}
2852 			for ( ; i < len; i++)
2853 				seq_putc(seq, u->addr->name->sun_path[i]);
2854 		}
2855 		unix_state_unlock(s);
2856 		seq_putc(seq, '\n');
2857 	}
2858 
2859 	return 0;
2860 }
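/*
 * Editor's note: an illustrative /proc/net/unix line produced by the
 * format above (values are made up; shown for a listening stream socket):
 *
 *	ffff8800b8d7a000: 00000002 00000000 00010000 0001 01 23456 /run/x.sock
 *
 * i.e. Num, RefCount, Protocol, Flags (__SO_ACCEPTCON), Type (SOCK_STREAM),
 * St (SS_UNCONNECTED), Inode, Path.
 */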
2861 
2862 static const struct seq_operations unix_seq_ops = {
2863 	.start  = unix_seq_start,
2864 	.next   = unix_seq_next,
2865 	.stop   = unix_seq_stop,
2866 	.show   = unix_seq_show,
2867 };
2868 
2869 static int unix_seq_open(struct inode *inode, struct file *file)
2870 {
2871 	return seq_open_net(inode, file, &unix_seq_ops,
2872 			    sizeof(struct seq_net_private));
2873 }
2874 
2875 static const struct file_operations unix_seq_fops = {
2876 	.owner		= THIS_MODULE,
2877 	.open		= unix_seq_open,
2878 	.read		= seq_read,
2879 	.llseek		= seq_lseek,
2880 	.release	= seq_release_net,
2881 };
2882 
2883 #endif
2884 
2885 static const struct net_proto_family unix_family_ops = {
2886 	.family = PF_UNIX,
2887 	.create = unix_create,
2888 	.owner	= THIS_MODULE,
2889 };
2890 
2891 
2892 static int __net_init unix_net_init(struct net *net)
2893 {
2894 	int error = -ENOMEM;
2895 
2896 	net->unx.sysctl_max_dgram_qlen = 10;
2897 	if (unix_sysctl_register(net))
2898 		goto out;
2899 
2900 #ifdef CONFIG_PROC_FS
2901 	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2902 		unix_sysctl_unregister(net);
2903 		goto out;
2904 	}
2905 #endif
2906 	error = 0;
2907 out:
2908 	return error;
2909 }
2910 
2911 static void __net_exit unix_net_exit(struct net *net)
2912 {
2913 	unix_sysctl_unregister(net);
2914 	remove_proc_entry("unix", net->proc_net);
2915 }
2916 
2917 static struct pernet_operations unix_net_ops = {
2918 	.init = unix_net_init,
2919 	.exit = unix_net_exit,
2920 };
2921 
2922 static int __init af_unix_init(void)
2923 {
2924 	int rc = -1;
2925 
2926 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2927 
2928 	rc = proto_register(&unix_proto, 1);
2929 	if (rc != 0) {
2930 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2931 		goto out;
2932 	}
2933 
2934 	sock_register(&unix_family_ops);
2935 	register_pernet_subsys(&unix_net_ops);
2936 out:
2937 	return rc;
2938 }
2939 
2940 static void __exit af_unix_exit(void)
2941 {
2942 	sock_unregister(PF_UNIX);
2943 	proto_unregister(&unix_proto);
2944 	unregister_pernet_subsys(&unix_net_ops);
2945 }
2946 
2947 /* Earlier than device_initcall() so that other drivers invoking
2948    request_module() don't end up in a loop when modprobe tries
2949    to use a UNIX socket. But later than subsys_initcall() because
2950    we depend on stuff initialised there */
2951 fs_initcall(af_unix_init);
2952 module_exit(af_unix_exit);
2953 
2954 MODULE_LICENSE("GPL");
2955 MODULE_ALIAS_NETPROTO(PF_UNIX);
2956