// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko EiBfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations.
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair.
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+).
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
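
/*
 * Editor's illustrative userspace sketch (not part of the original file)
 * of the "abstract" binding described above.  The name starts with a NUL
 * byte and is a raw byte sequence, not a C string, so the address length
 * passed to bind() must count exactly the bytes making up the name
 * (assumes the usual <sys/socket.h>, <sys/un.h>, <stddef.h> definitions):
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	sun.sun_path[0] = '\0';			// abstract namespace
 *	memcpy(sun.sun_path + 1, "demo", 4);	// name bytes, no terminator
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */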

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the unix_table_lock spinlock;
 *    each socket's state is protected by a separate spinlock.
 */
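
/*
 * Editor's sketch of the canonical pattern this strategy implies (it
 * mirrors unix_find_socket_byname() and unix_peer_get() below): hold
 * the global table lock only long enough to find and pin a socket,
 * drop it, and only then take the per-socket state lock.
 *
 *	spin_lock(&unix_table_lock);
 *	s = __unix_find_socket_byname(net, sunname, len, type, hash);
 *	if (s)
 *		sock_hold(s);		// pin so it can't go away
 *	spin_unlock(&unix_table_lock);	// don't hold both locks longer
 *	if (s) {
 *		unix_state_lock(s);	// per-socket state lock
 *		// ... inspect or modify socket state ...
 *		unix_state_unlock(s);
 *		sock_put(s);
 *	}
 */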

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it does not start with a zero byte, it must be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
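
/*
 * Editor's worked example of the length rule above (assuming the usual
 * Linux ABI where sizeof(short), i.e. the sun_family field, is 2 bytes):
 * for the pathname form "/tmp/x", the returned length counts the family
 * field plus the path plus its terminating NUL, regardless of how much
 * the caller passed beyond the string:
 *
 *	strlen("/tmp/x") + 1 + sizeof(short) = 6 + 1 + 2 = 9
 *
 * For the abstract form (sun_path[0] == '\0') the caller-supplied length
 * is kept as-is and *hashp is computed over the raw name bytes instead.
 */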

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers,
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it, and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
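
/*
 * Editor's illustrative userspace scenario for the mechanism above
 * (hypothetical names): many clients connect()ed to one datagram
 * server such as /dev/log.  A would-be writer that found the server's
 * queue full sleeps in poll() on its *own* socket and relies on the
 * peer_wait relay to learn that the server's queue drained:
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	connect(fd, (struct sockaddr *)&server, server_len);
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// woken via the relay below, even though
 *				// the server's wait queue isn't ours
 *	send(fd, buf, n, 0);
 */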
359 
unix_dgram_peer_wake_relay(wait_queue_entry_t * q,unsigned mode,int flags,void * key)360 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
361 				      void *key)
362 {
363 	struct unix_sock *u;
364 	wait_queue_head_t *u_sleep;
365 
366 	u = container_of(q, struct unix_sock, peer_wake);
367 
368 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
369 			    q);
370 	u->peer_wake.private = NULL;
371 
372 	/* relaying can only happen while the wq still exists */
373 	u_sleep = sk_sleep(&u->sk);
374 	if (u_sleep)
375 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
376 
377 	return 0;
378 }
379 
unix_dgram_peer_wake_connect(struct sock * sk,struct sock * other)380 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
381 {
382 	struct unix_sock *u, *u_other;
383 	int rc;
384 
385 	u = unix_sk(sk);
386 	u_other = unix_sk(other);
387 	rc = 0;
388 	spin_lock(&u_other->peer_wait.lock);
389 
390 	if (!u->peer_wake.private) {
391 		u->peer_wake.private = other;
392 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
393 
394 		rc = 1;
395 	}
396 
397 	spin_unlock(&u_other->peer_wait.lock);
398 	return rc;
399 }
400 
unix_dgram_peer_wake_disconnect(struct sock * sk,struct sock * other)401 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
402 					    struct sock *other)
403 {
404 	struct unix_sock *u, *u_other;
405 
406 	u = unix_sk(sk);
407 	u_other = unix_sk(other);
408 	spin_lock(&u_other->peer_wait.lock);
409 
410 	if (u->peer_wake.private == other) {
411 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
412 		u->peer_wake.private = NULL;
413 	}
414 
415 	spin_unlock(&u_other->peer_wait.lock);
416 }
417 
unix_dgram_peer_wake_disconnect_wakeup(struct sock * sk,struct sock * other)418 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
419 						   struct sock *other)
420 {
421 	unix_dgram_peer_wake_disconnect(sk, other);
422 	wake_up_interruptible_poll(sk_sleep(sk),
423 				   EPOLLOUT |
424 				   EPOLLWRNORM |
425 				   EPOLLWRBAND);
426 }
427 
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
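
/*
 * Editor's illustrative sequence for the purge above (hypothetical
 * addresses): reconnecting a datagram socket discards anything the old
 * peer already queued to us, and the old peer sees ECONNRESET if it was
 * connected back to us.
 *
 *	connect(fd, (struct sockaddr *)&peer_a, len_a);	// talk to A
 *	// ... A queues replies into our receive queue ...
 *	connect(fd, (struct sockaddr *)&peer_b, len_b);	// switch to B
 *	// our queue is purged; if A was connected to us, A gets ECONNRESET
 */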

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}


static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 > sk2)
		swap(sk1, sk2);

	unix_state_lock(sk1);
	unix_state_lock_nested(sk2, U_LOCK_SECOND);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we were to allocate after the state is locked,
	   we would have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
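
/*
 * Editor's userspace sketch of what the function above sets up: the
 * pair comes back already connected, with peer credentials initialised
 * by init_peercred(), so SO_PEERCRED works immediately on either end:
 *
 *	int sv[2];
 *	struct ucred cr;
 *	socklen_t len = sizeof(cr);
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	getsockopt(sv[0], SOL_SOCKET, SO_PEERCRED, &cr, &len);
 *	// cr.pid/cr.uid/cr.gid describe the creating process
 */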

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeuing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
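
/*
 * Editor's illustrative userspace counterpart (hypothetical buffer
 * sizes): peeking at a message that carries SCM_RIGHTS installs a
 * *duplicate* of the passed fd while the message stays queued, which
 * is exactly why the lock/unlock barrier above must serialize with
 * the garbage collector.
 *
 *	char data[128], ctrl[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = ctrl,
 *			     .msg_controllen = sizeof(ctrl) };
 *	recvmsg(fd, &mh, MSG_PEEK);	// fd installed, message not dequeued
 */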

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
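
/*
 * Editor's userspace sketch for the rule above: either end asserting
 * SO_PASSCRED is enough for the receiver to get SCM_CREDENTIALS, even
 * if the sender never attached explicit credentials.
 *
 *	int on = 1;
 *	setsockopt(rfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// a subsequent recvmsg() on rfd yields an SCM_CREDENTIALS cmsg
 *	// with the writer's pid/uid/gid filled in by the kernel
 */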

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

1857 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1858  * bytes, and a minimum of a full page.
1859  */
1860 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1861 
unix_stream_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)1862 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1863 			       size_t len)
1864 {
1865 	struct sock *sk = sock->sk;
1866 	struct sock *other = NULL;
1867 	int err, size;
1868 	struct sk_buff *skb;
1869 	int sent = 0;
1870 	struct scm_cookie scm;
1871 	bool fds_sent = false;
1872 	int data_len;
1873 
1874 	wait_for_unix_gc();
1875 	err = scm_send(sock, msg, &scm, false);
1876 	if (err < 0)
1877 		return err;
1878 
1879 	err = -EOPNOTSUPP;
1880 	if (msg->msg_flags&MSG_OOB)
1881 		goto out_err;
1882 
1883 	if (msg->msg_namelen) {
1884 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1885 		goto out_err;
1886 	} else {
1887 		err = -ENOTCONN;
1888 		other = unix_peer(sk);
1889 		if (!other)
1890 			goto out_err;
1891 	}
1892 
1893 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1894 		goto pipe_err;
1895 
1896 	while (sent < len) {
1897 		size = len - sent;
1898 
1899 		/* Keep two messages in the pipe so it schedules better */
1900 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1901 
1902 		/* allow fallback to order-0 allocations */
1903 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1904 
1905 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1906 
1907 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1908 
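		/* (size - data_len) bytes go into the linear skb head;
		 * data_len bytes are placed in page fragments.
		 */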
		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}

static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

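	/* The if (false) block below is only entered via the "goto
	 * alloc_skb" jumps further down: it allocates a fresh skb with
	 * every lock dropped, then falls through to retry.
	 */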
	if (false) {
alloc_skb:
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already-present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	spin_lock(&other->sk_receive_queue.lock);
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is the fast path; strictly the check is not
		 * needed, since consume_skb(NULL) does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		unix_scm_to_skb(&scm, skb, false);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
	}

	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}

static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

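	/* Try to dequeue (or, with MSG_PEEK, peek at) a datagram; if none
	 * is queued and blocking is allowed, wait for more packets and
	 * retry.
	 */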
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose this for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly, however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}

/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

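	/* We stop waiting when the queue tail changed (new data arrived,
	 * possibly appended in place to the last skb), or on error,
	 * shutdown, pending signal, or expired timeout.
	 */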
	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};

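/* Generic stream receive path: the recv_actor callback copies one chunk
 * of a queued skb either into a msghdr (recvmsg) or into a pipe
 * (splice_read).
 */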
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg()
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

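	/* skip is the not-yet-consumed peek offset (SO_PEEK_OFF); that
	 * much data is stepped over without being copied.
	 */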
	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}

static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

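/* SIOCUNIXFILE: hand the caller an O_CLOEXEC, O_PATH file descriptor
 * referring to the filesystem object this socket is bound to.
 */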
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
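
/* Illustrative userspace usage of the ioctls above (a sketch, not part
 * of the kernel sources):
 *
 *	int pending, unsent, pathfd;
 *	ioctl(fd, SIOCINQ, &pending);	(bytes queued for reading)
 *	ioctl(fd, SIOCOUTQ, &unsent);	(sent bytes not yet consumed)
 *	pathfd = ioctl(fd, SIOCUNIXFILE, 0);	(O_PATH fd to the bound
 *						 inode; needs CAP_NET_ADMIN)
 */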

#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

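	/* A connected datagram sender is also throttled by its peer: if
	 * the peer's receive queue is full, report not-writable and let
	 * unix_dgram_peer_wake_me() arm a wakeup for when space frees up.
	 */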
	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
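
/* The seq_file position packs a hash bucket index and a 1-based offset
 * within that bucket into a single loff_t.
 */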

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);