/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of sockets being hashed (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations.
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair.
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor the BSD first socket fstat
 *		twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
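
/* Illustrative userspace sketch (not part of this file) of binding into the
 * abstract namespace described above: sun_path[0] is 0 and the name is the
 * (not NUL terminated) byte sequence after it, delimited by the address
 * length passed to bind(). The name "demo" is a made-up example.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(int fd)
 *	{
 *		struct sockaddr_un sa;
 *		socklen_t len;
 *
 *		memset(&sa, 0, sizeof(sa));
 *		sa.sun_family = AF_UNIX;
 *		memcpy(sa.sun_path + 1, "demo", 4);
 *		len = offsetof(struct sockaddr_un, sun_path) + 1 + 4;
 *		return bind(fd, (struct sockaddr *)&sa, len);
 *	}
 */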

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

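/* Sockets bound to a filesystem path get addr->hash set to UNIX_HASH_SIZE
 * in unix_bind(), so a hash below that identifies an abstract address.
 */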
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket's state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
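
/* Worked example for the two forms handled by unix_mkname() above (values
 * assume the usual 2-byte sun_family): for the filesystem name "/tmp/x"
 * the path is NUL terminated in place and the returned length is
 * strlen("/tmp/x") + 1 + sizeof(short) = 9; for the abstract name
 * "\0demo" passed with len = 7, len is returned unchanged and *hashp is
 * folded from all 7 address bytes.
 */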

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_real_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket that is not itself
 * connected to the first socket (e.g., /dev/log), clients may only
 * enqueue more messages if the present receive queue of the server
 * socket is not "too large". This means there's a second writeability
 * condition poll and sendmsg need to test. The dgram recv code will do
 * a wake up on the peer_wait wait queue of a socket upon reception of
 * a datagram, which needs to be propagated to sleeping would-be
 * writers, since these might not have sent anything so far. This can't
 * be accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it, and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
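
/* An illustrative walk-through of the relay machinery implemented below
 * ("/dev/log" stands in for any such server socket):
 *
 *	1. A client dgram socket is connected to /dev/log; a non-blocking
 *	   send or a poll for write hits the flow control condition, so
 *	   unix_dgram_peer_wake_me() hooks the client's peer_wake entry
 *	   onto the server's peer_wait queue and the would-be writer backs
 *	   off (-EAGAIN, or poll reporting not writable).
 *	2. The server receives a datagram; the dgram recv code wakes
 *	   peer_wait, which invokes unix_dgram_peer_wake_relay().
 *	3. The relay unhooks the entry and relays the wake up to the
 *	   client's own wait queue, so a poller sees the socket become
 *	   writable again.
 */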

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

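/* Writable while no more than a quarter of the send buffer is consumed by
 * in-flight data (wmem_alloc << 2 <= sndbuf), and never while listening.
 */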
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows us to
 * do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
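		/* fall through */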
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory and calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { NULL, NULL };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
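	/* Take the two locks in address order so that concurrent
	 * double-locks of the same pair always lock in the same order
	 * and cannot deadlock.
	 */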
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all, allocate resources.
	   If we do it after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. Connect to self and simultaneous
	   connect attempts are eliminated by checking socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN, we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take it and send info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() has been SMP-safe since last summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk)
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->user, scm->fp->fp[i]);
	return max_level;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
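
/* Illustrative userspace counterpart (not part of this file): a receiver
 * that asserts SOCK_PASSCRED via SO_PASSCRED and reads the SCM_CREDENTIALS
 * message the sender's plain write() implicitly generated. struct ucred
 * needs _GNU_SOURCE on glibc.
 *
 *	#define _GNU_SOURCE
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	void print_peer_creds(int fd)
 *	{
 *		char buf[64], cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		};
 *		struct cmsghdr *c;
 *		int one = 1;
 *
 *		setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *		if (recvmsg(fd, &msg, 0) < 0)
 *			return;
 *		for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
 *			if (c->cmsg_level == SOL_SOCKET &&
 *			    c->cmsg_type == SCM_CREDENTIALS) {
 *				struct ucred *u = (struct ucred *)CMSG_DATA(c);
 *				printf("pid=%d uid=%u gid=%u\n",
 *				       u->pid, u->uid, u->gid);
 *			}
 *	}
 */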

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error be here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}
1768 
1769 	err = -EPIPE;
1770 	if (other->sk_shutdown & RCV_SHUTDOWN)
1771 		goto out_unlock;
1772 
1773 	if (sk->sk_type != SOCK_SEQPACKET) {
1774 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1775 		if (err)
1776 			goto out_unlock;
1777 	}
1778 
1779 	/* other == sk && unix_peer(other) != sk if
1780 	 * - unix_peer(sk) == NULL, destination address bound to sk
1781 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1782 	 */
1783 	if (other != sk &&
1784 	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1785 		if (timeo) {
1786 			timeo = unix_wait_for_peer(other, timeo);
1787 
1788 			err = sock_intr_errno(timeo);
1789 			if (signal_pending(current))
1790 				goto out_free;
1791 
1792 			goto restart;
1793 		}
1794 
1795 		if (!sk_locked) {
1796 			unix_state_unlock(other);
1797 			unix_state_double_lock(sk, other);
1798 		}
1799 
1800 		if (unix_peer(sk) != other ||
1801 		    unix_dgram_peer_wake_me(sk, other)) {
1802 			err = -EAGAIN;
1803 			sk_locked = 1;
1804 			goto out_unlock;
1805 		}
1806 
1807 		if (!sk_locked) {
1808 			sk_locked = 1;
1809 			goto restart_locked;
1810 		}
1811 	}
1812 
1813 	if (unlikely(sk_locked))
1814 		unix_state_unlock(sk);
1815 
1816 	if (sock_flag(other, SOCK_RCVTSTAMP))
1817 		__net_timestamp(skb);
1818 	maybe_add_creds(skb, sock, other);
1819 	skb_queue_tail(&other->sk_receive_queue, skb);
1820 	if (max_level > unix_sk(other)->recursion_level)
1821 		unix_sk(other)->recursion_level = max_level;
1822 	unix_state_unlock(other);
1823 	other->sk_data_ready(other);
1824 	sock_put(other);
1825 	scm_destroy(&scm);
1826 	return len;
1827 
1828 out_unlock:
1829 	if (sk_locked)
1830 		unix_state_unlock(sk);
1831 	unix_state_unlock(other);
1832 out_free:
1833 	kfree_skb(skb);
1834 out:
1835 	if (other)
1836 		sock_put(other);
1837 	scm_destroy(&scm);
1838 	return err;
1839 }
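/*
 * Illustrative userspace sketch (not part of this file): sending a
 * datagram with an attached file descriptor, the path that ends up in
 * unix_dgram_sendmsg() above. The helper name and the socket path are
 * hypothetical.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static ssize_t send_fd_dgram(int sock, const char *path, int fd)
 *	{
 *		struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *		char buf[1] = { 0 }, cbuf[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = buf, .iov_len = 1 };
 *		struct msghdr msg = { 0 };
 *		struct cmsghdr *cmsg;
 *
 *		strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
 *		msg.msg_name = &addr;
 *		msg.msg_namelen = sizeof(addr);
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = cbuf;
 *		msg.msg_controllen = sizeof(cbuf);
 *
 *		cmsg = CMSG_FIRSTHDR(&msg);
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;	// picked up by unix_scm_to_skb()
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
 *
 *		return sendmsg(sock, &msg, 0);
 *	}
 */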
1840 
1841 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1842  * bytes, with a minimum of a full page.
1843  */
1844 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
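/*
 * Worked example, assuming 4 KiB pages: get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ == PAGE_SIZE << 3 == 32768 bytes, i.e. eight
 * order-0 pages of paged data per skb. With 64 KiB pages get_order(32768)
 * is 0 and the limit is a single page.
 */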
1845 
1846 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1847 			       size_t len)
1848 {
1849 	struct sock *sk = sock->sk;
1850 	struct sock *other = NULL;
1851 	int err, size;
1852 	struct sk_buff *skb;
1853 	int sent = 0;
1854 	struct scm_cookie scm;
1855 	bool fds_sent = false;
1856 	int max_level;
1857 	int data_len;
1858 
1859 	wait_for_unix_gc();
1860 	err = scm_send(sock, msg, &scm, false);
1861 	if (err < 0)
1862 		return err;
1863 
1864 	err = -EOPNOTSUPP;
1865 	if (msg->msg_flags&MSG_OOB)
1866 		goto out_err;
1867 
1868 	if (msg->msg_namelen) {
1869 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1870 		goto out_err;
1871 	} else {
1872 		err = -ENOTCONN;
1873 		other = unix_peer(sk);
1874 		if (!other)
1875 			goto out_err;
1876 	}
1877 
1878 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1879 		goto pipe_err;
1880 
1881 	while (sent < len) {
1882 		size = len - sent;
1883 
1884 		/* Keep two messages in the pipe so it schedules better */
1885 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1886 
1887 		/* allow fallback to order-0 allocations */
1888 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1889 
1890 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1891 
1892 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1893 
1894 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1895 					   msg->msg_flags & MSG_DONTWAIT, &err,
1896 					   get_order(UNIX_SKB_FRAGS_SZ));
1897 		if (!skb)
1898 			goto out_err;
1899 
1900 		/* Only send the fds in the first buffer */
1901 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1902 		if (err < 0) {
1903 			kfree_skb(skb);
1904 			goto out_err;
1905 		}
1906 		max_level = err + 1;
1907 		fds_sent = true;
1908 
1909 		skb_put(skb, size - data_len);
1910 		skb->data_len = data_len;
1911 		skb->len = size;
1912 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1913 		if (err) {
1914 			kfree_skb(skb);
1915 			goto out_err;
1916 		}
1917 
1918 		unix_state_lock(other);
1919 
1920 		if (sock_flag(other, SOCK_DEAD) ||
1921 		    (other->sk_shutdown & RCV_SHUTDOWN))
1922 			goto pipe_err_free;
1923 
1924 		maybe_add_creds(skb, sock, other);
1925 		skb_queue_tail(&other->sk_receive_queue, skb);
1926 		if (max_level > unix_sk(other)->recursion_level)
1927 			unix_sk(other)->recursion_level = max_level;
1928 		unix_state_unlock(other);
1929 		other->sk_data_ready(other);
1930 		sent += size;
1931 	}
1932 
1933 	scm_destroy(&scm);
1934 
1935 	return sent;
1936 
1937 pipe_err_free:
1938 	unix_state_unlock(other);
1939 	kfree_skb(skb);
1940 pipe_err:
1941 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1942 		send_sig(SIGPIPE, current, 0);
1943 	err = -EPIPE;
1944 out_err:
1945 	scm_destroy(&scm);
1946 	return sent ? : err;
1947 }
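/*
 * Illustrative userspace sketch (not part of this file): a stream send
 * after the peer has shut down its receive side takes the pipe_err path
 * above, returning -EPIPE and raising SIGPIPE unless MSG_NOSIGNAL is
 * set. The helper names are hypothetical.
 *
 *	#include <errno.h>
 *	#include <sys/socket.h>
 *
 *	static ssize_t send_nosig(int sock, const void *buf, size_t len)
 *	{
 *		ssize_t n = send(sock, buf, len, MSG_NOSIGNAL);
 *
 *		if (n < 0 && errno == EPIPE)
 *			handle_peer_gone();	// hypothetical; no SIGPIPE raised
 *		return n;
 *	}
 */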
1948 
1949 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1950 				    int offset, size_t size, int flags)
1951 {
1952 	int err;
1953 	bool send_sigpipe = false;
1954 	bool init_scm = true;
1955 	struct scm_cookie scm;
1956 	struct sock *other, *sk = socket->sk;
1957 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1958 
1959 	if (flags & MSG_OOB)
1960 		return -EOPNOTSUPP;
1961 
1962 	other = unix_peer(sk);
1963 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1964 		return -ENOTCONN;
1965 
1966 	if (false) {
1967 alloc_skb:
1968 		unix_state_unlock(other);
1969 		mutex_unlock(&unix_sk(other)->iolock);
1970 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1971 					      &err, 0);
1972 		if (!newskb)
1973 			goto err;
1974 	}
1975 
1976 	/* we must acquire iolock as we modify already present
1977 	 * skbs in the sk_receive_queue and mess with skb->len
1978 	 */
1979 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1980 	if (err) {
1981 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1982 		goto err;
1983 	}
1984 
1985 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1986 		err = -EPIPE;
1987 		send_sigpipe = true;
1988 		goto err_unlock;
1989 	}
1990 
1991 	unix_state_lock(other);
1992 
1993 	if (sock_flag(other, SOCK_DEAD) ||
1994 	    other->sk_shutdown & RCV_SHUTDOWN) {
1995 		err = -EPIPE;
1996 		send_sigpipe = true;
1997 		goto err_state_unlock;
1998 	}
1999 
2000 	if (init_scm) {
2001 		err = maybe_init_creds(&scm, socket, other);
2002 		if (err)
2003 			goto err_state_unlock;
2004 		init_scm = false;
2005 	}
2006 
2007 	skb = skb_peek_tail(&other->sk_receive_queue);
2008 	if (tail && tail == skb) {
2009 		skb = newskb;
2010 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2011 		if (newskb) {
2012 			skb = newskb;
2013 		} else {
2014 			tail = skb;
2015 			goto alloc_skb;
2016 		}
2017 	} else if (newskb) {
2018 		/* This is the fast path; the check is not strictly
2019 		 * needed, since consume_skb() is safe to call with
2020 		 * newskb == NULL and would do no harm
2021 		 */
2022 		consume_skb(newskb);
2023 		newskb = NULL;
2024 	}
2025 
2026 	if (skb_append_pagefrags(skb, page, offset, size)) {
2027 		tail = skb;
2028 		goto alloc_skb;
2029 	}
2030 
2031 	skb->len += size;
2032 	skb->data_len += size;
2033 	skb->truesize += size;
2034 	atomic_add(size, &sk->sk_wmem_alloc);
2035 
2036 	if (newskb) {
2037 		err = unix_scm_to_skb(&scm, skb, false);
2038 		if (err)
2039 			goto err_state_unlock;
2040 		spin_lock(&other->sk_receive_queue.lock);
2041 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2042 		spin_unlock(&other->sk_receive_queue.lock);
2043 	}
2044 
2045 	unix_state_unlock(other);
2046 	mutex_unlock(&unix_sk(other)->iolock);
2047 
2048 	other->sk_data_ready(other);
2049 	scm_destroy(&scm);
2050 	return size;
2051 
2052 err_state_unlock:
2053 	unix_state_unlock(other);
2054 err_unlock:
2055 	mutex_unlock(&unix_sk(other)->iolock);
2056 err:
2057 	kfree_skb(newskb);
2058 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2059 		send_sig(SIGPIPE, current, 0);
2060 	if (!init_scm)
2061 		scm_destroy(&scm);
2062 	return err;
2063 }
2064 
2065 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2066 				  size_t len)
2067 {
2068 	int err;
2069 	struct sock *sk = sock->sk;
2070 
2071 	err = sock_error(sk);
2072 	if (err)
2073 		return err;
2074 
2075 	if (sk->sk_state != TCP_ESTABLISHED)
2076 		return -ENOTCONN;
2077 
2078 	if (msg->msg_namelen)
2079 		msg->msg_namelen = 0;
2080 
2081 	return unix_dgram_sendmsg(sock, msg, len);
2082 }
2083 
2084 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2085 				  size_t size, int flags)
2086 {
2087 	struct sock *sk = sock->sk;
2088 
2089 	if (sk->sk_state != TCP_ESTABLISHED)
2090 		return -ENOTCONN;
2091 
2092 	return unix_dgram_recvmsg(sock, msg, size, flags);
2093 }
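/*
 * Illustrative userspace sketch (not part of this file): SOCK_SEQPACKET
 * preserves record boundaries like a datagram socket but must be
 * connected first, which is why both wrappers above check
 * TCP_ESTABLISHED and then delegate to the dgram paths. The address
 * setup is elided.
 *
 *	int s = socket(AF_UNIX, SOCK_SEQPACKET, 0);
 *
 *	connect(s, (struct sockaddr *)&addr, sizeof(addr));
 *	send(s, "one record", 10, 0);	// queued as a single unit
 *	recv(s, buf, sizeof(buf), 0);	// returns at most one record
 */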
2094 
2095 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2096 {
2097 	struct unix_sock *u = unix_sk(sk);
2098 
2099 	if (u->addr) {
2100 		msg->msg_namelen = u->addr->len;
2101 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2102 	}
2103 }
2104 
2105 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2106 			      size_t size, int flags)
2107 {
2108 	struct scm_cookie scm;
2109 	struct sock *sk = sock->sk;
2110 	struct unix_sock *u = unix_sk(sk);
2111 	struct sk_buff *skb, *last;
2112 	long timeo;
2113 	int err;
2114 	int peeked, skip;
2115 
2116 	err = -EOPNOTSUPP;
2117 	if (flags&MSG_OOB)
2118 		goto out;
2119 
2120 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2121 
2122 	do {
2123 		mutex_lock(&u->iolock);
2124 
2125 		skip = sk_peek_offset(sk, flags);
2126 		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2127 					      &last);
2128 		if (skb)
2129 			break;
2130 
2131 		mutex_unlock(&u->iolock);
2132 
2133 		if (err != -EAGAIN)
2134 			break;
2135 	} while (timeo &&
2136 		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2137 
2138 	if (!skb) { /* implies iolock unlocked */
2139 		unix_state_lock(sk);
2140 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2141 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2142 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2143 			err = 0;
2144 		unix_state_unlock(sk);
2145 		goto out;
2146 	}
2147 
2148 	if (wq_has_sleeper(&u->peer_wait))
2149 		wake_up_interruptible_sync_poll(&u->peer_wait,
2150 						POLLOUT | POLLWRNORM |
2151 						POLLWRBAND);
2152 
2153 	if (msg->msg_name)
2154 		unix_copy_addr(msg, skb->sk);
2155 
2156 	if (size > skb->len - skip)
2157 		size = skb->len - skip;
2158 	else if (size < skb->len - skip)
2159 		msg->msg_flags |= MSG_TRUNC;
2160 
2161 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2162 	if (err)
2163 		goto out_free;
2164 
2165 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2166 		__sock_recv_timestamp(msg, sk, skb);
2167 
2168 	memset(&scm, 0, sizeof(scm));
2169 
2170 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2171 	unix_set_secdata(&scm, skb);
2172 
2173 	if (!(flags & MSG_PEEK)) {
2174 		if (UNIXCB(skb).fp)
2175 			unix_detach_fds(&scm, skb);
2176 
2177 		sk_peek_offset_bwd(sk, skb->len);
2178 	} else {
2179 		/* It is questionable: on PEEK we could:
2180 		   - not return fds - good, but too simple 8)
2181 		   - return fds, and not return them on read (old strategy,
2182 		     apparently wrong)
2183 		   - clone fds (chosen here as the most universal
2184 		     solution)
2185 
2186 		   POSIX 1003.1g does not actually define this clearly
2187 		   at all. Then again, POSIX 1003.1g doesn't define a lot
2188 		   of things clearly!
2189 
2190 		*/
2191 
2192 		sk_peek_offset_fwd(sk, size);
2193 
2194 		if (UNIXCB(skb).fp)
2195 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2196 	}
2197 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2198 
2199 	scm_recv(sock, msg, &scm, flags);
2200 
2201 out_free:
2202 	skb_free_datagram(sk, skb);
2203 	mutex_unlock(&u->iolock);
2204 out:
2205 	return err;
2206 }
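/*
 * Illustrative userspace sketch (not part of this file): per the PEEK
 * note above, fds are cloned on MSG_PEEK, so peeking and then reading
 * the same datagram installs two descriptors for the same open file.
 * Variable names and sizes are hypothetical.
 *
 *	char data[128], cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *
 *	recvmsg(sock, &msg, MSG_PEEK);	// installs a cloned fd
 *	recvmsg(sock, &msg, 0);		// installs a second fd
 */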
2207 
2208 /*
2209  *	Sleep until more data has arrived. But check for races.
2210  */
2211 static long unix_stream_data_wait(struct sock *sk, long timeo,
2212 				  struct sk_buff *last, unsigned int last_len,
2213 				  bool freezable)
2214 {
2215 	struct sk_buff *tail;
2216 	DEFINE_WAIT(wait);
2217 
2218 	unix_state_lock(sk);
2219 
2220 	for (;;) {
2221 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2222 
2223 		tail = skb_peek_tail(&sk->sk_receive_queue);
2224 		if (tail != last ||
2225 		    (tail && tail->len != last_len) ||
2226 		    sk->sk_err ||
2227 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2228 		    signal_pending(current) ||
2229 		    !timeo)
2230 			break;
2231 
2232 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2233 		unix_state_unlock(sk);
2234 		if (freezable)
2235 			timeo = freezable_schedule_timeout(timeo);
2236 		else
2237 			timeo = schedule_timeout(timeo);
2238 		unix_state_lock(sk);
2239 
2240 		if (sock_flag(sk, SOCK_DEAD))
2241 			break;
2242 
2243 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2244 	}
2245 
2246 	finish_wait(sk_sleep(sk), &wait);
2247 	unix_state_unlock(sk);
2248 	return timeo;
2249 }
2250 
2251 static unsigned int unix_skb_len(const struct sk_buff *skb)
2252 {
2253 	return skb->len - UNIXCB(skb).consumed;
2254 }
2255 
2256 struct unix_stream_read_state {
2257 	int (*recv_actor)(struct sk_buff *, int, int,
2258 			  struct unix_stream_read_state *);
2259 	struct socket *socket;
2260 	struct msghdr *msg;
2261 	struct pipe_inode_info *pipe;
2262 	size_t size;
2263 	int flags;
2264 	unsigned int splice_flags;
2265 };
2266 
2267 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2268 				    bool freezable)
2269 {
2270 	struct scm_cookie scm;
2271 	struct socket *sock = state->socket;
2272 	struct sock *sk = sock->sk;
2273 	struct unix_sock *u = unix_sk(sk);
2274 	int copied = 0;
2275 	int flags = state->flags;
2276 	int noblock = flags & MSG_DONTWAIT;
2277 	bool check_creds = false;
2278 	int target;
2279 	int err = 0;
2280 	long timeo;
2281 	int skip;
2282 	size_t size = state->size;
2283 	unsigned int last_len;
2284 
2285 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2286 		err = -EINVAL;
2287 		goto out;
2288 	}
2289 
2290 	if (unlikely(flags & MSG_OOB)) {
2291 		err = -EOPNOTSUPP;
2292 		goto out;
2293 	}
2294 
2295 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2296 	timeo = sock_rcvtimeo(sk, noblock);
2297 
2298 	memset(&scm, 0, sizeof(scm));
2299 
2300 	/* Lock the socket to prevent the queue from being reordered
2301 	 * while we sleep copying data to the message
2302 	 */
2303 	mutex_lock(&u->iolock);
2304 
2305 	if (flags & MSG_PEEK)
2306 		skip = sk_peek_offset(sk, flags);
2307 	else
2308 		skip = 0;
2309 
2310 	do {
2311 		int chunk;
2312 		bool drop_skb;
2313 		struct sk_buff *skb, *last;
2314 
2315 redo:
2316 		unix_state_lock(sk);
2317 		if (sock_flag(sk, SOCK_DEAD)) {
2318 			err = -ECONNRESET;
2319 			goto unlock;
2320 		}
2321 		last = skb = skb_peek(&sk->sk_receive_queue);
2322 		last_len = last ? last->len : 0;
2323 again:
2324 		if (skb == NULL) {
2325 			unix_sk(sk)->recursion_level = 0;
2326 			if (copied >= target)
2327 				goto unlock;
2328 
2329 			/*
2330 			 *	POSIX 1003.1g mandates this order.
2331 			 */
2332 
2333 			err = sock_error(sk);
2334 			if (err)
2335 				goto unlock;
2336 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2337 				goto unlock;
2338 
2339 			unix_state_unlock(sk);
2340 			if (!timeo) {
2341 				err = -EAGAIN;
2342 				break;
2343 			}
2344 
2345 			mutex_unlock(&u->iolock);
2346 
2347 			timeo = unix_stream_data_wait(sk, timeo, last,
2348 						      last_len, freezable);
2349 
2350 			if (signal_pending(current)) {
2351 				err = sock_intr_errno(timeo);
2352 				scm_destroy(&scm);
2353 				goto out;
2354 			}
2355 
2356 			mutex_lock(&u->iolock);
2357 			goto redo;
2358 unlock:
2359 			unix_state_unlock(sk);
2360 			break;
2361 		}
2362 
2363 		while (skip >= unix_skb_len(skb)) {
2364 			skip -= unix_skb_len(skb);
2365 			last = skb;
2366 			last_len = skb->len;
2367 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2368 			if (!skb)
2369 				goto again;
2370 		}
2371 
2372 		unix_state_unlock(sk);
2373 
2374 		if (check_creds) {
2375 			/* Never glue messages from different writers */
2376 			if (!unix_skb_scm_eq(skb, &scm))
2377 				break;
2378 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2379 			/* Copy credentials */
2380 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2381 			unix_set_secdata(&scm, skb);
2382 			check_creds = true;
2383 		}
2384 
2385 		/* Copy address just once */
2386 		if (state->msg && state->msg->msg_name) {
2387 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2388 					 state->msg->msg_name);
2389 			unix_copy_addr(state->msg, skb->sk);
2390 			sunaddr = NULL;
2391 		}
2392 
2393 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2394 		skb_get(skb);
2395 		chunk = state->recv_actor(skb, skip, chunk, state);
2396 		drop_skb = !unix_skb_len(skb);
2397 		/* skb is only safe to use if !drop_skb */
2398 		consume_skb(skb);
2399 		if (chunk < 0) {
2400 			if (copied == 0)
2401 				copied = -EFAULT;
2402 			break;
2403 		}
2404 		copied += chunk;
2405 		size -= chunk;
2406 
2407 		if (drop_skb) {
2408 			/* the skb was touched by a concurrent reader;
2409 			 * we should not expect anything from this skb
2410 			 * anymore and must treat it as invalid - we can
2411 			 * be sure it was dropped from the socket queue
2412 			 *
2413 			 * let's report a short read
2414 			 */
2415 			err = 0;
2416 			break;
2417 		}
2418 
2419 		/* Mark read part of skb as used */
2420 		if (!(flags & MSG_PEEK)) {
2421 			UNIXCB(skb).consumed += chunk;
2422 
2423 			sk_peek_offset_bwd(sk, chunk);
2424 
2425 			if (UNIXCB(skb).fp)
2426 				unix_detach_fds(&scm, skb);
2427 
2428 			if (unix_skb_len(skb))
2429 				break;
2430 
2431 			skb_unlink(skb, &sk->sk_receive_queue);
2432 			consume_skb(skb);
2433 
2434 			if (scm.fp)
2435 				break;
2436 		} else {
2437 			/* It is questionable, see note in unix_dgram_recvmsg.
2438 			 */
2439 			if (UNIXCB(skb).fp)
2440 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2441 
2442 			sk_peek_offset_fwd(sk, chunk);
2443 
2444 			if (UNIXCB(skb).fp)
2445 				break;
2446 
2447 			skip = 0;
2448 			last = skb;
2449 			last_len = skb->len;
2450 			unix_state_lock(sk);
2451 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2452 			if (skb)
2453 				goto again;
2454 			unix_state_unlock(sk);
2455 			break;
2456 		}
2457 	} while (size);
2458 
2459 	mutex_unlock(&u->iolock);
2460 	if (state->msg)
2461 		scm_recv(sock, state->msg, &scm, flags);
2462 	else
2463 		scm_destroy(&scm);
2464 out:
2465 	return copied ? : err;
2466 }
2467 
2468 static int unix_stream_read_actor(struct sk_buff *skb,
2469 				  int skip, int chunk,
2470 				  struct unix_stream_read_state *state)
2471 {
2472 	int ret;
2473 
2474 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2475 				    state->msg, chunk);
2476 	return ret ?: chunk;
2477 }
2478 
2479 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2480 			       size_t size, int flags)
2481 {
2482 	struct unix_stream_read_state state = {
2483 		.recv_actor = unix_stream_read_actor,
2484 		.socket = sock,
2485 		.msg = msg,
2486 		.size = size,
2487 		.flags = flags
2488 	};
2489 
2490 	return unix_stream_read_generic(&state, true);
2491 }
2492 
2493 static int unix_stream_splice_actor(struct sk_buff *skb,
2494 				    int skip, int chunk,
2495 				    struct unix_stream_read_state *state)
2496 {
2497 	return skb_splice_bits(skb, state->socket->sk,
2498 			       UNIXCB(skb).consumed + skip,
2499 			       state->pipe, chunk, state->splice_flags);
2500 }
2501 
2502 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2503 				       struct pipe_inode_info *pipe,
2504 				       size_t size, unsigned int flags)
2505 {
2506 	struct unix_stream_read_state state = {
2507 		.recv_actor = unix_stream_splice_actor,
2508 		.socket = sock,
2509 		.pipe = pipe,
2510 		.size = size,
2511 		.splice_flags = flags,
2512 	};
2513 
2514 	if (unlikely(*ppos))
2515 		return -ESPIPE;
2516 
2517 	if (sock->file->f_flags & O_NONBLOCK ||
2518 	    flags & SPLICE_F_NONBLOCK)
2519 		state.flags = MSG_DONTWAIT;
2520 
2521 	return unix_stream_read_generic(&state, false);
2522 }
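/*
 * Illustrative userspace sketch (not part of this file): splicing
 * stream data into a pipe lands in unix_stream_splice_read() above;
 * the input offset must be NULL or the call fails with -ESPIPE. The
 * length is hypothetical.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int pipefd[2];
 *
 *	pipe(pipefd);
 *	splice(sock, NULL, pipefd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */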
2523 
2524 static int unix_shutdown(struct socket *sock, int mode)
2525 {
2526 	struct sock *sk = sock->sk;
2527 	struct sock *other;
2528 
2529 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2530 		return -EINVAL;
2531 	/* This maps:
2532 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2533 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2534 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2535 	 */
2536 	++mode;
2537 
2538 	unix_state_lock(sk);
2539 	sk->sk_shutdown |= mode;
2540 	other = unix_peer(sk);
2541 	if (other)
2542 		sock_hold(other);
2543 	unix_state_unlock(sk);
2544 	sk->sk_state_change(sk);
2545 
2546 	if (other &&
2547 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2548 
2549 		int peer_mode = 0;
2550 
2551 		if (mode&RCV_SHUTDOWN)
2552 			peer_mode |= SEND_SHUTDOWN;
2553 		if (mode&SEND_SHUTDOWN)
2554 			peer_mode |= RCV_SHUTDOWN;
2555 		unix_state_lock(other);
2556 		other->sk_shutdown |= peer_mode;
2557 		unix_state_unlock(other);
2558 		other->sk_state_change(other);
2559 		if (peer_mode == SHUTDOWN_MASK)
2560 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2561 		else if (peer_mode & RCV_SHUTDOWN)
2562 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2563 	}
2564 	if (other)
2565 		sock_put(other);
2566 
2567 	return 0;
2568 }
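/*
 * Illustrative userspace sketch (not part of this file): shutting down
 * the write side of one end marks the peer's receive side shut down
 * above, so the peer reads EOF.
 *
 *	int sv[2];
 *	char buf[16];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	read(sv[1], buf, sizeof(buf));	// returns 0 (EOF)
 */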
2569 
2570 long unix_inq_len(struct sock *sk)
2571 {
2572 	struct sk_buff *skb;
2573 	long amount = 0;
2574 
2575 	if (sk->sk_state == TCP_LISTEN)
2576 		return -EINVAL;
2577 
2578 	spin_lock(&sk->sk_receive_queue.lock);
2579 	if (sk->sk_type == SOCK_STREAM ||
2580 	    sk->sk_type == SOCK_SEQPACKET) {
2581 		skb_queue_walk(&sk->sk_receive_queue, skb)
2582 			amount += unix_skb_len(skb);
2583 	} else {
2584 		skb = skb_peek(&sk->sk_receive_queue);
2585 		if (skb)
2586 			amount = skb->len;
2587 	}
2588 	spin_unlock(&sk->sk_receive_queue.lock);
2589 
2590 	return amount;
2591 }
2592 EXPORT_SYMBOL_GPL(unix_inq_len);
2593 
2594 long unix_outq_len(struct sock *sk)
2595 {
2596 	return sk_wmem_alloc_get(sk);
2597 }
2598 EXPORT_SYMBOL_GPL(unix_outq_len);
2599 
2600 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2601 {
2602 	struct sock *sk = sock->sk;
2603 	long amount = 0;
2604 	int err;
2605 
2606 	switch (cmd) {
2607 	case SIOCOUTQ:
2608 		amount = unix_outq_len(sk);
2609 		err = put_user(amount, (int __user *)arg);
2610 		break;
2611 	case SIOCINQ:
2612 		amount = unix_inq_len(sk);
2613 		if (amount < 0)
2614 			err = amount;
2615 		else
2616 			err = put_user(amount, (int __user *)arg);
2617 		break;
2618 	default:
2619 		err = -ENOIOCTLCMD;
2620 		break;
2621 	}
2622 	return err;
2623 }
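/*
 * Illustrative userspace sketch (not part of this file): querying the
 * queue lengths computed by unix_inq_len()/unix_outq_len() above.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int inq, outq;
 *
 *	ioctl(sock, SIOCINQ, &inq);	// unread bytes in the receive queue
 *	ioctl(sock, SIOCOUTQ, &outq);	// bytes we queued, not yet consumed
 */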
2624 
2625 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2626 {
2627 	struct sock *sk = sock->sk;
2628 	unsigned int mask;
2629 
2630 	sock_poll_wait(file, sk_sleep(sk), wait);
2631 	mask = 0;
2632 
2633 	/* exceptional events? */
2634 	if (sk->sk_err)
2635 		mask |= POLLERR;
2636 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2637 		mask |= POLLHUP;
2638 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2639 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2640 
2641 	/* readable? */
2642 	if (!skb_queue_empty(&sk->sk_receive_queue))
2643 		mask |= POLLIN | POLLRDNORM;
2644 
2645 	/* Connection-based need to check for termination and startup */
2646 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2647 	    sk->sk_state == TCP_CLOSE)
2648 		mask |= POLLHUP;
2649 
2650 	/*
2651 	 * We set writable also when the other side has shut down the
2652 	 * connection. This prevents stuck sockets.
2653 	 */
2654 	if (unix_writable(sk))
2655 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2656 
2657 	return mask;
2658 }
2659 
2660 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2661 				    poll_table *wait)
2662 {
2663 	struct sock *sk = sock->sk, *other;
2664 	unsigned int mask, writable;
2665 
2666 	sock_poll_wait(file, sk_sleep(sk), wait);
2667 	mask = 0;
2668 
2669 	/* exceptional events? */
2670 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2671 		mask |= POLLERR |
2672 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2673 
2674 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2675 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2676 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2677 		mask |= POLLHUP;
2678 
2679 	/* readable? */
2680 	if (!skb_queue_empty(&sk->sk_receive_queue))
2681 		mask |= POLLIN | POLLRDNORM;
2682 
2683 	/* Connection-based need to check for termination and startup */
2684 	if (sk->sk_type == SOCK_SEQPACKET) {
2685 		if (sk->sk_state == TCP_CLOSE)
2686 			mask |= POLLHUP;
2687 		/* connection hasn't started yet? */
2688 		if (sk->sk_state == TCP_SYN_SENT)
2689 			return mask;
2690 	}
2691 
2692 	/* No write status requested, avoid expensive OUT tests. */
2693 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2694 		return mask;
2695 
2696 	writable = unix_writable(sk);
2697 	if (writable) {
2698 		unix_state_lock(sk);
2699 
2700 		other = unix_peer(sk);
2701 		if (other && unix_peer(other) != sk &&
2702 		    unix_recvq_full(other) &&
2703 		    unix_dgram_peer_wake_me(sk, other))
2704 			writable = 0;
2705 
2706 		unix_state_unlock(sk);
2707 	}
2708 
2709 	if (writable)
2710 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2711 	else
2712 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2713 
2714 	return mask;
2715 }
2716 
2717 #ifdef CONFIG_PROC_FS
2718 
2719 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2720 
2721 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2722 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2723 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
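/*
 * Worked example of this encoding, assuming BITS_PER_LONG == 64 and
 * UNIX_HASH_BITS == 8: BUCKET_SPACE is 64 - 9 - 1 = 54, so
 * set_bucket_offset(3, 5) yields (3UL << 54) | 5, from which
 * get_bucket() recovers 3 and get_offset() recovers 5.
 */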
2724 
2725 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2726 {
2727 	unsigned long offset = get_offset(*pos);
2728 	unsigned long bucket = get_bucket(*pos);
2729 	struct sock *sk;
2730 	unsigned long count = 0;
2731 
2732 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2733 		if (sock_net(sk) != seq_file_net(seq))
2734 			continue;
2735 		if (++count == offset)
2736 			break;
2737 	}
2738 
2739 	return sk;
2740 }
2741 
2742 static struct sock *unix_next_socket(struct seq_file *seq,
2743 				     struct sock *sk,
2744 				     loff_t *pos)
2745 {
2746 	unsigned long bucket;
2747 
2748 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2749 		sk = sk_next(sk);
2750 		if (!sk)
2751 			goto next_bucket;
2752 		if (sock_net(sk) == seq_file_net(seq))
2753 			return sk;
2754 	}
2755 
2756 	do {
2757 		sk = unix_from_bucket(seq, pos);
2758 		if (sk)
2759 			return sk;
2760 
2761 next_bucket:
2762 		bucket = get_bucket(*pos) + 1;
2763 		*pos = set_bucket_offset(bucket, 1);
2764 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2765 
2766 	return NULL;
2767 }
2768 
2769 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2770 	__acquires(unix_table_lock)
2771 {
2772 	spin_lock(&unix_table_lock);
2773 
2774 	if (!*pos)
2775 		return SEQ_START_TOKEN;
2776 
2777 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2778 		return NULL;
2779 
2780 	return unix_next_socket(seq, NULL, pos);
2781 }
2782 
2783 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2784 {
2785 	++*pos;
2786 	return unix_next_socket(seq, v, pos);
2787 }
2788 
2789 static void unix_seq_stop(struct seq_file *seq, void *v)
2790 	__releases(unix_table_lock)
2791 {
2792 	spin_unlock(&unix_table_lock);
2793 }
2794 
2795 static int unix_seq_show(struct seq_file *seq, void *v)
2796 {
2797 
2798 	if (v == SEQ_START_TOKEN)
2799 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2800 			 "Inode Path\n");
2801 	else {
2802 		struct sock *s = v;
2803 		struct unix_sock *u = unix_sk(s);
2804 		unix_state_lock(s);
2805 
2806 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2807 			s,
2808 			atomic_read(&s->sk_refcnt),
2809 			0,
2810 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2811 			s->sk_type,
2812 			s->sk_socket ?
2813 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2814 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2815 			sock_i_ino(s));
2816 
2817 		if (u->addr) {
2818 			int i, len;
2819 			seq_putc(seq, ' ');
2820 
2821 			i = 0;
2822 			len = u->addr->len - sizeof(short);
2823 			if (!UNIX_ABSTRACT(s))
2824 				len--;
2825 			else {
2826 				seq_putc(seq, '@');
2827 				i++;
2828 			}
2829 			for ( ; i < len; i++)
2830 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2831 					 '@');
2832 		}
2833 		unix_state_unlock(s);
2834 		seq_putc(seq, '\n');
2835 	}
2836 
2837 	return 0;
2838 }
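/*
 * The resulting /proc/net/unix output looks roughly like this (address,
 * inode and path are hypothetical; a listening stream socket shows the
 * __SO_ACCEPTCON flag):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff880012345678: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */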
2839 
2840 static const struct seq_operations unix_seq_ops = {
2841 	.start  = unix_seq_start,
2842 	.next   = unix_seq_next,
2843 	.stop   = unix_seq_stop,
2844 	.show   = unix_seq_show,
2845 };
2846 
2847 static int unix_seq_open(struct inode *inode, struct file *file)
2848 {
2849 	return seq_open_net(inode, file, &unix_seq_ops,
2850 			    sizeof(struct seq_net_private));
2851 }
2852 
2853 static const struct file_operations unix_seq_fops = {
2854 	.owner		= THIS_MODULE,
2855 	.open		= unix_seq_open,
2856 	.read		= seq_read,
2857 	.llseek		= seq_lseek,
2858 	.release	= seq_release_net,
2859 };
2860 
2861 #endif
2862 
2863 static const struct net_proto_family unix_family_ops = {
2864 	.family = PF_UNIX,
2865 	.create = unix_create,
2866 	.owner	= THIS_MODULE,
2867 };
2868 
2869 
2870 static int __net_init unix_net_init(struct net *net)
2871 {
2872 	int error = -ENOMEM;
2873 
2874 	net->unx.sysctl_max_dgram_qlen = 10;
2875 	if (unix_sysctl_register(net))
2876 		goto out;
2877 
2878 #ifdef CONFIG_PROC_FS
2879 	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2880 		unix_sysctl_unregister(net);
2881 		goto out;
2882 	}
2883 #endif
2884 	error = 0;
2885 out:
2886 	return error;
2887 }
2888 
2889 static void __net_exit unix_net_exit(struct net *net)
2890 {
2891 	unix_sysctl_unregister(net);
2892 	remove_proc_entry("unix", net->proc_net);
2893 }
2894 
2895 static struct pernet_operations unix_net_ops = {
2896 	.init = unix_net_init,
2897 	.exit = unix_net_exit,
2898 };
2899 
2900 static int __init af_unix_init(void)
2901 {
2902 	int rc = -1;
2903 
2904 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2905 
2906 	rc = proto_register(&unix_proto, 1);
2907 	if (rc != 0) {
2908 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2909 		goto out;
2910 	}
2911 
2912 	sock_register(&unix_family_ops);
2913 	register_pernet_subsys(&unix_net_ops);
2914 out:
2915 	return rc;
2916 }
2917 
2918 static void __exit af_unix_exit(void)
2919 {
2920 	sock_unregister(PF_UNIX);
2921 	proto_unregister(&unix_proto);
2922 	unregister_pernet_subsys(&unix_net_ops);
2923 }
2924 
2925 /* Earlier than device_initcall() so that other drivers invoking
2926    request_module() don't end up in a loop when modprobe tries
2927    to use a UNIX socket. But later than subsys_initcall() because
2928    we depend on stuff initialised there. */
2929 fs_initcall(af_unix_init);
2930 module_exit(af_unix_exit);
2931 
2932 MODULE_LICENSE("GPL");
2933 MODULE_ALIAS_NETPROTO(PF_UNIX);
2934