/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids keeping a huge
 *					number of sockets hashed (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations.
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair.
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+).
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug?).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
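
/*
 * Illustrative userspace sketch of the abstract namespace described
 * above (an editorial addition, not part of the original file; the
 * name "\0example" is hypothetical). The address starts with a NUL
 * byte, is not NUL-terminated, and its length is conveyed solely by
 * the addrlen argument. Compiled out with #if 0 so it cannot affect
 * the kernel build.
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_abstract_example(void)
{
	struct sockaddr_un sun;
	socklen_t len;
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	/* sun_path[0] == 0 selects the abstract namespace */
	memcpy(sun.sun_path + 1, "example", strlen("example"));
	len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen("example");
	return bind(fd, (struct sockaddr *)&sun, len);
}
#endif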

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket's state is protected by a separate spinlock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
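	/* Full once the queue length exceeds sk_max_ack_backlog: the
	 * listen() backlog for stream sockets, sysctl_max_dgram_qlen
	 * for datagram sockets (see unix_create1()). (Editorial note.)
	 */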
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check a unix socket name:
 *		- it should not be zero length.
 *		- if it starts with a non-zero byte, it should be NUL terminated (FS object)
 *		- if it starts with zero, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket that is not itself
 * connected to the first socket (eg, /dev/log), clients may only
 * enqueue more messages if the present receive queue of the server
 * socket is not "too large". This means there's a second writeability
 * condition poll and sendmsg need to test. The dgram recv code will
 * do a wake up on the peer_wait wait queue of a socket upon reception
 * of a datagram, which needs to be propagated to sleeping would-be
 * writers, since these might not have sent anything so far. This
 * can't be accomplished via poll_wait because the lifetime of the
 * server socket might be less than that of its clients if these break
 * their association with it or if the server socket is closed while
 * clients are still connected to it, and there's no way to inform "a
 * polling implementation" that it should let go of a certain wait
 * queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
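
/*
 * Illustrative userspace sketch of the situation described above (an
 * editorial addition; the helper name is hypothetical). A connected
 * datagram client can use poll(POLLOUT) to wait until the server's
 * receive queue has drained enough for another send() to be
 * accepted; the relayed peer_wait wake up is what makes that poll
 * return. Compiled out with #if 0.
 */
#if 0
#include <poll.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int send_when_writable(int fd, const char *msg)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };

	/* Blocks until the relayed wake up marks fd writable */
	if (poll(&pfd, 1, -1) < 0)
		return -1;
	return send(fd, msg, strlen(msg), 0);
}
#endif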

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static inline int unix_writable(struct sock *sk)
{
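	/* Writable while at most a quarter of sk_sndbuf is consumed by
	 * in-flight write memory (wmem_alloc * 4 <= sndbuf).
	 */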
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through: SOCK_RAW is served by the dgram ops */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we allocate them after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. Connect-to-self and simultaneous connect
	   attempts are eliminated by checking socket state. other is
	   TCP_LISTEN; if sk is TCP_LISTEN, we check this before
	   attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	return max_level;
}
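
/*
 * Illustrative userspace sketch of the fd passing that
 * unix_attach_fds() supports kernel-side (an editorial addition; the
 * helper name and the use of a single fd are hypothetical). The fd is
 * carried in an SCM_RIGHTS control message alongside one byte of
 * ordinary data. Compiled out with #if 0.
 */
#if 0
#include <string.h>
#include <sys/socket.h>

static int send_fd_example(int sock, int fd_to_pass)
{
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		struct cmsghdr align;
		char buf[CMSG_SPACE(sizeof(int))];
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
	return sendmsg(sock, &msg, 0);
}
#endif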

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
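
/*
 * Illustrative userspace sketch of the receiving side of the
 * SCM_CREDENTIALS behaviour described above (an editorial addition;
 * the helper name is hypothetical). With SO_PASSCRED set, each
 * received datagram carries the sender's pid/uid/gid in a
 * SCM_CREDENTIALS control message. Compiled out with #if 0.
 */
#if 0
#define _GNU_SOURCE	/* for struct ucred and SCM_CREDENTIALS */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>

static int recv_with_creds(int sock, char *buf, size_t len,
			   struct ucred *out)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	union {
		struct cmsghdr align;
		char cbuf[CMSG_SPACE(sizeof(struct ucred))];
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.cbuf,
		.msg_controllen = sizeof(u.cbuf),
	};
	int one = 1;
	struct cmsghdr *cmsg;
	ssize_t n;

	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
	n = recvmsg(sock, &msg, 0);
	if (n < 0)
		return -1;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS)
			memcpy(out, CMSG_DATA(cmsg), sizeof(*out));
	return (int)n;
}
#endif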

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1786 
1787 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1788  * bytes, and a minimun of a full page.
1789  */
1790 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1791 
unix_stream_sendmsg(struct kiocb * kiocb,struct socket * sock,struct msghdr * msg,size_t len)1792 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1793 			       struct msghdr *msg, size_t len)
1794 {
1795 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1796 	struct sock *sk = sock->sk;
1797 	struct sock *other = NULL;
1798 	int err, size;
1799 	struct sk_buff *skb;
1800 	int sent = 0;
1801 	struct scm_cookie tmp_scm;
1802 	bool fds_sent = false;
1803 	int max_level;
1804 	int data_len;
1805 
1806 	if (NULL == siocb->scm)
1807 		siocb->scm = &tmp_scm;
1808 	wait_for_unix_gc();
1809 	err = scm_send(sock, msg, siocb->scm, false);
1810 	if (err < 0)
1811 		return err;
1812 
1813 	err = -EOPNOTSUPP;
1814 	if (msg->msg_flags&MSG_OOB)
1815 		goto out_err;
1816 
1817 	if (msg->msg_namelen) {
1818 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1819 		goto out_err;
1820 	} else {
1821 		err = -ENOTCONN;
1822 		other = unix_peer(sk);
1823 		if (!other)
1824 			goto out_err;
1825 	}
1826 
1827 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1828 		goto pipe_err;
1829 
1830 	while (sent < len) {
1831 		size = len - sent;
1832 
1833 		/* Keep two messages in the pipe so it schedules better */
1834 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1835 
1836 		/* allow fallback to order-0 allocations */
1837 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1838 
1839 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1840 
1841 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1842 
1843 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1844 					   msg->msg_flags & MSG_DONTWAIT, &err,
1845 					   get_order(UNIX_SKB_FRAGS_SZ));
1846 		if (!skb)
1847 			goto out_err;
1848 
1849 		/* Only send the fds in the first buffer */
1850 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1851 		if (err < 0) {
1852 			kfree_skb(skb);
1853 			goto out_err;
1854 		}
1855 		max_level = err + 1;
1856 		fds_sent = true;
1857 
1858 		skb_put(skb, size - data_len);
1859 		skb->data_len = data_len;
1860 		skb->len = size;
1861 		err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
1862 						   sent, size);
1863 		if (err) {
1864 			kfree_skb(skb);
1865 			goto out_err;
1866 		}
1867 
1868 		unix_state_lock(other);
1869 
1870 		if (sock_flag(other, SOCK_DEAD) ||
1871 		    (other->sk_shutdown & RCV_SHUTDOWN))
1872 			goto pipe_err_free;
1873 
1874 		maybe_add_creds(skb, sock, other);
1875 		skb_queue_tail(&other->sk_receive_queue, skb);
1876 		if (max_level > unix_sk(other)->recursion_level)
1877 			unix_sk(other)->recursion_level = max_level;
1878 		unix_state_unlock(other);
1879 		other->sk_data_ready(other);
1880 		sent += size;
1881 	}
1882 
1883 	scm_destroy(siocb->scm);
1884 	siocb->scm = NULL;
1885 
1886 	return sent;
1887 
1888 pipe_err_free:
1889 	unix_state_unlock(other);
1890 	kfree_skb(skb);
1891 pipe_err:
1892 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1893 		send_sig(SIGPIPE, current, 0);
1894 	err = -EPIPE;
1895 out_err:
1896 	scm_destroy(siocb->scm);
1897 	siocb->scm = NULL;
1898 	return sent ? : err;
1899 }
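
/* Illustrative userspace sketch (an addition for exposition, not part of
 * the kernel build): passing a file descriptor over an AF_UNIX stream
 * socket with SCM_RIGHTS, the control-message path that unix_scm_to_skb()
 * attaches to the first skb above.  send_fd() is a hypothetical helper;
 * error handling is trimmed.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static void send_fd(int sock, int fd)
 *	{
 *		char byte = 0;
 *		struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
 *		union {
 *			struct cmsghdr align;
 *			char buf[CMSG_SPACE(sizeof(int))];
 *		} u;
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *		};
 *		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
 *		sendmsg(sock, &msg, 0);
 *	}
 */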
1900 
1901 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1902 				  struct msghdr *msg, size_t len)
1903 {
1904 	int err;
1905 	struct sock *sk = sock->sk;
1906 
1907 	err = sock_error(sk);
1908 	if (err)
1909 		return err;
1910 
1911 	if (sk->sk_state != TCP_ESTABLISHED)
1912 		return -ENOTCONN;
1913 
1914 	if (msg->msg_namelen)
1915 		msg->msg_namelen = 0;
1916 
1917 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1918 }
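
/* Illustrative userspace sketch: SOCK_SEQPACKET is connection-oriented
 * but preserves record boundaries, which is why the handler above
 * requires TCP_ESTABLISHED, clears msg_namelen, and then reuses the
 * datagram path.
 *
 *	int fds[2];
 *	char buf[16];
 *
 *	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds);
 *	send(fds[0], "ab", 2, 0);
 *	send(fds[0], "cd", 2, 0);
 *	recv(fds[1], buf, sizeof(buf), 0);	// returns exactly 2 bytes: "ab"
 */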
1919 
1920 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1921 			      struct msghdr *msg, size_t size,
1922 			      int flags)
1923 {
1924 	struct sock *sk = sock->sk;
1925 
1926 	if (sk->sk_state != TCP_ESTABLISHED)
1927 		return -ENOTCONN;
1928 
1929 	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1930 }
1931 
1932 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1933 {
1934 	struct unix_sock *u = unix_sk(sk);
1935 
1936 	if (u->addr) {
1937 		msg->msg_namelen = u->addr->len;
1938 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1939 	}
1940 }
1941 
1942 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1943 			      struct msghdr *msg, size_t size,
1944 			      int flags)
1945 {
1946 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1947 	struct scm_cookie tmp_scm;
1948 	struct sock *sk = sock->sk;
1949 	struct unix_sock *u = unix_sk(sk);
1950 	int noblock = flags & MSG_DONTWAIT;
1951 	struct sk_buff *skb;
1952 	int err;
1953 	int peeked, skip;
1954 
1955 	err = -EOPNOTSUPP;
1956 	if (flags&MSG_OOB)
1957 		goto out;
1958 
1959 	err = mutex_lock_interruptible(&u->readlock);
1960 	if (unlikely(err)) {
1961 		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
1962 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
1963 		 */
1964 		err = noblock ? -EAGAIN : -ERESTARTSYS;
1965 		goto out;
1966 	}
1967 
1968 	skip = sk_peek_offset(sk, flags);
1969 
1970 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1971 	if (!skb) {
1972 		unix_state_lock(sk);
1973 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1974 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1975 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1976 			err = 0;
1977 		unix_state_unlock(sk);
1978 		goto out_unlock;
1979 	}
1980 
1981 	wake_up_interruptible_sync_poll(&u->peer_wait,
1982 					POLLOUT | POLLWRNORM | POLLWRBAND);
1983 
1984 	if (msg->msg_name)
1985 		unix_copy_addr(msg, skb->sk);
1986 
1987 	if (size > skb->len - skip)
1988 		size = skb->len - skip;
1989 	else if (size < skb->len - skip)
1990 		msg->msg_flags |= MSG_TRUNC;
1991 
1992 	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1993 	if (err)
1994 		goto out_free;
1995 
1996 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1997 		__sock_recv_timestamp(msg, sk, skb);
1998 
1999 	if (!siocb->scm) {
2000 		siocb->scm = &tmp_scm;
2001 		memset(&tmp_scm, 0, sizeof(tmp_scm));
2002 	}
2003 	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2004 	unix_set_secdata(siocb->scm, skb);
2005 
2006 	if (!(flags & MSG_PEEK)) {
2007 		if (UNIXCB(skb).fp)
2008 			unix_detach_fds(siocb->scm, skb);
2009 
2010 		sk_peek_offset_bwd(sk, skb->len);
2011 	} else {
2012 		/* It is questionable: on PEEK we could:
2013 		   - not return fds - good, but too simple 8)
2014 		   - return fds, and not return them on read (old strategy,
2015 		     apparently wrong)
2016 		   - clone fds (chosen here, as it is the most universal
2017 		     solution)
2018 
2019 		   POSIX 1003.1g does not actually define this clearly
2020 		   at all - but then, POSIX 1003.1g doesn't define a lot
2021 		   of things clearly!
2022 
2023 		*/
2024 
2025 		sk_peek_offset_fwd(sk, size);
2026 
2027 		if (UNIXCB(skb).fp)
2028 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2029 	}
2030 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2031 
2032 	scm_recv(sock, msg, siocb->scm, flags);
2033 
2034 out_free:
2035 	skb_free_datagram(sk, skb);
2036 out_unlock:
2037 	mutex_unlock(&u->readlock);
2038 out:
2039 	return err;
2040 }
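
/* Illustrative userspace sketch: since the function above returns the
 * full skb length when MSG_TRUNC is set, the size of the next queued
 * datagram can be probed without consuming it:
 *
 *	char c;
 *	ssize_t next_len = recv(sock, &c, 1, MSG_PEEK | MSG_TRUNC);
 */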
2041 
2042 /*
2043  *	Sleep until more data has arrived, but check for races.
2044  */
2045 static long unix_stream_data_wait(struct sock *sk, long timeo,
2046 				  struct sk_buff *last)
2047 {
2048 	DEFINE_WAIT(wait);
2049 
2050 	unix_state_lock(sk);
2051 
2052 	for (;;) {
2053 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2054 
2055 		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
2056 		    sk->sk_err ||
2057 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2058 		    signal_pending(current) ||
2059 		    !timeo)
2060 			break;
2061 
2062 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2063 		unix_state_unlock(sk);
2064 		timeo = freezable_schedule_timeout(timeo);
2065 		unix_state_lock(sk);
2066 
2067 		if (sock_flag(sk, SOCK_DEAD))
2068 			break;
2069 
2070 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2071 	}
2072 
2073 	finish_wait(sk_sleep(sk), &wait);
2074 	unix_state_unlock(sk);
2075 	return timeo;
2076 }
2077 
2078 static unsigned int unix_skb_len(const struct sk_buff *skb)
2079 {
2080 	return skb->len - UNIXCB(skb).consumed;
2081 }
2082 
2083 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2084 			       struct msghdr *msg, size_t size,
2085 			       int flags)
2086 {
2087 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2088 	struct scm_cookie tmp_scm;
2089 	struct sock *sk = sock->sk;
2090 	struct unix_sock *u = unix_sk(sk);
2091 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
2092 	int copied = 0;
2093 	int noblock = flags & MSG_DONTWAIT;
2094 	int check_creds = 0;
2095 	int target;
2096 	int err = 0;
2097 	long timeo;
2098 	int skip;
2099 
2100 	err = -EINVAL;
2101 	if (sk->sk_state != TCP_ESTABLISHED)
2102 		goto out;
2103 
2104 	err = -EOPNOTSUPP;
2105 	if (flags&MSG_OOB)
2106 		goto out;
2107 
2108 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
2109 	timeo = sock_rcvtimeo(sk, noblock);
2110 
2111 	/* Lock the socket to prevent the queue from being reordered
2112 	 * while we sleep copying data out to user space.
2113 	 */
2114 
2115 	if (!siocb->scm) {
2116 		siocb->scm = &tmp_scm;
2117 		memset(&tmp_scm, 0, sizeof(tmp_scm));
2118 	}
2119 
2120 	mutex_lock(&u->readlock);
2121 
2122 	if (flags & MSG_PEEK)
2123 		skip = sk_peek_offset(sk, flags);
2124 	else
2125 		skip = 0;
2126 
2127 	do {
2128 		int chunk;
2129 		struct sk_buff *skb, *last;
2130 
2131 		unix_state_lock(sk);
2132 		if (sock_flag(sk, SOCK_DEAD)) {
2133 			err = -ECONNRESET;
2134 			goto unlock;
2135 		}
2136 		last = skb = skb_peek(&sk->sk_receive_queue);
2137 again:
2138 		if (skb == NULL) {
2139 			unix_sk(sk)->recursion_level = 0;
2140 			if (copied >= target)
2141 				goto unlock;
2142 
2143 			/*
2144 			 *	POSIX 1003.1g mandates this order.
2145 			 */
2146 
2147 			err = sock_error(sk);
2148 			if (err)
2149 				goto unlock;
2150 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2151 				goto unlock;
2152 
2153 			unix_state_unlock(sk);
2154 			err = -EAGAIN;
2155 			if (!timeo)
2156 				break;
2157 			mutex_unlock(&u->readlock);
2158 
2159 			timeo = unix_stream_data_wait(sk, timeo, last);
2160 
2161 			if (signal_pending(current)) {
2162 				err = sock_intr_errno(timeo);
2163 				goto out;
2164 			}
2165 
2166 			mutex_lock(&u->readlock);
2167 			continue;
2168  unlock:
2169 			unix_state_unlock(sk);
2170 			break;
2171 		}
2172 
2173 		while (skip >= unix_skb_len(skb)) {
2174 			skip -= unix_skb_len(skb);
2175 			last = skb;
2176 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2177 			if (!skb)
2178 				goto again;
2179 		}
2180 
2181 		unix_state_unlock(sk);
2182 
2183 		if (check_creds) {
2184 			/* Never glue messages from different writers */
2185 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
2186 			    !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2187 			    !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
2188 				break;
2189 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2190 			/* Copy credentials */
2191 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2192 			check_creds = 1;
2193 		}
2194 
2195 		/* Copy address just once */
2196 		if (sunaddr) {
2197 			unix_copy_addr(msg, skb->sk);
2198 			sunaddr = NULL;
2199 		}
2200 
2201 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2202 		if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2203 					    msg->msg_iov, chunk)) {
2204 			if (copied == 0)
2205 				copied = -EFAULT;
2206 			break;
2207 		}
2208 		copied += chunk;
2209 		size -= chunk;
2210 
2211 		/* Mark read part of skb as used */
2212 		if (!(flags & MSG_PEEK)) {
2213 			UNIXCB(skb).consumed += chunk;
2214 
2215 			sk_peek_offset_bwd(sk, chunk);
2216 
2217 			if (UNIXCB(skb).fp)
2218 				unix_detach_fds(siocb->scm, skb);
2219 
2220 			if (unix_skb_len(skb))
2221 				break;
2222 
2223 			skb_unlink(skb, &sk->sk_receive_queue);
2224 			consume_skb(skb);
2225 
2226 			if (siocb->scm->fp)
2227 				break;
2228 		} else {
2229 			/* It is questionable, see note in unix_dgram_recvmsg.
2230 			 */
2231 			if (UNIXCB(skb).fp)
2232 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2233 
2234 			sk_peek_offset_fwd(sk, chunk);
2235 
2236 			if (UNIXCB(skb).fp)
2237 				break;
2238 
2239 			skip = 0;
2240 			last = skb;
2241 			unix_state_lock(sk);
2242 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2243 			if (skb)
2244 				goto again;
2245 			unix_state_unlock(sk);
2246 			break;
2247 		}
2248 	} while (size);
2249 
2250 	mutex_unlock(&u->readlock);
2251 	scm_recv(sock, msg, siocb->scm, flags);
2252 out:
2253 	return copied ? : err;
2254 }
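
/* Illustrative userspace sketch: the sk_peek_offset() handling above
 * backs the SO_PEEK_OFF socket option, which makes successive MSG_PEEK
 * reads walk forward through the queued data instead of rereading it
 * from the start:
 *
 *	int off = 0;
 *	char buf[16];
 *
 *	setsockopt(sock, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(sock, buf, sizeof(buf), MSG_PEEK);	// bytes 0..15
 *	recv(sock, buf, sizeof(buf), MSG_PEEK);	// bytes 16..31
 */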
2255 
2256 static int unix_shutdown(struct socket *sock, int mode)
2257 {
2258 	struct sock *sk = sock->sk;
2259 	struct sock *other;
2260 
2261 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2262 		return -EINVAL;
2263 	/* This maps:
2264 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2265 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2266 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2267 	 */
2268 	++mode;
2269 
2270 	unix_state_lock(sk);
2271 	sk->sk_shutdown |= mode;
2272 	other = unix_peer(sk);
2273 	if (other)
2274 		sock_hold(other);
2275 	unix_state_unlock(sk);
2276 	sk->sk_state_change(sk);
2277 
2278 	if (other &&
2279 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2280 
2281 		int peer_mode = 0;
2282 
2283 		if (mode&RCV_SHUTDOWN)
2284 			peer_mode |= SEND_SHUTDOWN;
2285 		if (mode&SEND_SHUTDOWN)
2286 			peer_mode |= RCV_SHUTDOWN;
2287 		unix_state_lock(other);
2288 		other->sk_shutdown |= peer_mode;
2289 		unix_state_unlock(other);
2290 		other->sk_state_change(other);
2291 		if (peer_mode == SHUTDOWN_MASK)
2292 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2293 		else if (peer_mode & RCV_SHUTDOWN)
2294 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2295 	}
2296 	if (other)
2297 		sock_put(other);
2298 
2299 	return 0;
2300 }
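
/* Illustrative userspace sketch: because unix_shutdown() mirrors the
 * shutdown onto the peer (SEND_SHUTDOWN here becomes RCV_SHUTDOWN
 * there), a SHUT_WR on one end is observed as EOF on the other:
 *
 *	int fds[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
 *	shutdown(fds[0], SHUT_WR);
 *	read(fds[1], &c, 1);	// returns 0 (EOF); poll() reports POLLRDHUP
 */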
2301 
2302 long unix_inq_len(struct sock *sk)
2303 {
2304 	struct sk_buff *skb;
2305 	long amount = 0;
2306 
2307 	if (sk->sk_state == TCP_LISTEN)
2308 		return -EINVAL;
2309 
2310 	spin_lock(&sk->sk_receive_queue.lock);
2311 	if (sk->sk_type == SOCK_STREAM ||
2312 	    sk->sk_type == SOCK_SEQPACKET) {
2313 		skb_queue_walk(&sk->sk_receive_queue, skb)
2314 			amount += unix_skb_len(skb);
2315 	} else {
2316 		skb = skb_peek(&sk->sk_receive_queue);
2317 		if (skb)
2318 			amount = skb->len;
2319 	}
2320 	spin_unlock(&sk->sk_receive_queue.lock);
2321 
2322 	return amount;
2323 }
2324 EXPORT_SYMBOL_GPL(unix_inq_len);
2325 
2326 long unix_outq_len(struct sock *sk)
2327 {
2328 	return sk_wmem_alloc_get(sk);
2329 }
2330 EXPORT_SYMBOL_GPL(unix_outq_len);
2331 
2332 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2333 {
2334 	struct sock *sk = sock->sk;
2335 	long amount = 0;
2336 	int err;
2337 
2338 	switch (cmd) {
2339 	case SIOCOUTQ:
2340 		amount = unix_outq_len(sk);
2341 		err = put_user(amount, (int __user *)arg);
2342 		break;
2343 	case SIOCINQ:
2344 		amount = unix_inq_len(sk);
2345 		if (amount < 0)
2346 			err = amount;
2347 		else
2348 			err = put_user(amount, (int __user *)arg);
2349 		break;
2350 	default:
2351 		err = -ENOIOCTLCMD;
2352 		break;
2353 	}
2354 	return err;
2355 }
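
/* Illustrative userspace sketch: querying the queue lengths computed by
 * unix_inq_len() and unix_outq_len() above.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int unread, unsent;
 *
 *	ioctl(sock, SIOCINQ, &unread);	// bytes queued for reading
 *	ioctl(sock, SIOCOUTQ, &unsent);	// bytes not yet consumed by the peer
 */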
2356 
2357 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2358 {
2359 	struct sock *sk = sock->sk;
2360 	unsigned int mask;
2361 
2362 	sock_poll_wait(file, sk_sleep(sk), wait);
2363 	mask = 0;
2364 
2365 	/* exceptional events? */
2366 	if (sk->sk_err)
2367 		mask |= POLLERR;
2368 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2369 		mask |= POLLHUP;
2370 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2371 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2372 
2373 	/* readable? */
2374 	if (!skb_queue_empty(&sk->sk_receive_queue))
2375 		mask |= POLLIN | POLLRDNORM;
2376 
2377 	/* Connection-based sockets need to check for termination and startup */
2378 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2379 	    sk->sk_state == TCP_CLOSE)
2380 		mask |= POLLHUP;
2381 
2382 	/*
2383 	 * We set writable also when the other side has shut down the
2384 	 * connection. This prevents stuck sockets.
2385 	 */
2386 	if (unix_writable(sk))
2387 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2388 
2389 	return mask;
2390 }
2391 
2392 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2393 				    poll_table *wait)
2394 {
2395 	struct sock *sk = sock->sk, *other;
2396 	unsigned int mask, writable;
2397 
2398 	sock_poll_wait(file, sk_sleep(sk), wait);
2399 	mask = 0;
2400 
2401 	/* exceptional events? */
2402 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2403 		mask |= POLLERR |
2404 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2405 
2406 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2407 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2408 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2409 		mask |= POLLHUP;
2410 
2411 	/* readable? */
2412 	if (!skb_queue_empty(&sk->sk_receive_queue))
2413 		mask |= POLLIN | POLLRDNORM;
2414 
2415 	/* Connection-based sockets need to check for termination and startup */
2416 	if (sk->sk_type == SOCK_SEQPACKET) {
2417 		if (sk->sk_state == TCP_CLOSE)
2418 			mask |= POLLHUP;
2419 		/* connection hasn't started yet? */
2420 		if (sk->sk_state == TCP_SYN_SENT)
2421 			return mask;
2422 	}
2423 
2424 	/* No write status requested, avoid expensive OUT tests. */
2425 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2426 		return mask;
2427 
2428 	writable = unix_writable(sk);
2429 	if (writable) {
2430 		unix_state_lock(sk);
2431 
2432 		other = unix_peer(sk);
2433 		if (other && unix_peer(other) != sk &&
2434 		    unix_recvq_full(other) &&
2435 		    unix_dgram_peer_wake_me(sk, other))
2436 			writable = 0;
2437 
2438 		unix_state_unlock(sk);
2439 	}
2440 
2441 	if (writable)
2442 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2443 	else
2444 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2445 
2446 	return mask;
2447 }
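
/* Illustrative userspace sketch: poll()ing a connected datagram socket
 * for writability, which the function above deliberately gates on the
 * *peer's* receive queue:
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = sock, .events = POLLOUT };
 *
 *	poll(&pfd, 1, -1);	// wakes once the peer drains its queue
 */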
2448 
2449 #ifdef CONFIG_PROC_FS
2450 
2451 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2452 
2453 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2454 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2455 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
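
/* Worked example of the position encoding above, assuming 64-bit longs
 * and UNIX_HASH_BITS == 8: BUCKET_SPACE == 64 - 9 - 1 == 54, so a
 * seq-file position packs the bucket into the top bits and a 1-based
 * in-bucket offset into the low 54 bits:
 *
 *	loff_t pos = set_bucket_offset(3, 7);
 *	get_bucket(pos) == 3;	// hash bucket
 *	get_offset(pos) == 7;	// 7th socket within that bucket
 */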
2456 
2457 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2458 {
2459 	unsigned long offset = get_offset(*pos);
2460 	unsigned long bucket = get_bucket(*pos);
2461 	struct sock *sk;
2462 	unsigned long count = 0;
2463 
2464 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2465 		if (sock_net(sk) != seq_file_net(seq))
2466 			continue;
2467 		if (++count == offset)
2468 			break;
2469 	}
2470 
2471 	return sk;
2472 }
2473 
2474 static struct sock *unix_next_socket(struct seq_file *seq,
2475 				     struct sock *sk,
2476 				     loff_t *pos)
2477 {
2478 	unsigned long bucket;
2479 
2480 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2481 		sk = sk_next(sk);
2482 		if (!sk)
2483 			goto next_bucket;
2484 		if (sock_net(sk) == seq_file_net(seq))
2485 			return sk;
2486 	}
2487 
2488 	do {
2489 		sk = unix_from_bucket(seq, pos);
2490 		if (sk)
2491 			return sk;
2492 
2493 next_bucket:
2494 		bucket = get_bucket(*pos) + 1;
2495 		*pos = set_bucket_offset(bucket, 1);
2496 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2497 
2498 	return NULL;
2499 }
2500 
2501 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2502 	__acquires(unix_table_lock)
2503 {
2504 	spin_lock(&unix_table_lock);
2505 
2506 	if (!*pos)
2507 		return SEQ_START_TOKEN;
2508 
2509 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2510 		return NULL;
2511 
2512 	return unix_next_socket(seq, NULL, pos);
2513 }
2514 
2515 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2516 {
2517 	++*pos;
2518 	return unix_next_socket(seq, v, pos);
2519 }
2520 
2521 static void unix_seq_stop(struct seq_file *seq, void *v)
2522 	__releases(unix_table_lock)
2523 {
2524 	spin_unlock(&unix_table_lock);
2525 }
2526 
2527 static int unix_seq_show(struct seq_file *seq, void *v)
2528 {
2529 
2530 	if (v == SEQ_START_TOKEN)
2531 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2532 			 "Inode Path\n");
2533 	else {
2534 		struct sock *s = v;
2535 		struct unix_sock *u = unix_sk(s);
2536 		unix_state_lock(s);
2537 
2538 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2539 			s,
2540 			atomic_read(&s->sk_refcnt),
2541 			0,
2542 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2543 			s->sk_type,
2544 			s->sk_socket ?
2545 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2546 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2547 			sock_i_ino(s));
2548 
2549 		if (u->addr) {
2550 			int i, len;
2551 			seq_putc(seq, ' ');
2552 
2553 			i = 0;
2554 			len = u->addr->len - sizeof(short);
2555 			if (!UNIX_ABSTRACT(s))
2556 				len--;
2557 			else {
2558 				seq_putc(seq, '@');
2559 				i++;
2560 			}
2561 			for ( ; i < len; i++)
2562 				seq_putc(seq, u->addr->name->sun_path[i]);
2563 		}
2564 		unix_state_unlock(s);
2565 		seq_putc(seq, '\n');
2566 	}
2567 
2568 	return 0;
2569 }
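
/* A /proc/net/unix line emitted by unix_seq_show() looks roughly like
 * the following (inode and path are made up for illustration; the
 * kernel address is printed with %pK and typically reads as zeroes for
 * unprivileged readers).  Fields: address, refcount, protocol, flags,
 * type, state, inode, then optionally the bound path, '@'-prefixed for
 * abstract names:
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */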
2570 
2571 static const struct seq_operations unix_seq_ops = {
2572 	.start  = unix_seq_start,
2573 	.next   = unix_seq_next,
2574 	.stop   = unix_seq_stop,
2575 	.show   = unix_seq_show,
2576 };
2577 
2578 static int unix_seq_open(struct inode *inode, struct file *file)
2579 {
2580 	return seq_open_net(inode, file, &unix_seq_ops,
2581 			    sizeof(struct seq_net_private));
2582 }
2583 
2584 static const struct file_operations unix_seq_fops = {
2585 	.owner		= THIS_MODULE,
2586 	.open		= unix_seq_open,
2587 	.read		= seq_read,
2588 	.llseek		= seq_lseek,
2589 	.release	= seq_release_net,
2590 };
2591 
2592 #endif
2593 
2594 static const struct net_proto_family unix_family_ops = {
2595 	.family = PF_UNIX,
2596 	.create = unix_create,
2597 	.owner	= THIS_MODULE,
2598 };
2599 
2600 
2601 static int __net_init unix_net_init(struct net *net)
2602 {
2603 	int error = -ENOMEM;
2604 
2605 	net->unx.sysctl_max_dgram_qlen = 10;
2606 	if (unix_sysctl_register(net))
2607 		goto out;
2608 
2609 #ifdef CONFIG_PROC_FS
2610 	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2611 		unix_sysctl_unregister(net);
2612 		goto out;
2613 	}
2614 #endif
2615 	error = 0;
2616 out:
2617 	return error;
2618 }
2619 
2620 static void __net_exit unix_net_exit(struct net *net)
2621 {
2622 	unix_sysctl_unregister(net);
2623 	remove_proc_entry("unix", net->proc_net);
2624 }
2625 
2626 static struct pernet_operations unix_net_ops = {
2627 	.init = unix_net_init,
2628 	.exit = unix_net_exit,
2629 };
2630 
2631 static int __init af_unix_init(void)
2632 {
2633 	int rc = -1;
2634 
2635 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2636 
2637 	rc = proto_register(&unix_proto, 1);
2638 	if (rc != 0) {
2639 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2640 		goto out;
2641 	}
2642 
2643 	sock_register(&unix_family_ops);
2644 	register_pernet_subsys(&unix_net_ops);
2645 out:
2646 	return rc;
2647 }
2648 
2649 static void __exit af_unix_exit(void)
2650 {
2651 	sock_unregister(PF_UNIX);
2652 	proto_unregister(&unix_proto);
2653 	unregister_pernet_subsys(&unix_net_ops);
2654 }
2655 
2656 /* Earlier than device_initcall() so that other drivers invoking
2657    request_module() don't end up in a loop when modprobe tries
2658    to use a UNIX socket. But later than subsys_initcall() because
2659    we depend on stuff initialised there. */
2660 fs_initcall(af_unix_init);
2661 module_exit(af_unix_exit);
2662 
2663 MODULE_LICENSE("GPL");
2664 MODULE_ALIAS_NETPROTO(PF_UNIX);
2665