/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replaced inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible, block in connect(2)
 *					when the max backlog of the listen
 *					socket has been reached. This won't
 *					break old apps and it avoids hashing
 *					a huge number of socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations.
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair.
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *	     				the core infrastructure does that
 *	     				for all net proto families now (2.5.69+).
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
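
/*
 * A minimal userspace sketch of the two binding flavours described
 * above (illustrative only, not part of the original file; error
 * handling omitted). Note that for an abstract name the address
 * length delimits the name and sun_path is NOT nul-terminated:
 *
 *	struct sockaddr_un a;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	// Filesystem binding: nul-terminated path, creates an inode.
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	strcpy(a.sun_path, "/tmp/example.sock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract binding: leading 0 byte, length delimits the name.
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	memcpy(a.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */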

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the unix_table_lock spinlock;
 *    each socket's state is protected by its own spin lock.
 */
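
/*
 * A sketch of the ordering rule that keeps the per-socket state locks
 * deadlock free when two of them must be held at once (this is what
 * unix_state_double_lock() below implements): the lock of the socket
 * with the lower address is always taken first, e.g.
 *
 *	if (sk1 < sk2) {
 *		unix_state_lock(sk1);
 *		unix_state_lock_nested(sk2);
 *	}
 */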

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)n;

	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it starts with a non-zero byte, it must be NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
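
/*
 * A worked example of the length unix_mkname() returns for a
 * filesystem name (illustrative, using a hypothetical path): binding
 * to "/tmp/x" yields strlen("/tmp/x") + 1 (trailing NUL) +
 * sizeof(short) (sun_family) = 6 + 1 + 2 = 9 bytes, regardless of the
 * addr_len the caller originally passed in. For an abstract name the
 * caller's length is kept as-is and a hash of the raw bytes is
 * returned through *hashp instead.
 */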

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket that is not itself
 * connected to the first socket (eg, /dev/log), clients may only
 * enqueue more messages if the present receive queue of the server
 * socket is not "too large". This means there's a second writeability
 * condition poll and sendmsg need to test. The dgram recv code will
 * do a wake up on the peer_wait wait queue of a socket upon reception
 * of a datagram, which needs to be propagated to sleeping would-be
 * writers since these might not have sent anything so far. This can't
 * be accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it, and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
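
/*
 * A rough picture of the relay described above, assuming a client C
 * connected to a busy server S (e.g. a logger):
 *
 *	C: sendmsg() finds S's receive queue full
 *	   -> C's peer_wake entry is added to S's peer_wait queue
 *	   -> C sleeps on (or polls) its own wait queue
 *	S: recvmsg() dequeues a datagram
 *	   -> wake_up on S's peer_wait
 *	   -> unix_dgram_peer_wake_relay() runs, detaches C's entry
 *	      and wakes C's own wait queue with POLLOUT
 */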

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
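
/*
 * In other words, the socket counts as writable while no more than a
 * quarter of sk_sndbuf is consumed by in-flight write memory. A quick
 * worked example with an assumed sk_sndbuf of 16384 bytes:
 * 4096 << 2 = 16384 <= 16384, so the socket is still writable at
 * exactly 4096 bytes outstanding, and stops being writable above that.
 */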

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What is the above comment talking about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static void unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	mutex_lock(&u->readlock);
	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW,
		 *	though nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		addr->hash = UNIX_HASH_SIZE;
		hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all, allocate resources. If we do this after the
	   state is locked, we will have to recheck everything again
	   anyway.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and
	   cannot drop the lock on the peer. It is dangerous because a
	   deadlock is possible. Connect-to-self and simultaneous
	   connect attempts are eliminated by checking the socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN, we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is
	   locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take it and send info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() has been SMP-safe since last summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake initializer to quiet GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC)
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error do here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to sock_alloc_send_skb,
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if that fails it grabs
		 *	the fallback size buffer, which is under a page and
		 *	will succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
1867 
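/*
 * Illustrative userspace sketch (not part of this file's build) of how
 * the stream send path above is typically exercised with ancillary data.
 * unix_scm_to_skb() attaches any SCM_RIGHTS fds to the first skb only
 * ("fds_sent"), so a large write carries its fds at the front.  The
 * helper name send_fd_with_data() is invented for the example.
 *
 *	int send_fd_with_data(int sock, int fd, const void *buf, size_t len)
 *	{
 *		char cbuf[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		};
 *		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type = SCM_RIGHTS;
 *		cm->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd, sizeof(int));
 *		return sendmsg(sock, &msg, 0);	// may be split into skbs
 *	}
 */
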
static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

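/*
 * Note: SOCK_SEQPACKET reuses the datagram send path once the connection
 * and error checks above pass, so record boundaries are preserved while
 * delivery stays reliable and ordered.  A minimal userspace sketch
 * (illustrative only; variable names invented):
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *	write(sv[0], "ab", 2);
 *	write(sv[0], "cd", 2);
 *	// read(sv[1], ...) returns "ab" and "cd" as two records,
 *	// never the coalesced "abcd" a SOCK_STREAM pair could return.
 */
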
static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

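/*
 * The copied address is whatever the sender bound: for a filesystem
 * socket a NUL-terminated sun_path, for an "abstract" socket a name
 * whose first sun_path byte is '\0'.  Illustrative receiver-side sketch
 * (userspace, not built here):
 *
 *	struct sockaddr_un peer;
 *	socklen_t plen = sizeof(peer);
 *	ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
 *			     (struct sockaddr *)&peer, &plen);
 *
 *	// plen stays 0 if the sender never bound; otherwise inspect
 *	// peer.sun_path[0] to tell abstract names from pathnames.
 */
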
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable what to do on PEEK.  We could:
		   - not return fds: good, but too simple 8)
		   - return fds, and not return them on the later read
		     (the old strategy, apparently wrong)
		   - clone the fds (chosen for now; it is the most
		     universal solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all.  POSIX 1003.1g doesn't define a lot of things
		   clearly, however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

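/*
 * The PEEK branch above duplicates any passed file descriptors, so a
 * peeking reader really receives working fds that it must close.  An
 * illustrative userspace sequence (names invented for the sketch):
 *
 *	char data[128], cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *
 *	recvmsg(fd, &msg, MSG_PEEK);	// fds cloned, datagram stays queued
 *	recvmsg(fd, &msg, 0);		// fds delivered again, datagram gone
 */
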
/*
 *	Sleep until more data has arrived, but check for races.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = freezable_schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;
	int skip;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while we sleep copying data out to the iovec.
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(timeo);
		goto out;
	}

	do {
		int chunk;
		struct sk_buff *skb, *last;

		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
again:
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo, last);

			if (signal_pending(current) ||
			    mutex_lock_interruptible(&u->readlock)) {
				err = sock_intr_errno(timeo);
				goto out;
			}

			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}

		skip = sk_peek_offset(sk, flags);
		while (skip >= skb->len) {
			skip -= skb->len;
			last = skb;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid != siocb->scm->pid) ||
			    !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
			    !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len - skip, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			if (skb->len)
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

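/*
 * With SO_PASSCRED enabled, the check_creds logic above stops a single
 * read from merging data written by different processes, since each
 * result can carry only one SCM_CREDENTIALS triple.  Userspace sketch
 * (illustrative only):
 *
 *	int on = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// If a parent and its child both write() to the peer, a read()
 *	// here returns bytes from one writer only; the next read()
 *	// starts the other writer's data with its own credentials.
 */
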
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

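/*
 * The mirrored shutdown means each direction closes on both ends at
 * once.  Illustrative userspace consequence (sketch, not built here):
 *
 *	shutdown(a, SHUT_WR);
 *	// peer b: read(b, ...) returns 0 (EOF) once its queue drains;
 *	// local a: write(a, ...) fails with EPIPE (and raises SIGPIPE
 *	// unless MSG_NOSIGNAL is used or the signal is ignored).
 */
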
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += skb->len;
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

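/*
 * SIOCINQ reports bytes queued for reading (the whole queue for stream
 * and seqpacket sockets, only the head datagram otherwise); SIOCOUTQ
 * reports bytes not yet consumed by the peer.  Userspace sketch
 * (illustrative only):
 *
 *	int inq, outq;
 *
 *	ioctl(fd, SIOCINQ, &inq);	// a.k.a. FIONREAD
 *	ioctl(fd, SIOCOUTQ, &outq);	// a.k.a. TIOCOUTQ
 */
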
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection.  This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

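/*
 * Poll consequence worth noting: once both directions are shut down the
 * socket reports POLLHUP, and a half-closed peer raises POLLRDHUP, so
 * event loops wake up instead of blocking while read() would return 0.
 * Userspace sketch (illustrative; POLLRDHUP needs _GNU_SOURCE):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };
 *
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & (POLLHUP | POLLRDHUP))
 *		;	// peer finished sending: expect read() == 0
 */
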
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND | POLLWRNORM | POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

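/*
 * Unlike unix_poll(), the datagram variant withholds POLLOUT while a
 * connected peer's receive queue is full and arranges a wakeup via
 * unix_dgram_peer_wake_me(), giving connected datagram sockets real
 * flow control.  An illustrative writer loop (userspace sketch):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	while (send(fd, buf, len, MSG_DONTWAIT) < 0 && errno == EAGAIN)
 *		poll(&pfd, 1, -1);	// sleep until the peer drains
 */
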
#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

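/*
 * Worked example of the encoding, assuming BITS_PER_LONG == 64 and
 * UNIX_HASH_BITS == 8 (so BUCKET_SPACE == 64 - 9 - 1 == 54): the seq
 * position packs the hash bucket into the high bits and a 1-based
 * in-bucket offset into the low 54 bits.
 *
 *	loff_t pos = set_bucket_offset(3, 7);	// bucket 3, 7th socket
 *	// get_bucket(pos) == 3, get_offset(pos) == 7
 */
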
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);

		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

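/*
 * Example /proc/net/unix line this produces (values invented): abstract
 * names gain a leading '@', and the sun_family prefix of the bound
 * address is never printed.
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b8a3c000: 00000002 00000000 00010000 0001 01 17326 @/tmp/.X11-unix/X0
 */
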
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};


#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on infrastructure initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);