• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge number
38  *					of hashed socks (for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
/*
 * Hash table of AF_UNIX sockets. It has UNIX_HASH_SIZE + 1 buckets: the
 * extra, last bucket is the list addressed by unix_sockets_unbound.
 */
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Spinlock taken around lookups and updates of unix_socket_table. */
119 static DEFINE_SPINLOCK(unix_table_lock);
/* Count of AF_UNIX socks; raised in unix_create1(), dropped in the destructor. */
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
/* Bucket that unix_create1() places newly created sockets on. */
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
/*
 * True when the address hash of a bound socket differs from UNIX_HASH_SIZE.
 * unix_bind() stores UNIX_HASH_SIZE as the hash of filesystem-bound names.
 * Dereferences ->addr, so the socket must have an address.
 */
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security id held in @scm into the slot UNIXSID() yields for @skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	void *slot = UNIXSID(skb);

	memcpy(slot, &scm->secid, sizeof(u32));
}

/* Load the security id stored with @skb back into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are empty. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149 
/*
 * Fold a checksum down to a bucket index by xor-ing in its upper bits and
 * masking with UNIX_HASH_SIZE - 1.
 */
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned folded = (__force unsigned)n;

	folded = folded ^ (folded >> 16);
	folded = folded ^ (folded >> 8);

	return folded & (UNIX_HASH_SIZE - 1);
}
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
unix_our_peer(struct sock * sk,struct sock * osk)160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
unix_may_send(struct sock * sk,struct sock * osk)165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
unix_recvq_full(struct sock const * sk)170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
unix_peer_get(struct sock * s)175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
unix_release_addr(struct unix_address * addr)187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
unix_mkname(struct sockaddr_un * sunaddr,int len,unsigned * hashp)200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Take @sk off whatever hash list it is on. The callers in this file invoke
 * it with unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227 
/*
 * Put @sk on @list, warning if it is still hashed somewhere. The callers in
 * this file invoke it with unix_table_lock held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	int still_hashed = !sk_unhashed(sk);

	WARN_ON(still_hashed);
	sk_add_node(sk, list);
}
233 
unix_remove_socket(struct sock * sk)234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
unix_insert_socket(struct hlist_head * list,struct sock * sk)241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
__unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,int type,unsigned hash)248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,int type,unsigned hash)270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
unix_find_socket_byinode(struct net * net,struct inode * i)285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
unix_writable(struct sock * sk)309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
unix_write_space(struct sock * sk)314 static void unix_write_space(struct sock *sk)
315 {
316 	read_lock(&sk->sk_callback_lock);
317 	if (unix_writable(sk)) {
318 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
319 			wake_up_interruptible_sync(sk->sk_sleep);
320 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 	}
322 	read_unlock(&sk->sk_callback_lock);
323 }
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332 		skb_queue_purge(&sk->sk_receive_queue);
333 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334 
335 		/* If one link of bidirectional dgram pipe is disconnected,
336 		 * we signal error. Messages are lost. Do not make this,
337 		 * when peer was not connected to us.
338 		 */
339 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 			other->sk_err = ECONNRESET;
341 			other->sk_error_report(other);
342 		}
343 	}
344 }
345 
unix_sock_destructor(struct sock * sk)346 static void unix_sock_destructor(struct sock *sk)
347 {
348 	struct unix_sock *u = unix_sk(sk);
349 
350 	skb_queue_purge(&sk->sk_receive_queue);
351 
352 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353 	WARN_ON(!sk_unhashed(sk));
354 	WARN_ON(sk->sk_socket);
355 	if (!sock_flag(sk, SOCK_DEAD)) {
356 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357 		return;
358 	}
359 
360 	if (u->addr)
361 		unix_release_addr(u->addr);
362 
363 	atomic_dec(&unix_nr_socks);
364 	local_bh_disable();
365 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 	local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369 		atomic_read(&unix_nr_socks));
370 #endif
371 }
372 
unix_release_sock(struct sock * sk,int embrion)373 static int unix_release_sock(struct sock *sk, int embrion)
374 {
375 	struct unix_sock *u = unix_sk(sk);
376 	struct dentry *dentry;
377 	struct vfsmount *mnt;
378 	struct sock *skpair;
379 	struct sk_buff *skb;
380 	int state;
381 
382 	unix_remove_socket(sk);
383 
384 	/* Clear state */
385 	unix_state_lock(sk);
386 	sock_orphan(sk);
387 	sk->sk_shutdown = SHUTDOWN_MASK;
388 	dentry	     = u->dentry;
389 	u->dentry    = NULL;
390 	mnt	     = u->mnt;
391 	u->mnt	     = NULL;
392 	state = sk->sk_state;
393 	sk->sk_state = TCP_CLOSE;
394 	unix_state_unlock(sk);
395 
396 	wake_up_interruptible_all(&u->peer_wait);
397 
398 	skpair = unix_peer(sk);
399 
400 	if (skpair != NULL) {
401 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402 			unix_state_lock(skpair);
403 			/* No more writes */
404 			skpair->sk_shutdown = SHUTDOWN_MASK;
405 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406 				skpair->sk_err = ECONNRESET;
407 			unix_state_unlock(skpair);
408 			skpair->sk_state_change(skpair);
409 			read_lock(&skpair->sk_callback_lock);
410 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411 			read_unlock(&skpair->sk_callback_lock);
412 		}
413 		sock_put(skpair); /* It may now die */
414 		unix_peer(sk) = NULL;
415 	}
416 
417 	/* Try to flush out this socket. Throw out buffers at least */
418 
419 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420 		if (state == TCP_LISTEN)
421 			unix_release_sock(skb->sk, 1);
422 		/* passed fds are erased in the kfree_skb hook	      */
423 		kfree_skb(skb);
424 	}
425 
426 	if (dentry) {
427 		dput(dentry);
428 		mntput(mnt);
429 	}
430 
431 	sock_put(sk);
432 
433 	/* ---- Socket is dead now and most probably destroyed ---- */
434 
435 	/*
436 	 * Fixme: BSD difference: In BSD all sockets connected to use get
437 	 *	  ECONNRESET and we die on the spot. In Linux we behave
438 	 *	  like files and pipes do and wait for the last
439 	 *	  dereference.
440 	 *
441 	 * Can't we simply set sock->err?
442 	 *
443 	 *	  What the above comment does talk about? --ANK(980817)
444 	 */
445 
446 	if (unix_tot_inflight)
447 		unix_gc();		/* Garbage collect fds */
448 
449 	return 0;
450 }
451 
unix_listen(struct socket * sock,int backlog)452 static int unix_listen(struct socket *sock, int backlog)
453 {
454 	int err;
455 	struct sock *sk = sock->sk;
456 	struct unix_sock *u = unix_sk(sk);
457 
458 	err = -EOPNOTSUPP;
459 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460 		goto out;	/* Only stream/seqpacket sockets accept */
461 	err = -EINVAL;
462 	if (!u->addr)
463 		goto out;	/* No listens on an unbound socket */
464 	unix_state_lock(sk);
465 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466 		goto out_unlock;
467 	if (backlog > sk->sk_max_ack_backlog)
468 		wake_up_interruptible_all(&u->peer_wait);
469 	sk->sk_max_ack_backlog	= backlog;
470 	sk->sk_state		= TCP_LISTEN;
471 	/* set credentials so connect can copy them */
472 	sk->sk_peercred.pid	= task_tgid_vnr(current);
473 	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474 	err = 0;
475 
476 out_unlock:
477 	unix_state_unlock(sk);
478 out:
479 	return err;
480 }
481 
482 static int unix_release(struct socket *);
483 static int unix_bind(struct socket *, struct sockaddr *, int);
484 static int unix_stream_connect(struct socket *, struct sockaddr *,
485 			       int addr_len, int flags);
486 static int unix_socketpair(struct socket *, struct socket *);
487 static int unix_accept(struct socket *, struct socket *, int);
488 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490 static unsigned int unix_dgram_poll(struct file *, struct socket *,
491 				    poll_table *);
492 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493 static int unix_shutdown(struct socket *, int);
494 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t);
496 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497 			       struct msghdr *, size_t, int);
498 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t);
500 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501 			      struct msghdr *, size_t, int);
502 static int unix_dgram_connect(struct socket *, struct sockaddr *,
503 			      int, int);
504 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505 				  struct msghdr *, size_t);
506 
/*
 * Socket operations that unix_create() installs for SOCK_STREAM sockets.
 * setsockopt, getsockopt, mmap and sendpage use the generic sock_no_*
 * handlers.
 */
507 static const struct proto_ops unix_stream_ops = {
508 	.family =	PF_UNIX,
509 	.owner =	THIS_MODULE,
510 	.release =	unix_release,
511 	.bind =		unix_bind,
512 	.connect =	unix_stream_connect,
513 	.socketpair =	unix_socketpair,
514 	.accept =	unix_accept,
515 	.getname =	unix_getname,
516 	.poll =		unix_poll,
517 	.ioctl =	unix_ioctl,
518 	.listen =	unix_listen,
519 	.shutdown =	unix_shutdown,
520 	.setsockopt =	sock_no_setsockopt,
521 	.getsockopt =	sock_no_getsockopt,
522 	.sendmsg =	unix_stream_sendmsg,
523 	.recvmsg =	unix_stream_recvmsg,
524 	.mmap =		sock_no_mmap,
525 	.sendpage =	sock_no_sendpage,
526 };
527 
/*
 * Socket operations that unix_create() installs for SOCK_DGRAM sockets
 * (SOCK_RAW requests are turned into SOCK_DGRAM there). accept and listen
 * use the generic sock_no_* handlers, as do setsockopt, getsockopt, mmap
 * and sendpage.
 */
528 static const struct proto_ops unix_dgram_ops = {
529 	.family =	PF_UNIX,
530 	.owner =	THIS_MODULE,
531 	.release =	unix_release,
532 	.bind =		unix_bind,
533 	.connect =	unix_dgram_connect,
534 	.socketpair =	unix_socketpair,
535 	.accept =	sock_no_accept,
536 	.getname =	unix_getname,
537 	.poll =		unix_dgram_poll,
538 	.ioctl =	unix_ioctl,
539 	.listen =	sock_no_listen,
540 	.shutdown =	unix_shutdown,
541 	.setsockopt =	sock_no_setsockopt,
542 	.getsockopt =	sock_no_getsockopt,
543 	.sendmsg =	unix_dgram_sendmsg,
544 	.recvmsg =	unix_dgram_recvmsg,
545 	.mmap =		sock_no_mmap,
546 	.sendpage =	sock_no_sendpage,
547 };
548 
/*
 * Socket operations that unix_create() installs for SOCK_SEQPACKET sockets.
 * Connection handling (connect, accept, listen) is shared with the stream
 * table, while poll and recvmsg are shared with the datagram table; sendmsg
 * has its own seqpacket handler.
 */
549 static const struct proto_ops unix_seqpacket_ops = {
550 	.family =	PF_UNIX,
551 	.owner =	THIS_MODULE,
552 	.release =	unix_release,
553 	.bind =		unix_bind,
554 	.connect =	unix_stream_connect,
555 	.socketpair =	unix_socketpair,
556 	.accept =	unix_accept,
557 	.getname =	unix_getname,
558 	.poll =		unix_dgram_poll,
559 	.ioctl =	unix_ioctl,
560 	.listen =	unix_listen,
561 	.shutdown =	unix_shutdown,
562 	.setsockopt =	sock_no_setsockopt,
563 	.getsockopt =	sock_no_getsockopt,
564 	.sendmsg =	unix_seqpacket_sendmsg,
565 	.recvmsg =	unix_dgram_recvmsg,
566 	.mmap =		sock_no_mmap,
567 	.sendpage =	sock_no_sendpage,
568 };
569 
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size makes
 * each allocated sock large enough to hold a struct unix_sock.
 */
570 static struct proto unix_proto = {
571 	.name			= "UNIX",
572 	.owner			= THIS_MODULE,
573 	.obj_size		= sizeof(struct unix_sock),
574 };
575 
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * don't trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key
581  * (done with lockdep_set_class() in unix_create1()):
     */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
unix_create1(struct net * net,struct socket * sock)584 static struct sock *unix_create1(struct net *net, struct socket *sock)
585 {
586 	struct sock *sk = NULL;
587 	struct unix_sock *u;
588 
589 	atomic_inc(&unix_nr_socks);
590 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591 		goto out;
592 
593 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594 	if (!sk)
595 		goto out;
596 
597 	sock_init_data(sock, sk);
598 	lockdep_set_class(&sk->sk_receive_queue.lock,
599 				&af_unix_sk_receive_queue_lock_key);
600 
601 	sk->sk_write_space	= unix_write_space;
602 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603 	sk->sk_destruct		= unix_sock_destructor;
604 	u	  = unix_sk(sk);
605 	u->dentry = NULL;
606 	u->mnt	  = NULL;
607 	spin_lock_init(&u->lock);
608 	atomic_long_set(&u->inflight, 0);
609 	INIT_LIST_HEAD(&u->link);
610 	mutex_init(&u->readlock); /* single task reading lock */
611 	init_waitqueue_head(&u->peer_wait);
612 	unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614 	if (sk == NULL)
615 		atomic_dec(&unix_nr_socks);
616 	else {
617 		local_bh_disable();
618 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619 		local_bh_enable();
620 	}
621 	return sk;
622 }
623 
unix_create(struct net * net,struct socket * sock,int protocol)624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626 	if (protocol && protocol != PF_UNIX)
627 		return -EPROTONOSUPPORT;
628 
629 	sock->state = SS_UNCONNECTED;
630 
631 	switch (sock->type) {
632 	case SOCK_STREAM:
633 		sock->ops = &unix_stream_ops;
634 		break;
635 		/*
636 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
637 		 *	nothing uses it.
638 		 */
639 	case SOCK_RAW:
640 		sock->type = SOCK_DGRAM;
641 	case SOCK_DGRAM:
642 		sock->ops = &unix_dgram_ops;
643 		break;
644 	case SOCK_SEQPACKET:
645 		sock->ops = &unix_seqpacket_ops;
646 		break;
647 	default:
648 		return -ESOCKTNOSUPPORT;
649 	}
650 
651 	return unix_create1(net, sock) ? 0 : -ENOMEM;
652 }
653 
unix_release(struct socket * sock)654 static int unix_release(struct socket *sock)
655 {
656 	struct sock *sk = sock->sk;
657 
658 	if (!sk)
659 		return 0;
660 
661 	sock->sk = NULL;
662 
663 	return unix_release_sock(sk, 0);
664 }
665 
unix_autobind(struct socket * sock)666 static int unix_autobind(struct socket *sock)
667 {
668 	struct sock *sk = sock->sk;
669 	struct net *net = sock_net(sk);
670 	struct unix_sock *u = unix_sk(sk);
671 	static u32 ordernum = 1;
672 	struct unix_address *addr;
673 	int err;
674 
675 	mutex_lock(&u->readlock);
676 
677 	err = 0;
678 	if (u->addr)
679 		goto out;
680 
681 	err = -ENOMEM;
682 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683 	if (!addr)
684 		goto out;
685 
686 	addr->name->sun_family = AF_UNIX;
687 	atomic_set(&addr->refcnt, 1);
688 
689 retry:
690 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692 
693 	spin_lock(&unix_table_lock);
694 	ordernum = (ordernum+1)&0xFFFFF;
695 
696 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697 				      addr->hash)) {
698 		spin_unlock(&unix_table_lock);
699 		/* Sanity yield. It is unusual case, but yet... */
700 		if (!(ordernum&0xFF))
701 			yield();
702 		goto retry;
703 	}
704 	addr->hash ^= sk->sk_type;
705 
706 	__unix_remove_socket(sk);
707 	u->addr = addr;
708 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
709 	spin_unlock(&unix_table_lock);
710 	err = 0;
711 
712 out:	mutex_unlock(&u->readlock);
713 	return err;
714 }
715 
/*
 * Resolve the address @sunname to a socket of type @type in namespace @net.
 *
 * Filesystem names are resolved through the VFS (write permission on the
 * inode is required) and then matched by inode; abstract names are looked
 * up by name and @hash.
 *
 * Returns the socket with a reference held. On failure returns NULL and
 * stores the error in *@error: the VFS lookup or permission error,
 * -ECONNREFUSED when no socket is found, or -EPROTOTYPE when a filesystem
 * socket exists but has a different type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct inode *inode;
	struct sock *found;
	struct path path;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		/* Abstract name */
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (found == NULL) {
			err = -ECONNREFUSED;
			goto fail;
		}
		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	/* Filesystem name */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (found == NULL)
		goto put_fail;

	if (found->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	if (found->sk_type != type) {
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
770 
771 
unix_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)772 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773 {
774 	struct sock *sk = sock->sk;
775 	struct net *net = sock_net(sk);
776 	struct unix_sock *u = unix_sk(sk);
777 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778 	struct dentry *dentry = NULL;
779 	struct nameidata nd;
780 	int err;
781 	unsigned hash;
782 	struct unix_address *addr;
783 	struct hlist_head *list;
784 
785 	err = -EINVAL;
786 	if (sunaddr->sun_family != AF_UNIX)
787 		goto out;
788 
789 	if (addr_len == sizeof(short)) {
790 		err = unix_autobind(sock);
791 		goto out;
792 	}
793 
794 	err = unix_mkname(sunaddr, addr_len, &hash);
795 	if (err < 0)
796 		goto out;
797 	addr_len = err;
798 
799 	mutex_lock(&u->readlock);
800 
801 	err = -EINVAL;
802 	if (u->addr)
803 		goto out_up;
804 
805 	err = -ENOMEM;
806 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807 	if (!addr)
808 		goto out_up;
809 
810 	memcpy(addr->name, sunaddr, addr_len);
811 	addr->len = addr_len;
812 	addr->hash = hash ^ sk->sk_type;
813 	atomic_set(&addr->refcnt, 1);
814 
815 	if (sunaddr->sun_path[0]) {
816 		unsigned int mode;
817 		err = 0;
818 		/*
819 		 * Get the parent directory, calculate the hash for last
820 		 * component.
821 		 */
822 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823 		if (err)
824 			goto out_mknod_parent;
825 
826 		dentry = lookup_create(&nd, 0);
827 		err = PTR_ERR(dentry);
828 		if (IS_ERR(dentry))
829 			goto out_mknod_unlock;
830 
831 		/*
832 		 * All right, let's create it.
833 		 */
834 		mode = S_IFSOCK |
835 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
836 		err = mnt_want_write(nd.path.mnt);
837 		if (err)
838 			goto out_mknod_dput;
839 		err = security_path_mknod(&nd.path, dentry, mode, 0);
840 		if (err)
841 			goto out_mknod_drop_write;
842 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
843 out_mknod_drop_write:
844 		mnt_drop_write(nd.path.mnt);
845 		if (err)
846 			goto out_mknod_dput;
847 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
848 		dput(nd.path.dentry);
849 		nd.path.dentry = dentry;
850 
851 		addr->hash = UNIX_HASH_SIZE;
852 	}
853 
854 	spin_lock(&unix_table_lock);
855 
856 	if (!sunaddr->sun_path[0]) {
857 		err = -EADDRINUSE;
858 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
859 					      sk->sk_type, hash)) {
860 			unix_release_addr(addr);
861 			goto out_unlock;
862 		}
863 
864 		list = &unix_socket_table[addr->hash];
865 	} else {
866 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
867 		u->dentry = nd.path.dentry;
868 		u->mnt    = nd.path.mnt;
869 	}
870 
871 	err = 0;
872 	__unix_remove_socket(sk);
873 	u->addr = addr;
874 	__unix_insert_socket(list, sk);
875 
876 out_unlock:
877 	spin_unlock(&unix_table_lock);
878 out_up:
879 	mutex_unlock(&u->readlock);
880 out:
881 	return err;
882 
883 out_mknod_dput:
884 	dput(dentry);
885 out_mknod_unlock:
886 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
887 	path_put(&nd.path);
888 out_mknod_parent:
889 	if (err == -EEXIST)
890 		err = -EADDRINUSE;
891 	unix_release_addr(addr);
892 	goto out_up;
893 }
894 
/*
 * Take the state locks of @sk1 and @sk2. The lower-addressed sock is locked
 * first so that two tasks locking the same pair cannot deadlock. When @sk2
 * is NULL or the same sock as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
909 
/*
 * Release the locks taken by unix_state_double_lock(): always @sk1, and
 * @sk2 as well when it is a distinct, non-NULL sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && !unlikely(sk1 == sk2))
		unix_state_unlock(sk2);
}
919 
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)920 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
921 			      int alen, int flags)
922 {
923 	struct sock *sk = sock->sk;
924 	struct net *net = sock_net(sk);
925 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
926 	struct sock *other;
927 	unsigned hash;
928 	int err;
929 
930 	if (addr->sa_family != AF_UNSPEC) {
931 		err = unix_mkname(sunaddr, alen, &hash);
932 		if (err < 0)
933 			goto out;
934 		alen = err;
935 
936 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
937 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
938 			goto out;
939 
940 restart:
941 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
942 		if (!other)
943 			goto out;
944 
945 		unix_state_double_lock(sk, other);
946 
947 		/* Apparently VFS overslept socket death. Retry. */
948 		if (sock_flag(other, SOCK_DEAD)) {
949 			unix_state_double_unlock(sk, other);
950 			sock_put(other);
951 			goto restart;
952 		}
953 
954 		err = -EPERM;
955 		if (!unix_may_send(sk, other))
956 			goto out_unlock;
957 
958 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
959 		if (err)
960 			goto out_unlock;
961 
962 	} else {
963 		/*
964 		 *	1003.1g breaking connected state with AF_UNSPEC
965 		 */
966 		other = NULL;
967 		unix_state_double_lock(sk, other);
968 	}
969 
970 	/*
971 	 * If it was connected, reconnect.
972 	 */
973 	if (unix_peer(sk)) {
974 		struct sock *old_peer = unix_peer(sk);
975 		unix_peer(sk) = other;
976 		unix_state_double_unlock(sk, other);
977 
978 		if (other != old_peer)
979 			unix_dgram_disconnected(sk, old_peer);
980 		sock_put(old_peer);
981 	} else {
982 		unix_peer(sk) = other;
983 		unix_state_double_unlock(sk, other);
984 	}
985 	return 0;
986 
987 out_unlock:
988 	unix_state_double_unlock(sk, other);
989 	sock_put(other);
990 out:
991 	return err;
992 }
993 
unix_wait_for_peer(struct sock * other,long timeo)994 static long unix_wait_for_peer(struct sock *other, long timeo)
995 {
996 	struct unix_sock *u = unix_sk(other);
997 	int sched;
998 	DEFINE_WAIT(wait);
999 
1000 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001 
1002 	sched = !sock_flag(other, SOCK_DEAD) &&
1003 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1004 		unix_recvq_full(other);
1005 
1006 	unix_state_unlock(other);
1007 
1008 	if (sched)
1009 		timeo = schedule_timeout(timeo);
1010 
1011 	finish_wait(&u->peer_wait, &wait);
1012 	return timeo;
1013 }
1014 
/*
 *	Connect @sock to the listening socket named by @uaddr.
 *
 *	A fresh sock (newsk) and a one-byte skb allocated against it with
 *	sock_wmalloc() are set up before any state lock is taken.  On success
 *	newsk becomes the peer of sk, sk becomes the peer of newsk, and the
 *	skb is queued on the listener's receive queue (unix_accept() later
 *	dequeues it and grafts skb->sk onto the accepted socket).
 *
 *	@flags: only O_NONBLOCK is looked at, to pick the send timeout.
 *
 *	Returns 0 on success or a negative errno:
 *	  -ENOMEM        newsk or the skb could not be allocated
 *	  -ECONNREFUSED  the target is not in TCP_LISTEN
 *	  -EAGAIN        the listener's queue is full and the timeout is zero
 *	  -EISCONN       sk is already TCP_ESTABLISHED
 *	  -EINVAL        sk is in any state other than CLOSE/ESTABLISHED
 *	or whatever unix_mkname(), unix_autobind(), unix_find_other(),
 *	sock_intr_errno() or security_unix_stream_connect() reported.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	/* On success unix_mkname() returns the length to use from here on. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* With SOCK_PASSCRED set, an unbound socket is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we did it after the state is locked,
	   we would have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() drops other's state lock for us. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		/* Drop the reference and look the listener up again. */
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab the write lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking the socket
	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   find that out before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed between the unlocked read and taking the lock: retry. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	/* newsk keeps a reference on sk through its peer pointer. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* sk keeps a reference on newsk through its peer pointer. */
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* Queue the skb on the listening sock and let it know. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* Undo whatever was allocated or referenced before the failure. */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1189 
unix_socketpair(struct socket * socka,struct socket * sockb)1190 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1191 {
1192 	struct sock *ska = socka->sk, *skb = sockb->sk;
1193 
1194 	/* Join our sockets back to back */
1195 	sock_hold(ska);
1196 	sock_hold(skb);
1197 	unix_peer(ska) = skb;
1198 	unix_peer(skb) = ska;
1199 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1200 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1201 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1202 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1203 
1204 	if (ska->sk_type != SOCK_DGRAM) {
1205 		ska->sk_state = TCP_ESTABLISHED;
1206 		skb->sk_state = TCP_ESTABLISHED;
1207 		socka->state  = SS_CONNECTED;
1208 		sockb->state  = SS_CONNECTED;
1209 	}
1210 	return 0;
1211 }
1212 
unix_accept(struct socket * sock,struct socket * newsock,int flags)1213 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1214 {
1215 	struct sock *sk = sock->sk;
1216 	struct sock *tsk;
1217 	struct sk_buff *skb;
1218 	int err;
1219 
1220 	err = -EOPNOTSUPP;
1221 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1222 		goto out;
1223 
1224 	err = -EINVAL;
1225 	if (sk->sk_state != TCP_LISTEN)
1226 		goto out;
1227 
1228 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1229 	 * so that no locks are necessary.
1230 	 */
1231 
1232 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1233 	if (!skb) {
1234 		/* This means receive shutdown. */
1235 		if (err == 0)
1236 			err = -EINVAL;
1237 		goto out;
1238 	}
1239 
1240 	tsk = skb->sk;
1241 	skb_free_datagram(sk, skb);
1242 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1243 
1244 	/* attach accepted sock to socket */
1245 	unix_state_lock(tsk);
1246 	newsock->state = SS_CONNECTED;
1247 	sock_graft(tsk, newsock);
1248 	unix_state_unlock(tsk);
1249 	return 0;
1250 
1251 out:
1252 	return err;
1253 }
1254 
1255 
unix_getname(struct socket * sock,struct sockaddr * uaddr,int * uaddr_len,int peer)1256 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1257 {
1258 	struct sock *sk = sock->sk;
1259 	struct unix_sock *u;
1260 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1261 	int err = 0;
1262 
1263 	if (peer) {
1264 		sk = unix_peer_get(sk);
1265 
1266 		err = -ENOTCONN;
1267 		if (!sk)
1268 			goto out;
1269 		err = 0;
1270 	} else {
1271 		sock_hold(sk);
1272 	}
1273 
1274 	u = unix_sk(sk);
1275 	unix_state_lock(sk);
1276 	if (!u->addr) {
1277 		sunaddr->sun_family = AF_UNIX;
1278 		sunaddr->sun_path[0] = 0;
1279 		*uaddr_len = sizeof(short);
1280 	} else {
1281 		struct unix_address *addr = u->addr;
1282 
1283 		*uaddr_len = addr->len;
1284 		memcpy(sunaddr, addr->name, *uaddr_len);
1285 	}
1286 	unix_state_unlock(sk);
1287 	sock_put(sk);
1288 out:
1289 	return err;
1290 }
1291 
unix_detach_fds(struct scm_cookie * scm,struct sk_buff * skb)1292 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1293 {
1294 	int i;
1295 
1296 	scm->fp = UNIXCB(skb).fp;
1297 	skb->destructor = sock_wfree;
1298 	UNIXCB(skb).fp = NULL;
1299 
1300 	for (i = scm->fp->count-1; i >= 0; i--)
1301 		unix_notinflight(scm->fp->fp[i]);
1302 }
1303 
unix_destruct_fds(struct sk_buff * skb)1304 static void unix_destruct_fds(struct sk_buff *skb)
1305 {
1306 	struct scm_cookie scm;
1307 	memset(&scm, 0, sizeof(scm));
1308 	unix_detach_fds(&scm, skb);
1309 
1310 	/* Alas, it calls VFS */
1311 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1312 	scm_destroy(&scm);
1313 	sock_wfree(skb);
1314 }
1315 
unix_attach_fds(struct scm_cookie * scm,struct sk_buff * skb)1316 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1317 {
1318 	int i;
1319 
1320 	/*
1321 	 * Need to duplicate file references for the sake of garbage
1322 	 * collection.  Otherwise a socket in the fps might become a
1323 	 * candidate for GC while the skb is not yet queued.
1324 	 */
1325 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1326 	if (!UNIXCB(skb).fp)
1327 		return -ENOMEM;
1328 
1329 	for (i = scm->fp->count-1; i >= 0; i--)
1330 		unix_inflight(scm->fp->fp[i]);
1331 	skb->destructor = unix_destruct_fds;
1332 	return 0;
1333 }
1334 
/*
 *	Send AF_UNIX data.
 *
 *	Sends one message of @len bytes as a single skb.  The destination is
 *	msg->msg_name when msg->msg_namelen is non-zero, otherwise the
 *	connected peer.
 *
 *	Returns @len on success or a negative errno:
 *	  -EOPNOTSUPP    MSG_OOB was requested
 *	  -ENOTCONN      no address given and no peer
 *	  -EMSGSIZE      @len exceeds sk_sndbuf - 32
 *	  -ECONNRESET    the destination died and there is no address to
 *	                 look it up again
 *	  -ECONNREFUSED  the dead destination was our connected peer
 *	  -EPERM         unix_may_send() refused
 *	  -EPIPE         the destination has RCV_SHUTDOWN set
 *	  -EAGAIN        the destination queue is full and the timeout is zero
 *	or whatever scm_send(), unix_mkname(), unix_autobind(),
 *	sock_alloc_send_skb(), unix_attach_fds(), memcpy_fromiovec(),
 *	unix_find_other(), security_unix_may_send() or sock_intr_errno()
 *	reported.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not provide one. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: the lookup itself happens at restart. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No address: send to the connected peer (reference taken). */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* With SOCK_PASSCRED set, an unbound socket is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	/* Stamp credentials, passed files and security data on the skb. */
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	/* (Re)resolve the destination if we do not hold one right now. */
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		/*
		 * If the dead sock was our connected peer, disconnect from
		 * it and fail with -ECONNREFUSED; otherwise look the
		 * address up again.
		 */
		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			/* Drop the reference the peer pointer was holding. */
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* A full queue only blocks us when the receiver is not connected back to us. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* unix_wait_for_peer() drops other's state lock for us. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1492 
1493 
/*
 *	Send data on a connected stream socket.
 *
 *	The payload is cut into skbs of at most half the send buffer minus
 *	64 bytes (and never more than SKB_MAX_ALLOC); each one is queued on
 *	the peer's receive queue.  File descriptors carried in the scm cookie
 *	are attached to the FIRST skb only, so a message that needs several
 *	skbs delivers them once instead of once per fragment.
 *
 *	Returns the number of bytes queued if any were, otherwise a negative
 *	errno:
 *	  -EOPNOTSUPP  MSG_OOB requested, or an address was supplied on a
 *	               socket that is not TCP_ESTABLISHED
 *	  -EISCONN     an address was supplied on an established socket
 *	  -ENOTCONN    the socket has no peer
 *	  -EPIPE       we are shut down for sending, or the peer is dead or
 *	               shut down for receiving (SIGPIPE is raised as well
 *	               unless MSG_NOSIGNAL is set or data was already sent)
 *	or whatever scm_send(), sock_alloc_send_skb(), unix_attach_fds() or
 *	memcpy_fromiovec() reported.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	int fds_sent = 0;	/* set once the file list rode on an skb */
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not provide one. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	/* Stream sockets never take a destination address. */
	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	}

	err = -ENOTCONN;
	other = unix_peer(sk);
	if (!other)
		goto out_err;

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

		/*
		 * Only send the fds in the first buffer.  Attaching them to
		 * every fragment would hand the receiver one duplicate set
		 * of descriptors per skb.
		 */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = 1;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Signal only when nothing at all went out and the caller allows it. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A partial send is reported as success with the short count. */
	return sent ? : err;
}
1609 
/*
 *	Send on a SOCK_SEQPACKET socket.
 *
 *	Reports a pending socket error first, insists on an established
 *	connection (-ENOTCONN otherwise), discards any supplied destination
 *	address and then hands over to unix_dgram_sendmsg().
 */
static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	int pending = sock_error(sk);

	if (pending)
		return pending;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Force the datagram path to use the connected peer. */
	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}
1628 
unix_copy_addr(struct msghdr * msg,struct sock * sk)1629 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1630 {
1631 	struct unix_sock *u = unix_sk(sk);
1632 
1633 	msg->msg_namelen = 0;
1634 	if (u->addr) {
1635 		msg->msg_namelen = u->addr->len;
1636 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1637 	}
1638 }
1639 
/*
 *	Receive one datagram.
 *
 *	Copies at most @size bytes of the next queued skb into msg->msg_iov;
 *	MSG_TRUNC is set in msg->msg_flags when the skb was longer than
 *	@size.  The sender's address is copied out when msg->msg_name is set,
 *	and credentials, security data and passed files are delivered through
 *	the scm cookie.  The whole operation runs under u->readlock.
 *
 *	Returns the number of bytes copied, 0 for a SOCK_SEQPACKET socket
 *	whose receive side is shut down when skb_recv_datagram() reported
 *	-EAGAIN, or a negative errno (-EOPNOTSUPP for MSG_OOB, otherwise the
 *	error from skb_recv_datagram() or skb_copy_datagram_iovec()).
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* We took an skb off our queue: wake tasks sleeping on our peer_wait. */
	wake_up_interruptible_sync(&u->peer_wait);

	/* The address reported is that of the sock recorded in skb->sk. */
	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Clamp to the datagram length, or flag truncation. */
	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	/* Use a zeroed on-stack cookie when the caller did not provide one. */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Real read: the file list moves from the skb to the cookie. */
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1722 
/*
 *	Sleep until data has arrived. But check for races..
 *
 *	Waits on sk->sk_sleep until the receive queue is non-empty, an error
 *	is pending, the receive side is shut down, a signal is pending or
 *	the timeout has run out.  Returns the timeout that is left.
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		/* Get on the wait queue before testing the wake-up conditions. */
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep. */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		/* The state lock is not held across the sleep. */
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}
1754 
1755 
1756 
/*
 *	Receive data from a connected stream socket.
 *
 *	Dequeues skbs one at a time and copies their data into msg->msg_iov
 *	until @size bytes were copied or one of the stop conditions below
 *	hits.  Data from writers with different credentials is never merged
 *	into one read, and the loop stops after an skb that carried passed
 *	files.  With MSG_PEEK a single skb is copied and put back.
 *
 *	Returns the number of bytes copied if non-zero (this slot also
 *	carries -EFAULT when the very first copy faulted); otherwise the
 *	pending error: -EINVAL if the socket is not TCP_ESTABLISHED,
 *	-EOPNOTSUPP for MSG_OOB, -EAGAIN when nothing is queued and the
 *	timeout is zero, the sock_error() value, or sock_intr_errno() after a
 *	signal.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* target: how many bytes must be copied before an empty queue ends the read. */
	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	/* Use a zeroed on-stack cookie when the caller did not provide one. */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			/* Queue is empty: done if we already have enough. */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* readlock is not held while we sleep waiting for data. */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			/* Leaves through out: readlock is not held at this point. */
			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy failed: requeue the skb untouched. */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that delivered passed files. */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1899 
unix_shutdown(struct socket * sock,int mode)1900 static int unix_shutdown(struct socket *sock, int mode)
1901 {
1902 	struct sock *sk = sock->sk;
1903 	struct sock *other;
1904 
1905 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1906 
1907 	if (mode) {
1908 		unix_state_lock(sk);
1909 		sk->sk_shutdown |= mode;
1910 		other = unix_peer(sk);
1911 		if (other)
1912 			sock_hold(other);
1913 		unix_state_unlock(sk);
1914 		sk->sk_state_change(sk);
1915 
1916 		if (other &&
1917 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1918 
1919 			int peer_mode = 0;
1920 
1921 			if (mode&RCV_SHUTDOWN)
1922 				peer_mode |= SEND_SHUTDOWN;
1923 			if (mode&SEND_SHUTDOWN)
1924 				peer_mode |= RCV_SHUTDOWN;
1925 			unix_state_lock(other);
1926 			other->sk_shutdown |= peer_mode;
1927 			unix_state_unlock(other);
1928 			other->sk_state_change(other);
1929 			read_lock(&other->sk_callback_lock);
1930 			if (peer_mode == SHUTDOWN_MASK)
1931 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1932 			else if (peer_mode & RCV_SHUTDOWN)
1933 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1934 			read_unlock(&other->sk_callback_lock);
1935 		}
1936 		if (other)
1937 			sock_put(other);
1938 	}
1939 	return 0;
1940 }
1941 
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)1942 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1943 {
1944 	struct sock *sk = sock->sk;
1945 	long amount = 0;
1946 	int err;
1947 
1948 	switch (cmd) {
1949 	case SIOCOUTQ:
1950 		amount = atomic_read(&sk->sk_wmem_alloc);
1951 		err = put_user(amount, (int __user *)arg);
1952 		break;
1953 	case SIOCINQ:
1954 		{
1955 			struct sk_buff *skb;
1956 
1957 			if (sk->sk_state == TCP_LISTEN) {
1958 				err = -EINVAL;
1959 				break;
1960 			}
1961 
1962 			spin_lock(&sk->sk_receive_queue.lock);
1963 			if (sk->sk_type == SOCK_STREAM ||
1964 			    sk->sk_type == SOCK_SEQPACKET) {
1965 				skb_queue_walk(&sk->sk_receive_queue, skb)
1966 					amount += skb->len;
1967 			} else {
1968 				skb = skb_peek(&sk->sk_receive_queue);
1969 				if (skb)
1970 					amount = skb->len;
1971 			}
1972 			spin_unlock(&sk->sk_receive_queue.lock);
1973 			err = put_user(amount, (int __user *)arg);
1974 			break;
1975 		}
1976 
1977 	default:
1978 		err = -ENOIOCTLCMD;
1979 		break;
1980 	}
1981 	return err;
1982 }
1983 
/*
 *	Poll handler: registers on sk->sk_sleep and returns the mask of
 *	events that are currently true for the socket.
 */
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int events = 0;

	poll_wait(file, sk->sk_sleep, wait);

	/* exceptional events? */
	if (sk->sk_err)
		events |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		events |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		events |= POLLRDHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		events |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	switch (sk->sk_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (sk->sk_state == TCP_CLOSE)
			events |= POLLHUP;
		break;
	default:
		break;
	}

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		events |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return events;
}
2019 
/*
 *	Poll handler that, unlike unix_poll(), also takes the peer's receive
 *	queue into account when reporting writability.
 *
 *	Registers on sk->sk_sleep and, when there is a peer that is not
 *	connected back to us, on the peer's peer_wait queue as well.  Returns
 *	the mask of events that are currently true; when the socket is not
 *	writable SOCK_ASYNC_NOSPACE is set on it.
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		/* unix_peer_get() hands back a referenced peer, or NULL. */
		other = unix_peer_get(sk);
		if (other) {
			/*
			 * A peer that is not connected back to us can have a
			 * full queue; wait on its peer_wait too so we hear
			 * about it draining.
			 */
			if (unix_peer(other) != sk) {
				poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
2074 
2075 #ifdef CONFIG_PROC_FS
first_unix_socket(int * i)2076 static struct sock *first_unix_socket(int *i)
2077 {
2078 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2079 		if (!hlist_empty(&unix_socket_table[*i]))
2080 			return __sk_head(&unix_socket_table[*i]);
2081 	}
2082 	return NULL;
2083 }
2084 
next_unix_socket(int * i,struct sock * s)2085 static struct sock *next_unix_socket(int *i, struct sock *s)
2086 {
2087 	struct sock *next = sk_next(s);
2088 	/* More in this chain? */
2089 	if (next)
2090 		return next;
2091 	/* Look for next non-empty chain. */
2092 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2093 		if (!hlist_empty(&unix_socket_table[*i]))
2094 			return __sk_head(&unix_socket_table[*i]);
2095 	}
2096 	return NULL;
2097 }
2098 
/*
 * Per-open state of the seq_file iterator; unix_seq_open() passes the size
 * of this structure to seq_open_net().
 */
struct unix_iter_state {
	struct seq_net_private p;
	int i;	/* unix_socket_table chain index, advanced by first/next_unix_socket() */
};
2103 
/*
 *	Return the @pos-th socket (counting from 0) that belongs to the
 *	seq_file's network namespace, or NULL if there are fewer than that.
 */
static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *s = first_unix_socket(&iter->i);
	loff_t seen = 0;

	while (s) {
		/* Sockets from other namespaces are skipped, not counted. */
		if (sock_net(s) == seq_file_net(seq)) {
			if (seen == pos)
				return s;
			seen++;
		}
		s = next_unix_socket(&iter->i, s);
	}
	return NULL;
}
2119 
/*
 *	seq_file start: take unix_table_lock and return the header token for
 *	position 0, otherwise the socket at position *pos - 1.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;
	return unix_seq_idx(seq, *pos - 1);
}
2126 
/*
 *	seq_file next: bump *pos and return the next socket that belongs to
 *	the seq_file's network namespace, or NULL at the end of the table.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *cur;

	++*pos;

	/* After the header token the walk starts at the first socket. */
	cur = (v == SEQ_START_TOKEN) ? first_unix_socket(&iter->i)
				     : next_unix_socket(&iter->i, v);

	/* Skip over sockets of other namespaces. */
	while (cur && sock_net(cur) != seq_file_net(seq))
		cur = next_unix_socket(&iter->i, cur);

	return cur;
}
2141 
/* seq_file stop: release unix_table_lock, which unix_seq_start() took. */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2147 
unix_seq_show(struct seq_file * seq,void * v)2148 static int unix_seq_show(struct seq_file *seq, void *v)
2149 {
2150 
2151 	if (v == SEQ_START_TOKEN)
2152 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2153 			 "Inode Path\n");
2154 	else {
2155 		struct sock *s = v;
2156 		struct unix_sock *u = unix_sk(s);
2157 		unix_state_lock(s);
2158 
2159 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2160 			s,
2161 			atomic_read(&s->sk_refcnt),
2162 			0,
2163 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2164 			s->sk_type,
2165 			s->sk_socket ?
2166 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2167 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2168 			sock_i_ino(s));
2169 
2170 		if (u->addr) {
2171 			int i, len;
2172 			seq_putc(seq, ' ');
2173 
2174 			i = 0;
2175 			len = u->addr->len - sizeof(short);
2176 			if (!UNIX_ABSTRACT(s))
2177 				len--;
2178 			else {
2179 				seq_putc(seq, '@');
2180 				i++;
2181 			}
2182 			for ( ; i < len; i++)
2183 				seq_putc(seq, u->addr->name->sun_path[i]);
2184 		}
2185 		unix_state_unlock(s);
2186 		seq_putc(seq, '\n');
2187 	}
2188 
2189 	return 0;
2190 }
2191 
2192 static const struct seq_operations unix_seq_ops = {
2193 	.start  = unix_seq_start,
2194 	.next   = unix_seq_next,
2195 	.stop   = unix_seq_stop,
2196 	.show   = unix_seq_show,
2197 };
2198 
unix_seq_open(struct inode * inode,struct file * file)2199 static int unix_seq_open(struct inode *inode, struct file *file)
2200 {
2201 	return seq_open_net(inode, file, &unix_seq_ops,
2202 			    sizeof(struct unix_iter_state));
2203 }
2204 
2205 static const struct file_operations unix_seq_fops = {
2206 	.owner		= THIS_MODULE,
2207 	.open		= unix_seq_open,
2208 	.read		= seq_read,
2209 	.llseek		= seq_lseek,
2210 	.release	= seq_release_net,
2211 };
2212 
2213 #endif
2214 
2215 static struct net_proto_family unix_family_ops = {
2216 	.family = PF_UNIX,
2217 	.create = unix_create,
2218 	.owner	= THIS_MODULE,
2219 };
2220 
2221 
unix_net_init(struct net * net)2222 static int unix_net_init(struct net *net)
2223 {
2224 	int error = -ENOMEM;
2225 
2226 	net->unx.sysctl_max_dgram_qlen = 10;
2227 	if (unix_sysctl_register(net))
2228 		goto out;
2229 
2230 #ifdef CONFIG_PROC_FS
2231 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2232 		unix_sysctl_unregister(net);
2233 		goto out;
2234 	}
2235 #endif
2236 	error = 0;
2237 out:
2238 	return error;
2239 }
2240 
/*
 * Per-network-namespace teardown: drop the sysctl table and remove the
 * "unix" proc entry that unix_net_init() set up for @net.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2246 
2247 static struct pernet_operations unix_net_ops = {
2248 	.init = unix_net_init,
2249 	.exit = unix_net_exit,
2250 };
2251 
af_unix_init(void)2252 static int __init af_unix_init(void)
2253 {
2254 	int rc = -1;
2255 	struct sk_buff *dummy_skb;
2256 
2257 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2258 
2259 	rc = proto_register(&unix_proto, 1);
2260 	if (rc != 0) {
2261 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2262 		       __func__);
2263 		goto out;
2264 	}
2265 
2266 	sock_register(&unix_family_ops);
2267 	register_pernet_subsys(&unix_net_ops);
2268 out:
2269 	return rc;
2270 }
2271 
af_unix_exit(void)2272 static void __exit af_unix_exit(void)
2273 {
2274 	sock_unregister(PF_UNIX);
2275 	proto_unregister(&unix_proto);
2276 	unregister_pernet_subsys(&unix_net_ops);
2277 }
2278 
2279 /* Earlier than device_initcall() so that other drivers invoking
2280    request_module() don't end up in a loop when modprobe tries
2281    to use a UNIX socket. But later than subsys_initcall() because
2282    we depend on stuff initialised there */
2283 fs_initcall(af_unix_init);
2284 module_exit(af_unix_exit);
2285 
2286 MODULE_LICENSE("GPL");
2287 MODULE_ALIAS_NETPROTO(PF_UNIX);
2288