/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *	Linus Torvalds	:	Assorted bug cures.
 *	Niibe Yutaka	:	async I/O support.
 *	Carsten Paeth	:	PF_UNIX check, address fixes.
 *	Alan Cox	:	Limit size of allocated blocks.
 *	Alan Cox	:	Fixed the stupid socketpair bug.
 *	Alan Cox	:	BSD compatibility fine tuning.
 *	Alan Cox	:	Fixed a bug in connect when interrupted.
 *	Alan Cox	:	Sorted out a proper draft version of
 *				file descriptor passing hacked up from
 *				Mike Shaver's work.
 *	Marty Leisner	:	Fixes to fd passing.
 *	Nick Nevin	:	recvmsg bugfix.
 *	Alan Cox	:	Started proper garbage collector.
 *	Heiko Eißfeldt	:	Missing verify_area check.
 *	Alan Cox	:	Started POSIXisms.
 *	Andreas Schwab	:	Replace inode by dentry for proper
 *				reference counting.
 *	Kirk Petersen	:	Made this a module.
 *	Christoph Rohland:	Elegant non-blocking accept/connect algorithm.
 *				Lots of bug fixes.
 *	Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *				by the above two patches.
 *	Andrea Arcangeli:	If possible we block in connect(2)
 *				if the max backlog of the listen socket
 *				has been reached. This won't break
 *				old apps and it will avoid huge amounts
 *				of socks hashed (this for unix_gc()
 *				performance reasons).
 *				Security fix that limits the max
 *				number of socks to 2*max_files and
 *				the number of skbs queueable in the
 *				dgram receiver.
 *	Artur Skawina	:	Hash function optimizations.
 *	Alexey Kuznetsov:	Full scale SMP. Lots of bugs are introduced 8)
 *	Malcolm Beattie	:	Set peercred for socketpair.
 *	Michal Ostrowski:	Module initialization cleanup.
 *	Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *				the core infrastructure is doing that
 *				for all net proto families now (2.5.69+).
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS-based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
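
/*
 * Example (userspace sketch, not part of this file): binding a socket in
 * the abstract namespace described above. The leading NUL byte in sun_path
 * selects the abstract namespace; the name is the remaining bytes, and the
 * passed address length (not a terminator) delimits it.
 *
 *	struct sockaddr_un sa = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memcpy(sa.sun_path, "\0example", 8);	// NUL + 7 name bytes
 *	bind(fd, (struct sockaddr *)&sa,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */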

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)n;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

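/* Note: a peer's receive queue counts as full only once its length strictly
 * exceeds sk_max_ack_backlog, so even a backlog of zero admits one skb.
 */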
static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 * Check unix socket name:
 *	- it should not be zero length.
 *	- if it starts with a non-zero byte, it must be NUL terminated (FS object)
 *	- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
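
/*
 * Worked example for the pathname branch above: for sun_path = "/tmp/x",
 * the returned length is strlen("/tmp/x") + 1 for the NUL plus
 * sizeof(short) for sun_family, i.e. 6 + 1 + 2 = 9. Only abstract names
 * are hashed here; pathname sockets are hashed by inode number when they
 * are bound (see unix_bind()).
 */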

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

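/* A socket counts as writable while queued write memory consumes no more
 * than a quarter of the send buffer, i.e. 4 * wmem_alloc <= sndbuf.
 */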
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows flow
 * control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();	/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog = backlog;
	sk->sk_state = TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static void unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	mutex_lock(&u->readlock);
	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);
}
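
/*
 * Example (userspace sketch, not part of this file): the peek offset
 * guarded above backs SO_PEEK_OFF, which lets repeated MSG_PEEK reads walk
 * forward through queued data instead of re-reading from the start.
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31
 */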

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name =		"UNIX",
	.owner =	THIS_MODULE,
	.obj_size =	sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
			  &af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
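
/*
 * Example (userspace sketch, not part of this file): autobind assigns an
 * abstract name consisting of a NUL byte followed by five hex digits,
 * e.g. "\0" "0002a". It is triggered by binding with only the address
 * family, or implicitly by sending on an unbound SOCK_PASSCRED socket.
 *
 *	struct sockaddr_un sa = { .sun_family = AF_UNIX };
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa_family_t));
 *	// getsockname() now reports a kernel-chosen abstract name.
 */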

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		addr->hash = UNIX_HASH_SIZE;
		hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

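/* Lock two socket states in a consistent order: for two distinct sockets
 * the lower-addressed sock is always locked first, so two racing
 * double-locks can never deadlock against each other.
 */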
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
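
/*
 * Example (userspace sketch, not part of this file): the AF_UNSPEC branch
 * above lets an application dissolve a datagram association.
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *	connect(fd, &sa, sizeof(sa));	// fd: connected AF_UNIX dgram socket
 *	// fd can now sendto() arbitrary peers again.
 */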

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. That is dangerous because a deadlock is
	   possible. Connect-to-self and simultaneous connect attempts are
	   eliminated by checking socket state. other is TCP_LISTEN; if sk is
	   TCP_LISTEN we check this before attempting to grab the lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state = SS_CONNECTED;
		sockb->state = SS_CONNECTED;
	}
	return 0;
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

#define MAX_RECURSION_LEVEL 4
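
/* Bounds how deeply AF_UNIX sockets may be nested inside SCM_RIGHTS
 * messages (a socket carrying a socket carrying a socket ...); senders
 * exceeding this get -ETOOMANYREFS below, keeping the fd garbage
 * collector's work bounded.
 */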
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}
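
/*
 * Example (userspace sketch, not part of this file): passing a file
 * descriptor with SCM_RIGHTS; unix_attach_fds() above is what pins such
 * files while the message is in flight.
 *
 *	char dummy = 'x';
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	union {
 *		struct cmsghdr align;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} u;
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *	};
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type = SCM_RIGHTS;
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);
 */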

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;
	int data_len = 0;
	int sk_locked;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC)
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 * Optimisation for the fact that under 0.01% of X
		 * messages typically need breaking up.
		 */

		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 * Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 * If you pass two values to sock_alloc_send_skb
		 * it tries to grab the large buffer with GFP_NOFS
		 * (which can fail easily), and if that fails it grabs the
		 * fallback size buffer which is under a page and will
		 * succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
				  struct msghdr *msg, size_t size,
				  int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
}
1898
unix_copy_addr(struct msghdr * msg,struct sock * sk)1899 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1900 {
1901 struct unix_sock *u = unix_sk(sk);
1902
1903 msg->msg_namelen = 0;
1904 if (u->addr) {
1905 msg->msg_namelen = u->addr->len;
1906 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1907 }
1908 }

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable what to do with fds on PEEK; we could:
		 *  - not return them: good, but too simple 8)
		 *  - return them, and then not return them again on the
		 *    real read (the old strategy, apparently wrong)
		 *  - clone them (chosen for now, as the most universal
		 *    solution)
		 *
		 * POSIX 1003.1g does not actually define this clearly
		 * at all.  POSIX 1003.1g doesn't define a lot of things
		 * clearly, however!
		 */

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
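
/* Illustrative userspace sketch of the "clone fds" PEEK strategy chosen
 * above: a message carrying SCM_RIGHTS can be peeked and then read, and
 * each recvmsg() installs its own duplicate of the passed descriptor.
 * `fd` is assumed to be a connected AF_UNIX datagram socket.
 *
 *	char data[1], cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	recvmsg(fd, &mh, MSG_PEEK);	-- yields a dup of the sent fd
 *	recvmsg(fd, &mh, 0);		-- yields another dup, consumes msg
 */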

/*
 * Sleep until more data has arrived, but check for races.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = freezable_schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;
	int skip;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent the queue from being reordered
	 * while we sleep in memcpy_toiovec().
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(timeo);
		goto out;
	}

	do {
		int chunk;
		struct sk_buff *skb, *last;

		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
again:
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo, last);

			if (signal_pending(current) ||
			    mutex_lock_interruptible(&u->readlock)) {
				err = sock_intr_errno(timeo);
				goto out;
			}

			continue;
unlock:
			unix_state_unlock(sk);
			break;
		}

		skip = sk_peek_offset(sk, flags);
		while (skip >= skb->len) {
			skip -= skb->len;
			last = skb;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid != siocb->scm->pid) ||
			    !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
			    !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len - skip, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			if (skb->len)
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable; see the note in
			 * unix_dgram_recvmsg().
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
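
/* The sk_peek_offset() handling above backs the generic SO_PEEK_OFF socket
 * option.  A hedged userspace sketch, assuming `fd` is a connected stream
 * socket with data already queued:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	-- peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	-- peeks bytes 16..31, not 0..15 again
 *	recv(fd, buf, 16, 0);		-- a real read winds the offset back
 */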

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
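
/* Seen from userspace, the half-close logic above behaves roughly like
 * this sketch (illustrative; `fd` and its peer are a connected pair):
 *
 *	shutdown(fd, SHUT_WR);
 *	-- the peer's sk_shutdown gains RCV_SHUTDOWN, so its pending
 *	-- recv() wakes up and returns 0 (EOF);
 *	-- our own later write() fails with EPIPE (and raises SIGPIPE
 *	-- unless MSG_NOSIGNAL or SIG_IGN is in effect).
 */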

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += skb->len;
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
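
/* Userspace reaches unix_inq_len()/unix_outq_len() through the standard
 * SIOCINQ/SIOCOUTQ ioctls.  Minimal sketch (assumes <sys/ioctl.h> and
 * <linux/sockios.h>; `fd` is any AF_UNIX socket):
 *
 *	int unread, unsent;
 *	ioctl(fd, SIOCINQ, &unread);	-- bytes queued for reading; for
 *					-- SOCK_DGRAM, only the next datagram
 *	ioctl(fd, SIOCOUTQ, &unsent);	-- bytes not yet consumed by the peer
 */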

static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * We also report writable when the other side has shut down the
	 * connection; this prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
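
/* The POLLOUT gating above means a datagram sender can sleep in poll()
 * until its peer drains a full receive queue.  Illustrative sketch:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	-- blocks while the peer's queue is full
 *	send(fd, buf, len, 0);	-- now unlikely to block on queue space
 */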

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
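
/* Worked example, assuming 64-bit BITS_PER_LONG and UNIX_HASH_BITS == 8
 * (the usual configuration): BUCKET_SPACE = 64 - 9 - 1 = 54, so a seq_file
 * position packs the hash bucket into the top bits and the in-bucket
 * offset into the low 54 bits:
 *
 *	set_bucket_offset(3, 5) == (3UL << 54) | 5
 *	get_bucket(pos) -> 3
 *	get_offset(pos) -> 5
 */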

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
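
/* An illustrative /proc/net/unix line as emitted above (values are made
 * up; the address column is hashed by %pK in practice):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff880123456780: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 *
 * Flags 00010000 is __SO_ACCEPTCON (a listener), Type 0001 is SOCK_STREAM,
 * and St is the SS_* socket state.  Abstract names are printed with a
 * leading '@'.
 */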

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);