/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko EiBfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *		Christoph Rohland:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *		Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids hashing a huge
 *					number of socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *		Alexey Kuznetsov:	Full scale SMP. Lots of bugs are introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS-based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
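
/*
 * For illustration, a minimal userspace sketch (not part of this file) of
 * binding an abstract-namespace socket as described above: the first byte of
 * sun_path is 0 and the name is the bytes that follow, with the total length
 * conveyed by addrlen rather than by a NUL terminator.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(int fd)
 *	{
 *		struct sockaddr_un addr;
 *
 *		memset(&addr, 0, sizeof(addr));
 *		addr.sun_family = AF_UNIX;
 *		addr.sun_path[0] = '\0';		// abstract marker
 *		memcpy(addr.sun_path + 1, "demo", 4);	// name bytes, no NUL needed
 *		// length covers sun_family plus the 5 name bytes
 *		return bind(fd, (struct sockaddr *)&addr,
 *			    offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 *	}
 */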

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by a separate spinlock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 * Check unix socket name:
 *		- should not be zero length.
 *		- if it starts with a non-zero byte, it should be NUL-terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
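
/*
 * Illustrative (hypothetical) calls showing the two name forms unix_mkname()
 * accepts, assuming sa is a struct sockaddr_un with sun_family == AF_UNIX:
 *
 *	// filesystem name: "/tmp/sock" plus a NUL; the returned length is
 *	// recomputed from strlen() and no hash is produced
 *	strcpy(sa.sun_path, "/tmp/sock");
 *	unix_mkname(&sa, sizeof(sa.sun_family) + 10, &hash);
 *
 *	// abstract name: leading 0 byte, length taken literally, and
 *	// *hashp is filled from a checksum of the whole address
 *	sa.sun_path[0] = 0;
 *	memcpy(sa.sun_path + 1, "demo", 4);
 *	unix_mkname(&sa, sizeof(sa.sun_family) + 5, &hash);
 */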

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
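
/*
 * A hypothetical userspace sketch of the situation described above: many
 * clients connect(2) a datagram socket to one receiver (syslog-style) and
 * block in poll(2) for writeability when the receiver's queue is full; the
 * relay below is what turns the receiver's reads into POLLOUT wakeups here.
 * fd, msg and len are assumed to be set up elsewhere.
 *
 *	struct sockaddr_un srv = { .sun_family = AF_UNIX };
 *	struct pollfd pfd;
 *
 *	strcpy(srv.sun_path, "/run/demo-log");	// assumed path
 *	connect(fd, (struct sockaddr *)&srv, sizeof(srv));
 *	pfd.fd = fd;
 *	pfd.events = POLLOUT;
 *	poll(&pfd, 1, -1);	// sleeps until the receiver drains its queue
 *	send(fd, msg, len, 0);
 */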

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key);

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   POLLOUT |
				   POLLWRNORM |
				   POLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

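/* Writable as long as the outstanding write memory stays at or below a
 * quarter of the send buffer; the shift by two below implements that ratio.
 */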
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}
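
/*
 * For reference, a hypothetical userspace use of the hook above: enabling a
 * persistent peek offset via the generic SO_PEEK_OFF socket option, so that
 * successive MSG_PEEK reads advance through the queued data instead of
 * re-reading it.
 *
 *	int off = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	// peeks and moves the offset
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	// peeks the following bytes
 */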

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
			  &af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
	/*
	 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
	 * nothing uses it.
	 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
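
/*
 * For illustration, a hypothetical userspace trigger for the autobind path
 * above: binding with only the address family (addr_len == sizeof(sa_family_t))
 * asks the kernel to pick a unique abstract name of the form "\0XXXXX"
 * (five hex digits) itself.
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family));
 */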

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

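/* Take both socket state locks, always in ascending address order, so two
 * CPUs locking the same pair from opposite ends cannot deadlock.
 */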
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do it after the state is locked,
	   we will have to recheck everything again anyway.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. That is dangerous because a deadlock is
	   possible. The connect-to-self case and a simultaneous
	   attempt to connect are eliminated by checking the socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
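
/*
 * For reference, the classic userspace counterpart of the function above: a
 * pre-connected pair, commonly used as a parent/child IPC channel.
 *
 *	int sv[2];
 *	char buf[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	if (fork() == 0) {
 *		close(sv[0]);
 *		write(sv[1], "hi", 2);	// child talks on sv[1]
 *	} else {
 *		close(sv[1]);
 *		read(sv[0], buf, 2);	// parent listens on sv[0]
 *	}
 */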

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->fp[i]);
	return max_level;
}
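
/*
 * The fds attached above arrive from userspace via SCM_RIGHTS; a minimal
 * hypothetical sender looks like this (error handling omitted):
 *
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	char ctl[CMSG_SPACE(sizeof(int))];
 *	struct msghdr msg = { 0 };
 *	struct cmsghdr *cmsg;
 *
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = ctl;
 *	msg.msg_controllen = sizeof(ctl);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));	// fd_to_pass: any open fd
 *	sendmsg(sock_fd, &msg, 0);
 */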

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
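
/*
 * On the receiving side, a hypothetical userspace sketch for picking up the
 * credentials added above: enable SO_PASSCRED, then look for an
 * SCM_CREDENTIALS control message on recvmsg(2).
 *
 *	int on = 1;
 *	struct ucred *cred;
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(fd, &msg, 0);	// msg.msg_control sized via CMSG_SPACE()
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			cred = (struct ucred *)CMSG_DATA(cmsg);
 *	// cred->pid, cred->uid, cred->gid now describe the sender
 */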
1573
1574 /*
1575 * Send AF_UNIX data.
1576 */
1577
unix_dgram_sendmsg(struct kiocb * kiocb,struct socket * sock,struct msghdr * msg,size_t len)1578 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1579 struct msghdr *msg, size_t len)
1580 {
1581 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1582 struct sock *sk = sock->sk;
1583 struct net *net = sock_net(sk);
1584 struct unix_sock *u = unix_sk(sk);
1585 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1586 struct sock *other = NULL;
1587 int namelen = 0; /* fake GCC */
1588 int err;
1589 unsigned int hash;
1590 struct sk_buff *skb;
1591 long timeo;
1592 struct scm_cookie tmp_scm;
1593 int max_level;
1594 int data_len = 0;
1595 int sk_locked;
1596
1597 if (NULL == siocb->scm)
1598 siocb->scm = &tmp_scm;
1599 wait_for_unix_gc();
1600 err = scm_send(sock, msg, siocb->scm, false);
1601 if (err < 0)
1602 return err;
1603
1604 err = -EOPNOTSUPP;
1605 if (msg->msg_flags&MSG_OOB)
1606 goto out;
1607
1608 if (msg->msg_namelen) {
1609 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1610 if (err < 0)
1611 goto out;
1612 namelen = err;
1613 } else {
1614 sunaddr = NULL;
1615 err = -ENOTCONN;
1616 other = unix_peer_get(sk);
1617 if (!other)
1618 goto out;
1619 }
1620
1621 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1622 && (err = unix_autobind(sock)) != 0)
1623 goto out;
1624
1625 err = -EMSGSIZE;
1626 if (len > sk->sk_sndbuf - 32)
1627 goto out;
1628
1629 if (len > SKB_MAX_ALLOC) {
1630 data_len = min_t(size_t,
1631 len - SKB_MAX_ALLOC,
1632 MAX_SKB_FRAGS * PAGE_SIZE);
1633 data_len = PAGE_ALIGN(data_len);
1634
1635 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1636 }
1637
1638 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1639 msg->msg_flags & MSG_DONTWAIT, &err,
1640 PAGE_ALLOC_COSTLY_ORDER);
1641 if (skb == NULL)
1642 goto out;
1643
1644 err = unix_scm_to_skb(siocb->scm, skb, true);
1645 if (err < 0)
1646 goto out_free;
1647 max_level = err + 1;
1648 unix_get_secdata(siocb->scm, skb);
1649
1650 skb_put(skb, len - data_len);
1651 skb->data_len = data_len;
1652 skb->len = len;
1653 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1654 if (err)
1655 goto out_free;
1656
1657 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1658
1659 restart:
1660 if (!other) {
1661 err = -ECONNRESET;
1662 if (sunaddr == NULL)
1663 goto out_free;
1664
1665 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1666 hash, &err);
1667 if (other == NULL)
1668 goto out_free;
1669 }
1670
1671 if (sk_filter(other, skb) < 0) {
1672 /* Toss the packet but do not return any error to the sender */
1673 err = len;
1674 goto out_free;
1675 }
1676
1677 sk_locked = 0;
1678 unix_state_lock(other);
1679 restart_locked:
1680 err = -EPERM;
1681 if (!unix_may_send(sk, other))
1682 goto out_unlock;
1683
1684 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1685 /*
1686 * Check with 1003.1g - what should
1687 * datagram error
1688 */
1689 unix_state_unlock(other);
1690 sock_put(other);
1691
1692 if (!sk_locked)
1693 unix_state_lock(sk);
1694
1695 err = 0;
1696 if (unix_peer(sk) == other) {
1697 unix_peer(sk) = NULL;
1698 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1699
1700 unix_state_unlock(sk);
1701
1702 unix_dgram_disconnected(sk, other);
1703 sock_put(other);
1704 err = -ECONNREFUSED;
1705 } else {
1706 unix_state_unlock(sk);
1707 }
1708
1709 other = NULL;
1710 if (err)
1711 goto out_free;
1712 goto restart;
1713 }
1714
1715 err = -EPIPE;
1716 if (other->sk_shutdown & RCV_SHUTDOWN)
1717 goto out_unlock;
1718
1719 if (sk->sk_type != SOCK_SEQPACKET) {
1720 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1721 if (err)
1722 goto out_unlock;
1723 }
1724
1725 /* other == sk && unix_peer(other) != sk if
1726 * - unix_peer(sk) == NULL, destination address bound to sk
1727 * - unix_peer(sk) == sk by time of get but disconnected before lock
1728 */
1729 if (other != sk &&
1730 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1731 if (timeo) {
1732 timeo = unix_wait_for_peer(other, timeo);
1733
1734 err = sock_intr_errno(timeo);
1735 if (signal_pending(current))
1736 goto out_free;
1737
1738 goto restart;
1739 }
1740
1741 if (!sk_locked) {
1742 unix_state_unlock(other);
1743 unix_state_double_lock(sk, other);
1744 }
1745
1746 if (unix_peer(sk) != other ||
1747 unix_dgram_peer_wake_me(sk, other)) {
1748 err = -EAGAIN;
1749 sk_locked = 1;
1750 goto out_unlock;
1751 }
1752
1753 if (!sk_locked) {
1754 sk_locked = 1;
1755 goto restart_locked;
1756 }
1757 }
1758
1759 if (unlikely(sk_locked))
1760 unix_state_unlock(sk);
1761
1762 if (sock_flag(other, SOCK_RCVTSTAMP))
1763 __net_timestamp(skb);
1764 maybe_add_creds(skb, sock, other);
1765 skb_queue_tail(&other->sk_receive_queue, skb);
1766 if (max_level > unix_sk(other)->recursion_level)
1767 unix_sk(other)->recursion_level = max_level;
1768 unix_state_unlock(other);
1769 other->sk_data_ready(other);
1770 sock_put(other);
1771 scm_destroy(siocb->scm);
1772 return len;
1773
1774 out_unlock:
1775 if (sk_locked)
1776 unix_state_unlock(sk);
1777 unix_state_unlock(other);
1778 out_free:
1779 kfree_skb(skb);
1780 out:
1781 if (other)
1782 sock_put(other);
1783 scm_destroy(siocb->scm);
1784 return err;
1785 }
1786
1787 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1788 * bytes, and a minimun of a full page.
1789 */
1790 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1791
unix_stream_sendmsg(struct kiocb * kiocb,struct socket * sock,struct msghdr * msg,size_t len)1792 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1793 struct msghdr *msg, size_t len)
1794 {
1795 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1796 struct sock *sk = sock->sk;
1797 struct sock *other = NULL;
1798 int err, size;
1799 struct sk_buff *skb;
1800 int sent = 0;
1801 struct scm_cookie tmp_scm;
1802 bool fds_sent = false;
1803 int max_level;
1804 int data_len;
1805
1806 if (NULL == siocb->scm)
1807 siocb->scm = &tmp_scm;
1808 wait_for_unix_gc();
1809 err = scm_send(sock, msg, siocb->scm, false);
1810 if (err < 0)
1811 return err;
1812
1813 err = -EOPNOTSUPP;
1814 if (msg->msg_flags&MSG_OOB)
1815 goto out_err;
1816
1817 if (msg->msg_namelen) {
1818 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1819 goto out_err;
1820 } else {
1821 err = -ENOTCONN;
1822 other = unix_peer(sk);
1823 if (!other)
1824 goto out_err;
1825 }
1826
1827 if (sk->sk_shutdown & SEND_SHUTDOWN)
1828 goto pipe_err;
1829
1830 while (sent < len) {
1831 size = len - sent;
1832
1833 /* Keep two messages in the pipe so it schedules better */
1834 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1835
1836 /* allow fallback to order-0 allocations */
1837 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1838
1839 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1840
1841 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1842
1843 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1844 msg->msg_flags & MSG_DONTWAIT, &err,
1845 get_order(UNIX_SKB_FRAGS_SZ));
1846 if (!skb)
1847 goto out_err;
1848
1849 /* Only send the fds in the first buffer */
1850 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1851 if (err < 0) {
1852 kfree_skb(skb);
1853 goto out_err;
1854 }
1855 max_level = err + 1;
1856 fds_sent = true;
1857
1858 skb_put(skb, size - data_len);
1859 skb->data_len = data_len;
1860 skb->len = size;
1861 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
1862 sent, size);
1863 if (err) {
1864 kfree_skb(skb);
1865 goto out_err;
1866 }
1867
1868 unix_state_lock(other);
1869
1870 if (sock_flag(other, SOCK_DEAD) ||
1871 (other->sk_shutdown & RCV_SHUTDOWN))
1872 goto pipe_err_free;
1873
1874 maybe_add_creds(skb, sock, other);
1875 skb_queue_tail(&other->sk_receive_queue, skb);
1876 if (max_level > unix_sk(other)->recursion_level)
1877 unix_sk(other)->recursion_level = max_level;
1878 unix_state_unlock(other);
1879 other->sk_data_ready(other);
1880 sent += size;
1881 }
1882
1883 scm_destroy(siocb->scm);
1884 siocb->scm = NULL;
1885
1886 return sent;
1887
1888 pipe_err_free:
1889 unix_state_unlock(other);
1890 kfree_skb(skb);
1891 pipe_err:
1892 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1893 send_sig(SIGPIPE, current, 0);
1894 err = -EPIPE;
1895 out_err:
1896 scm_destroy(siocb->scm);
1897 siocb->scm = NULL;
1898 return sent ? : err;
1899 }
1900
unix_seqpacket_sendmsg(struct kiocb * kiocb,struct socket * sock,struct msghdr * msg,size_t len)1901 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1902 struct msghdr *msg, size_t len)
1903 {
1904 int err;
1905 struct sock *sk = sock->sk;
1906
1907 err = sock_error(sk);
1908 if (err)
1909 return err;
1910
1911 if (sk->sk_state != TCP_ESTABLISHED)
1912 return -ENOTCONN;
1913
1914 if (msg->msg_namelen)
1915 msg->msg_namelen = 0;
1916
1917 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1918 }
1919
unix_seqpacket_recvmsg(struct kiocb * iocb,struct socket * sock,struct msghdr * msg,size_t size,int flags)1920 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1921 struct msghdr *msg, size_t size,
1922 int flags)
1923 {
1924 struct sock *sk = sock->sk;
1925
1926 if (sk->sk_state != TCP_ESTABLISHED)
1927 return -ENOTCONN;
1928
1929 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1930 }
1931
unix_copy_addr(struct msghdr * msg,struct sock * sk)1932 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1933 {
1934 struct unix_sock *u = unix_sk(sk);
1935
1936 if (u->addr) {
1937 msg->msg_namelen = u->addr->len;
1938 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1939 }
1940 }
1941
unix_dgram_recvmsg(struct kiocb * iocb,struct socket * sock,struct msghdr * msg,size_t size,int flags)1942 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1943 struct msghdr *msg, size_t size,
1944 int flags)
1945 {
1946 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1947 struct scm_cookie tmp_scm;
1948 struct sock *sk = sock->sk;
1949 struct unix_sock *u = unix_sk(sk);
1950 int noblock = flags & MSG_DONTWAIT;
1951 struct sk_buff *skb;
1952 int err;
1953 int peeked, skip;
1954
1955 err = -EOPNOTSUPP;
1956 if (flags&MSG_OOB)
1957 goto out;
1958
1959 err = mutex_lock_interruptible(&u->readlock);
1960 if (unlikely(err)) {
1961 		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
1962 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
1963 		 */
1964 err = noblock ? -EAGAIN : -ERESTARTSYS;
1965 goto out;
1966 }
1967
1968 skip = sk_peek_offset(sk, flags);
1969
1970 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1971 if (!skb) {
1972 unix_state_lock(sk);
1973 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1974 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1975 (sk->sk_shutdown & RCV_SHUTDOWN))
1976 err = 0;
1977 unix_state_unlock(sk);
1978 goto out_unlock;
1979 }
1980
1981 wake_up_interruptible_sync_poll(&u->peer_wait,
1982 POLLOUT | POLLWRNORM | POLLWRBAND);
1983
1984 if (msg->msg_name)
1985 unix_copy_addr(msg, skb->sk);
1986
1987 if (size > skb->len - skip)
1988 size = skb->len - skip;
1989 else if (size < skb->len - skip)
1990 msg->msg_flags |= MSG_TRUNC;
1991
1992 err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1993 if (err)
1994 goto out_free;
1995
1996 if (sock_flag(sk, SOCK_RCVTSTAMP))
1997 __sock_recv_timestamp(msg, sk, skb);
1998
1999 if (!siocb->scm) {
2000 siocb->scm = &tmp_scm;
2001 memset(&tmp_scm, 0, sizeof(tmp_scm));
2002 }
2003 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2004 unix_set_secdata(siocb->scm, skb);
2005
2006 if (!(flags & MSG_PEEK)) {
2007 if (UNIXCB(skb).fp)
2008 unix_detach_fds(siocb->scm, skb);
2009
2010 sk_peek_offset_bwd(sk, skb->len);
2011 } else {
2012 		/* It is questionable: on PEEK we could
2013 		   - not return fds: simple, but the descriptors are
2014 		     silently lost;
2015 		   - return fds, but not return them again on the real
2016 		     read (the old strategy, apparently wrong);
2017 		   - clone the fds (chosen here, as the most universal
2018 		     solution).
2019 
2020 		   POSIX 1003.1g does not actually define this clearly
2021 		   at all -- but then POSIX 1003.1g leaves a lot of
2022 		   things unclear!
2023 		*/
2024
2025 sk_peek_offset_fwd(sk, size);
2026
2027 if (UNIXCB(skb).fp)
2028 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2029 }
2030 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2031
2032 scm_recv(sock, msg, siocb->scm, flags);
2033
2034 out_free:
2035 skb_free_datagram(sk, skb);
2036 out_unlock:
2037 mutex_unlock(&u->readlock);
2038 out:
2039 return err;
2040 }
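
/*
 * The MSG_PEEK branch above implements the "clone fds" strategy: a
 * peeking reader receives duplicates of any SCM_RIGHTS descriptors, and
 * the later non-peeking read delivers them again. A hedged userspace
 * sketch (cmsg parsing and error handling elided):
 *
 *	char data[1], ctl[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctl, .msg_controllen = sizeof(ctl),
 *	};
 *	recvmsg(fd, &msg, MSG_PEEK);	// fds arrive as fresh dups
 *	msg.msg_controllen = sizeof(ctl);
 *	recvmsg(fd, &msg, 0);		// fds delivered again, datagram
 *					// consumed this time
 */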
2041
2042 /*
2043  *	Sleep until more data has arrived, re-checking for races on each wakeup.
2044  */
2045 static long unix_stream_data_wait(struct sock *sk, long timeo,
2046 struct sk_buff *last)
2047 {
2048 DEFINE_WAIT(wait);
2049
2050 unix_state_lock(sk);
2051
2052 for (;;) {
2053 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2054
2055 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
2056 sk->sk_err ||
2057 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2058 signal_pending(current) ||
2059 !timeo)
2060 break;
2061
2062 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2063 unix_state_unlock(sk);
2064 timeo = freezable_schedule_timeout(timeo);
2065 unix_state_lock(sk);
2066
2067 if (sock_flag(sk, SOCK_DEAD))
2068 break;
2069
2070 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2071 }
2072
2073 finish_wait(sk_sleep(sk), &wait);
2074 unix_state_unlock(sk);
2075 return timeo;
2076 }
2077
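/*
 * UNIXCB(skb).consumed counts how much of a queued stream skb has
 * already been copied to userspace, so one skb can be drained across
 * several recvmsg() calls; unix_skb_len() is the unread remainder.
 */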
2078 static unsigned int unix_skb_len(const struct sk_buff *skb)
2079 {
2080 return skb->len - UNIXCB(skb).consumed;
2081 }
2082
2083 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2084 struct msghdr *msg, size_t size,
2085 int flags)
2086 {
2087 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2088 struct scm_cookie tmp_scm;
2089 struct sock *sk = sock->sk;
2090 struct unix_sock *u = unix_sk(sk);
2091 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
2092 int copied = 0;
2093 int noblock = flags & MSG_DONTWAIT;
2094 int check_creds = 0;
2095 int target;
2096 int err = 0;
2097 long timeo;
2098 int skip;
2099
2100 err = -EINVAL;
2101 if (sk->sk_state != TCP_ESTABLISHED)
2102 goto out;
2103
2104 err = -EOPNOTSUPP;
2105 if (flags&MSG_OOB)
2106 goto out;
2107
2108 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
2109 timeo = sock_rcvtimeo(sk, noblock);
2110
2111 	/* Lock the socket to prevent the queue from being reordered
2112 	 * while we sleep copying data out to the iovec.
2113 	 */
2114
2115 if (!siocb->scm) {
2116 siocb->scm = &tmp_scm;
2117 memset(&tmp_scm, 0, sizeof(tmp_scm));
2118 }
2119
2120 mutex_lock(&u->readlock);
2121
2122 if (flags & MSG_PEEK)
2123 skip = sk_peek_offset(sk, flags);
2124 else
2125 skip = 0;
2126
2127 do {
2128 int chunk;
2129 struct sk_buff *skb, *last;
2130
2131 unix_state_lock(sk);
2132 if (sock_flag(sk, SOCK_DEAD)) {
2133 err = -ECONNRESET;
2134 goto unlock;
2135 }
2136 last = skb = skb_peek(&sk->sk_receive_queue);
2137 again:
2138 if (skb == NULL) {
2139 unix_sk(sk)->recursion_level = 0;
2140 if (copied >= target)
2141 goto unlock;
2142
2143 /*
2144 * POSIX 1003.1g mandates this order.
2145 */
2146
2147 err = sock_error(sk);
2148 if (err)
2149 goto unlock;
2150 if (sk->sk_shutdown & RCV_SHUTDOWN)
2151 goto unlock;
2152
2153 unix_state_unlock(sk);
2154 err = -EAGAIN;
2155 if (!timeo)
2156 break;
2157 mutex_unlock(&u->readlock);
2158
2159 timeo = unix_stream_data_wait(sk, timeo, last);
2160
2161 if (signal_pending(current)) {
2162 err = sock_intr_errno(timeo);
2163 goto out;
2164 }
2165
2166 mutex_lock(&u->readlock);
2167 continue;
2168 unlock:
2169 unix_state_unlock(sk);
2170 break;
2171 }
2172
2173 while (skip >= unix_skb_len(skb)) {
2174 skip -= unix_skb_len(skb);
2175 last = skb;
2176 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2177 if (!skb)
2178 goto again;
2179 }
2180
2181 unix_state_unlock(sk);
2182
2183 if (check_creds) {
2184 /* Never glue messages from different writers */
2185 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
2186 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2187 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
2188 break;
2189 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2190 /* Copy credentials */
2191 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2192 check_creds = 1;
2193 }
2194
2195 /* Copy address just once */
2196 if (sunaddr) {
2197 unix_copy_addr(msg, skb->sk);
2198 sunaddr = NULL;
2199 }
2200
2201 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2202 if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2203 msg->msg_iov, chunk)) {
2204 if (copied == 0)
2205 copied = -EFAULT;
2206 break;
2207 }
2208 copied += chunk;
2209 size -= chunk;
2210
2211 /* Mark read part of skb as used */
2212 if (!(flags & MSG_PEEK)) {
2213 UNIXCB(skb).consumed += chunk;
2214
2215 sk_peek_offset_bwd(sk, chunk);
2216
2217 if (UNIXCB(skb).fp)
2218 unix_detach_fds(siocb->scm, skb);
2219
2220 if (unix_skb_len(skb))
2221 break;
2222
2223 skb_unlink(skb, &sk->sk_receive_queue);
2224 consume_skb(skb);
2225
2226 if (siocb->scm->fp)
2227 break;
2228 } else {
2229 /* It is questionable, see note in unix_dgram_recvmsg.
2230 */
2231 if (UNIXCB(skb).fp)
2232 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2233
2234 sk_peek_offset_fwd(sk, chunk);
2235
2236 if (UNIXCB(skb).fp)
2237 break;
2238
2239 skip = 0;
2240 last = skb;
2241 unix_state_lock(sk);
2242 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2243 if (skb)
2244 goto again;
2245 unix_state_unlock(sk);
2246 break;
2247 }
2248 } while (size);
2249
2250 mutex_unlock(&u->readlock);
2251 scm_recv(sock, msg, siocb->scm, flags);
2252 out:
2253 return copied ? : err;
2254 }
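
/*
 * The sk_peek_offset() calls above implement SO_PEEK_OFF: with a
 * non-negative peek offset set, successive MSG_PEEK reads walk forward
 * through the queue instead of re-reading from the start. Userspace
 * sketch (assumes at least four bytes are already queued):
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	char a[2], b[2];
 *	recv(fd, a, 2, MSG_PEEK);	// peeks bytes 0-1
 *	recv(fd, b, 2, MSG_PEEK);	// peeks bytes 2-3, not 0-1 again
 */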
2255
2256 static int unix_shutdown(struct socket *sock, int mode)
2257 {
2258 struct sock *sk = sock->sk;
2259 struct sock *other;
2260
2261 if (mode < SHUT_RD || mode > SHUT_RDWR)
2262 return -EINVAL;
2263 /* This maps:
2264 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2265 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2266 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2267 */
2268 ++mode;
2269
2270 unix_state_lock(sk);
2271 sk->sk_shutdown |= mode;
2272 other = unix_peer(sk);
2273 if (other)
2274 sock_hold(other);
2275 unix_state_unlock(sk);
2276 sk->sk_state_change(sk);
2277
2278 if (other &&
2279 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2280
2281 int peer_mode = 0;
2282
2283 if (mode&RCV_SHUTDOWN)
2284 peer_mode |= SEND_SHUTDOWN;
2285 if (mode&SEND_SHUTDOWN)
2286 peer_mode |= RCV_SHUTDOWN;
2287 unix_state_lock(other);
2288 other->sk_shutdown |= peer_mode;
2289 unix_state_unlock(other);
2290 other->sk_state_change(other);
2291 if (peer_mode == SHUTDOWN_MASK)
2292 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2293 else if (peer_mode & RCV_SHUTDOWN)
2294 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2295 }
2296 if (other)
2297 sock_put(other);
2298
2299 return 0;
2300 }
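
/*
 * Note how peer_mode above crosses the directions over: a local SHUT_WR
 * becomes RCV_SHUTDOWN at the peer, so the reader sees EOF. Minimal
 * userspace sketch:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	char c;
 *	ssize_t n = read(sv[1], &c, 1);	// n == 0: immediate EOF
 */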
2301
2302 long unix_inq_len(struct sock *sk)
2303 {
2304 struct sk_buff *skb;
2305 long amount = 0;
2306
2307 if (sk->sk_state == TCP_LISTEN)
2308 return -EINVAL;
2309
2310 spin_lock(&sk->sk_receive_queue.lock);
2311 if (sk->sk_type == SOCK_STREAM ||
2312 sk->sk_type == SOCK_SEQPACKET) {
2313 skb_queue_walk(&sk->sk_receive_queue, skb)
2314 amount += unix_skb_len(skb);
2315 } else {
2316 skb = skb_peek(&sk->sk_receive_queue);
2317 if (skb)
2318 amount = skb->len;
2319 }
2320 spin_unlock(&sk->sk_receive_queue.lock);
2321
2322 return amount;
2323 }
2324 EXPORT_SYMBOL_GPL(unix_inq_len);
2325
2326 long unix_outq_len(struct sock *sk)
2327 {
2328 return sk_wmem_alloc_get(sk);
2329 }
2330 EXPORT_SYMBOL_GPL(unix_outq_len);
2331
2332 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2333 {
2334 struct sock *sk = sock->sk;
2335 long amount = 0;
2336 int err;
2337
2338 switch (cmd) {
2339 case SIOCOUTQ:
2340 amount = unix_outq_len(sk);
2341 err = put_user(amount, (int __user *)arg);
2342 break;
2343 case SIOCINQ:
2344 amount = unix_inq_len(sk);
2345 if (amount < 0)
2346 err = amount;
2347 else
2348 err = put_user(amount, (int __user *)arg);
2349 break;
2350 default:
2351 err = -ENOIOCTLCMD;
2352 break;
2353 }
2354 return err;
2355 }
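
/*
 * Userspace reaches the queue accounting above through the generic
 * SIOCINQ/SIOCOUTQ ioctls (<linux/sockios.h>). For stream and
 * seqpacket sockets SIOCINQ counts all unread bytes; for datagram
 * sockets it reports the size of the next datagram only. Sketch:
 *
 *	int inq, outq;
 *	ioctl(fd, SIOCINQ, &inq);	// unread bytes queued for us
 *	ioctl(fd, SIOCOUTQ, &outq);	// bytes the peer has not consumed
 */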
2356
2357 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2358 {
2359 struct sock *sk = sock->sk;
2360 unsigned int mask;
2361
2362 sock_poll_wait(file, sk_sleep(sk), wait);
2363 mask = 0;
2364
2365 /* exceptional events? */
2366 if (sk->sk_err)
2367 mask |= POLLERR;
2368 if (sk->sk_shutdown == SHUTDOWN_MASK)
2369 mask |= POLLHUP;
2370 if (sk->sk_shutdown & RCV_SHUTDOWN)
2371 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2372
2373 /* readable? */
2374 if (!skb_queue_empty(&sk->sk_receive_queue))
2375 mask |= POLLIN | POLLRDNORM;
2376
2377 	/* Connection-based sockets need to check for termination and startup */
2378 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2379 sk->sk_state == TCP_CLOSE)
2380 mask |= POLLHUP;
2381
2382 	/*
2383 	 * We also report the socket as writable when the other side has
2384 	 * shut down the connection; this prevents sockets from getting stuck.
2385 	 */
2386 if (unix_writable(sk))
2387 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2388
2389 return mask;
2390 }
2391
2392 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2393 poll_table *wait)
2394 {
2395 struct sock *sk = sock->sk, *other;
2396 unsigned int mask, writable;
2397
2398 sock_poll_wait(file, sk_sleep(sk), wait);
2399 mask = 0;
2400
2401 /* exceptional events? */
2402 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2403 mask |= POLLERR |
2404 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2405
2406 if (sk->sk_shutdown & RCV_SHUTDOWN)
2407 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2408 if (sk->sk_shutdown == SHUTDOWN_MASK)
2409 mask |= POLLHUP;
2410
2411 /* readable? */
2412 if (!skb_queue_empty(&sk->sk_receive_queue))
2413 mask |= POLLIN | POLLRDNORM;
2414
2415 	/* Connection-based sockets need to check for termination and startup */
2416 if (sk->sk_type == SOCK_SEQPACKET) {
2417 if (sk->sk_state == TCP_CLOSE)
2418 mask |= POLLHUP;
2419 /* connection hasn't started yet? */
2420 if (sk->sk_state == TCP_SYN_SENT)
2421 return mask;
2422 }
2423
2424 /* No write status requested, avoid expensive OUT tests. */
2425 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2426 return mask;
2427
2428 writable = unix_writable(sk);
2429 if (writable) {
2430 unix_state_lock(sk);
2431
2432 other = unix_peer(sk);
2433 if (other && unix_peer(other) != sk &&
2434 unix_recvq_full(other) &&
2435 unix_dgram_peer_wake_me(sk, other))
2436 writable = 0;
2437
2438 unix_state_unlock(sk);
2439 }
2440
2441 if (writable)
2442 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2443 else
2444 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2445
2446 return mask;
2447 }
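
/*
 * For a connected datagram socket the writability check above consults
 * the peer's receive queue, so poll() doubles as flow control: POLLOUT
 * is withheld while the receiver's backlog is full and the sender is
 * parked on the peer's wait queue. Sketch:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// wakes once the peer drains its queue
 */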
2448
2449 #ifdef CONFIG_PROC_FS
2450
2451 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2452
2453 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2454 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2455 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
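
/*
 * The seq_file iterator position *pos packs a hash bucket and an
 * intra-bucket offset into a single loff_t; e.g. with UNIX_HASH_BITS
 * of 8 on a 64-bit build, BUCKET_SPACE is 64 - 9 - 1 = 54, so bucket 3
 * at offset 2 encodes as (3 << 54) | 2.
 */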
2456
2457 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2458 {
2459 unsigned long offset = get_offset(*pos);
2460 unsigned long bucket = get_bucket(*pos);
2461 struct sock *sk;
2462 unsigned long count = 0;
2463
2464 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2465 if (sock_net(sk) != seq_file_net(seq))
2466 continue;
2467 if (++count == offset)
2468 break;
2469 }
2470
2471 return sk;
2472 }
2473
2474 static struct sock *unix_next_socket(struct seq_file *seq,
2475 struct sock *sk,
2476 loff_t *pos)
2477 {
2478 unsigned long bucket;
2479
2480 while (sk > (struct sock *)SEQ_START_TOKEN) {
2481 sk = sk_next(sk);
2482 if (!sk)
2483 goto next_bucket;
2484 if (sock_net(sk) == seq_file_net(seq))
2485 return sk;
2486 }
2487
2488 do {
2489 sk = unix_from_bucket(seq, pos);
2490 if (sk)
2491 return sk;
2492
2493 next_bucket:
2494 bucket = get_bucket(*pos) + 1;
2495 *pos = set_bucket_offset(bucket, 1);
2496 } while (bucket < ARRAY_SIZE(unix_socket_table));
2497
2498 return NULL;
2499 }
2500
2501 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2502 __acquires(unix_table_lock)
2503 {
2504 spin_lock(&unix_table_lock);
2505
2506 if (!*pos)
2507 return SEQ_START_TOKEN;
2508
2509 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2510 return NULL;
2511
2512 return unix_next_socket(seq, NULL, pos);
2513 }
2514
2515 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2516 {
2517 ++*pos;
2518 return unix_next_socket(seq, v, pos);
2519 }
2520
2521 static void unix_seq_stop(struct seq_file *seq, void *v)
2522 __releases(unix_table_lock)
2523 {
2524 spin_unlock(&unix_table_lock);
2525 }
2526
2527 static int unix_seq_show(struct seq_file *seq, void *v)
2528 {
2530 if (v == SEQ_START_TOKEN)
2531 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2532 "Inode Path\n");
2533 else {
2534 struct sock *s = v;
2535 struct unix_sock *u = unix_sk(s);
2536 unix_state_lock(s);
2537
2538 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2539 s,
2540 atomic_read(&s->sk_refcnt),
2541 0,
2542 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2543 s->sk_type,
2544 s->sk_socket ?
2545 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2546 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2547 sock_i_ino(s));
2548
2549 if (u->addr) {
2550 int i, len;
2551 seq_putc(seq, ' ');
2552
2553 i = 0;
2554 len = u->addr->len - sizeof(short);
2555 if (!UNIX_ABSTRACT(s))
2556 len--;
2557 else {
2558 seq_putc(seq, '@');
2559 i++;
2560 }
2561 for ( ; i < len; i++)
2562 seq_putc(seq, u->addr->name->sun_path[i]);
2563 }
2564 unix_state_unlock(s);
2565 seq_putc(seq, '\n');
2566 }
2567
2568 return 0;
2569 }
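
/*
 * Illustrative /proc/net/unix line as emitted above (field values are
 * examples only):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff88003695cc00: 00000002 00000000 00010000 0001 01 17524 /run/foo.sock
 *
 * Flags 00010000 is __SO_ACCEPTCON (listening), Type 0001 is
 * SOCK_STREAM, St 01 is SS_UNCONNECTED; abstract names get a
 * leading '@'.
 */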
2570
2571 static const struct seq_operations unix_seq_ops = {
2572 .start = unix_seq_start,
2573 .next = unix_seq_next,
2574 .stop = unix_seq_stop,
2575 .show = unix_seq_show,
2576 };
2577
2578 static int unix_seq_open(struct inode *inode, struct file *file)
2579 {
2580 return seq_open_net(inode, file, &unix_seq_ops,
2581 sizeof(struct seq_net_private));
2582 }
2583
2584 static const struct file_operations unix_seq_fops = {
2585 .owner = THIS_MODULE,
2586 .open = unix_seq_open,
2587 .read = seq_read,
2588 .llseek = seq_lseek,
2589 .release = seq_release_net,
2590 };
2591
2592 #endif
2593
2594 static const struct net_proto_family unix_family_ops = {
2595 .family = PF_UNIX,
2596 .create = unix_create,
2597 .owner = THIS_MODULE,
2598 };
2599
2600
2601 static int __net_init unix_net_init(struct net *net)
2602 {
2603 int error = -ENOMEM;
2604
2605 net->unx.sysctl_max_dgram_qlen = 10;
2606 if (unix_sysctl_register(net))
2607 goto out;
2608
2609 #ifdef CONFIG_PROC_FS
2610 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2611 unix_sysctl_unregister(net);
2612 goto out;
2613 }
2614 #endif
2615 error = 0;
2616 out:
2617 return error;
2618 }
2619
2620 static void __net_exit unix_net_exit(struct net *net)
2621 {
2622 unix_sysctl_unregister(net);
2623 remove_proc_entry("unix", net->proc_net);
2624 }
2625
2626 static struct pernet_operations unix_net_ops = {
2627 .init = unix_net_init,
2628 .exit = unix_net_exit,
2629 };
2630
2631 static int __init af_unix_init(void)
2632 {
2633 int rc = -1;
2634
2635 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2636
2637 rc = proto_register(&unix_proto, 1);
2638 if (rc != 0) {
2639 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2640 goto out;
2641 }
2642
2643 sock_register(&unix_family_ops);
2644 register_pernet_subsys(&unix_net_ops);
2645 out:
2646 return rc;
2647 }
2648
2649 static void __exit af_unix_exit(void)
2650 {
2651 sock_unregister(PF_UNIX);
2652 proto_unregister(&unix_proto);
2653 unregister_pernet_subsys(&unix_net_ops);
2654 }
2655
2656 /* Earlier than device_initcall() so that other drivers invoking
2657    request_module() don't end up in a loop when modprobe tries
2658    to use a UNIX socket. But later than subsys_initcall(), because
2659    we depend on infrastructure initialised there. */
2660 fs_initcall(af_unix_init);
2661 module_exit(af_unix_exit);
2662
2663 MODULE_LICENSE("GPL");
2664 MODULE_ALIAS_NETPROTO(PF_UNIX);
2665