// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of hashed socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with a 0 byte, so that this name space does not
 *		  intersect with BSD names.
 */
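/*
 * For illustration only (a sketch, not kernel code): a minimal userspace
 * bind into the abstract namespace described above.  The name is NOT NUL
 * terminated; the address length passed to bind() tells the kernel how
 * many bytes of sun_path are significant, and the leading NUL byte marks
 * the name as abstract.  The name "example" is an arbitrary assumption.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	memcpy(a.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */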

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

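/* A bound socket's addr->hash is UNIX_HASH_SIZE for filesystem names (see
 * unix_bind()) and below UNIX_HASH_SIZE for abstract names, so the test
 * below identifies abstract sockets.
 */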
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock
 *    each socket's state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 * Check unix socket name:
 *	- it must not be zero length.
 *	- if it does not start with a zero byte, it must be NUL terminated
 *	  (a filesystem object)
 *	- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
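
/* For illustration: for a filesystem name such as "/tmp/x", unix_mkname()
 * NUL-terminates the path, returns strlen("/tmp/x") + 1 + sizeof(short)
 * and leaves *hashp at 0; for an abstract name ("\0x", len ==
 * sizeof(short) + 2) it returns len unchanged and stores the folded
 * checksum of the name bytes in *hashp.
 */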

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
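
/* The relay chain, as a sketch of the mechanism described above:
 *
 *   writer hits flow control in poll/sendmsg
 *	-> unix_dgram_peer_wake_connect() enqueues the writer's peer_wake
 *	   entry on the receiver's peer_wait queue
 *   receiver dequeues a datagram and wakes peer_wait
 *	-> unix_dgram_peer_wake_relay() removes the entry and wakes the
 *	   writer's own socket wait queue, making EPOLLOUT reportable
 */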

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

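/* Writable while the outstanding write memory is at most a quarter of
 * sk_sndbuf (the << 2 below), and never while listening.
 */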
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows us to do
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}


static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}


static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}


static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}
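
/* For illustration, the userspace side of the peek offset set above (a
 * sketch; fd is assumed to be a connected AF_UNIX socket):
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);
 *
 * With SO_PEEK_OFF enabled, each MSG_PEEK advances the per-socket peek
 * offset instead of rereading from the head of the queue.
 */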


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}


static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory and calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 > sk2)
		swap(sk1, sk2);

	unix_state_lock(sk1);
	unix_state_lock_nested(sk2, U_LOCK_SECOND);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*  Latch our state.

	    It is a tricky place. We need to grab our state lock and cannot
	    drop the lock on the peer. It is dangerous because deadlock is
	    possible. A connect-to-self case and a simultaneous
	    attempt to connect are eliminated by checking socket
	    state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	    check this before attempting to grab the lock.

	    Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have a reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeuing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
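
/* For illustration, the userspace side of the credential passing handled
 * above (a sketch; error handling omitted, fd assumed connected, msg
 * assumed to have ancillary buffer space set up):
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			break;	... CMSG_DATA(cmsg) points at a struct ucred
 */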

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1960
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

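	/* Never entered directly: the alloc_skb label inside "if (false)"
	 * below is only reached by goto from further down, at points where
	 * the queue, state and io locks are held; its body releases them,
	 * performs a (possibly sleeping) allocation and falls through to
	 * retake the locks.
	 */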
	if (false) {
alloc_skb:
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	spin_lock(&other->sk_receive_queue.lock);
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* Fast path: there is already a tail skb we can append to,
		 * so release the speculatively allocated newskb (with
		 * newskb == NULL, consume_skb() would do no harm anyway).
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		unix_scm_to_skb(&scm, skb, false);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
	}

	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}

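/*
 * Illustrative userspace sketch (not part of this file, names and sizes
 * assumed): the sendpage path above is what a zero-copy write such as
 * sendfile(2) into a connected stream socket typically ends up in.
 *
 *	int in = open("data.bin", O_RDONLY);
 *	off_t off = 0;
 *
 *	sendfile(sock_fd, in, &off, 65536);
 */
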
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}

static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable what to do on PEEK. We could:
		 * - not return fds: clean, but too simple 8)
		 * - return fds, and not return them again on read
		 *   (the old strategy, apparently wrong)
		 * - clone the fds (chosen here, as the most universal
		 *   solution)
		 *
		 * POSIX 1003.1g does not actually define this clearly
		 * at all. POSIX 1003.1g doesn't define a lot of things
		 * clearly, however!
		 */

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}

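/*
 * Illustrative userspace sketch (not part of this file, buffer sizes
 * assumed): receiving a descriptor passed as SCM_RIGHTS; with MSG_PEEK,
 * the handling above hands out a fresh duplicate on every peek.
 *
 *	char data[128], ctl[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = ctl,
 *			      .msg_controllen = sizeof(ctl) };
 *	struct cmsghdr *cmsg;
 *	int passed_fd;
 *
 *	recvmsg(sock_fd, &msg, 0);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	if (cmsg && cmsg->cmsg_type == SCM_RIGHTS)
 *		memcpy(&passed_fd, CMSG_DATA(cmsg), sizeof(int));
 */
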
/*
 * Sleep until more data has arrived. But check for races.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};

static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent the queue from being reordered
	 * while we sleep copying data out to the message.
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* The skb was touched by a concurrent reader;
			 * we should not expect anything from it anymore
			 * and must assume it is invalid - we can be sure
			 * it was dropped from the socket queue.
			 *
			 * Report a short read instead.
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}

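/*
 * Illustrative userspace sketch (not part of this file): the
 * sk_peek_offset handling above is what SO_PEEK_OFF exposes - with the
 * offset armed, repeated MSG_PEEK reads advance through the queue
 * instead of re-reading the same bytes.
 *
 *	int off = 0;
 *
 *	setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(sock_fd, buf, 16, MSG_PEEK);	(peeks bytes 0..15)
 *	recv(sock_fd, buf, 16, MSG_PEEK);	(peeks bytes 16..31)
 */
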
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}

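/*
 * Illustrative userspace sketch (not part of this file): moving queued
 * stream data into a pipe without a round trip through userspace, which
 * is serviced by unix_stream_splice_read() above.
 *
 *	int p[2];
 *
 *	pipe(p);
 *	splice(sock_fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */
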
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

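/*
 * Illustrative userspace sketch (not part of this file): because of the
 * peer_mode mapping above, shutdown(fd, SHUT_WR) sets SEND_SHUTDOWN
 * locally and RCV_SHUTDOWN on the peer, so the peer's read() returns 0
 * (EOF) once the queue drains.
 *
 *	shutdown(sock_fd, SHUT_WR);
 *	n = read(peer_fd, buf, sizeof(buf));	(0 once data is drained)
 */
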
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

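/*
 * Illustrative userspace sketch (not part of this file): the two helpers
 * above back the SIOCINQ/SIOCOUTQ ioctls handled in unix_ioctl() below.
 *
 *	int pending, unsent;
 *
 *	ioctl(sock_fd, SIOCINQ, &pending);	(unread bytes queued to us)
 *	ioctl(sock_fd, SIOCOUTQ, &unsent);	(send-buffer memory still
 *						 unconsumed by the peer)
 */
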
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}

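/*
 * Illustrative userspace sketch (not part of this file, names assumed):
 * with CAP_NET_ADMIN, the SIOCUNIXFILE ioctl backed by unix_open_file()
 * yields an O_PATH descriptor for the filesystem object the socket is
 * bound to, usable with the *at() calls.
 *
 *	struct stat st;
 *	int path_fd = ioctl(sock_fd, SIOCUNIXFILE);
 *
 *	if (path_fd >= 0)
 *		fstatat(path_fd, "", &st, AT_EMPTY_PATH);
 */
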
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x)		((x) >> BUCKET_SPACE)
#define get_offset(x)		((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o)	((b) << BUCKET_SPACE | (o))

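/*
 * Illustrative arithmetic (assuming 64-bit longs and UNIX_HASH_BITS == 8,
 * both assumptions of this example): BUCKET_SPACE == 64 - 9 - 1 == 54,
 * so a seq-file position packs the bucket index into the high bits and
 * the offset within the bucket into the low 54 bits, e.g.
 * set_bucket_offset(3, 2) == (3UL << 54) | 2.
 */
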
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

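/*
 * Illustrative /proc/net/unix line as produced above (field values
 * assumed for the example):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 23456 /run/foo.sock
 *
 * i.e. hashed kernel address (%pK), refcount, protocol (always 0 here),
 * flags (__SO_ACCEPTCON when listening), type, state, inode, then the
 * bound path ('@'-prefixed for abstract names).
 */
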
static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next  = unix_seq_next,
	.stop  = unix_seq_stop,
	.show  = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner = THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there. */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
