1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 *
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * is been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
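
/* For illustration only (not part of this file): an abstract address as
 * described above is a struct sockaddr_un whose sun_path begins with a NUL
 * byte and whose name is delimited by the address length, not by a
 * terminating NUL. A hypothetical userspace bind to the abstract name
 * "example" would look like:
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *
 *	memcpy(addr.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&addr,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */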
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/in.h>
97 #include <linux/fs.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
108 #include <net/scm.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119
120 #include "scm.h"
121
122 static atomic_long_t unix_nr_socks;
123 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
124 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
125
126 /* SMP locking strategy:
127 * hash table is protected with spinlock.
128 * each socket state is protected by separate spinlock.
129 */
130
131 static unsigned int unix_unbound_hash(struct sock *sk)
132 {
133 unsigned long hash = (unsigned long)sk;
134
135 hash ^= hash >> 16;
136 hash ^= hash >> 8;
137 hash ^= sk->sk_type;
138
139 return hash & UNIX_HASH_MOD;
140 }
141
142 static unsigned int unix_bsd_hash(struct inode *i)
143 {
144 return i->i_ino & UNIX_HASH_MOD;
145 }
146
147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148 int addr_len, int type)
149 {
150 __wsum csum = csum_partial(sunaddr, addr_len, 0);
151 unsigned int hash;
152
153 hash = (__force unsigned int)csum_fold(csum);
154 hash ^= hash >> 8;
155 hash ^= type;
156
157 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
158 }
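
/* Note on the layout implied by the three helpers above: unbound and
 * pathname (BSD) sockets hash into [0, UNIX_HASH_MOD], while abstract
 * sockets hash into [UNIX_HASH_MOD + 1, 2 * UNIX_HASH_MOD + 1], so the two
 * classes never share a bucket in the per-netns hash table.
 */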
159
160 static void unix_table_double_lock(struct net *net,
161 unsigned int hash1, unsigned int hash2)
162 {
163 if (hash1 == hash2) {
164 spin_lock(&net->unx.table.locks[hash1]);
165 return;
166 }
167
168 if (hash1 > hash2)
169 swap(hash1, hash2);
170
171 spin_lock(&net->unx.table.locks[hash1]);
172 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
173 }
174
175 static void unix_table_double_unlock(struct net *net,
176 unsigned int hash1, unsigned int hash2)
177 {
178 if (hash1 == hash2) {
179 spin_unlock(&net->unx.table.locks[hash1]);
180 return;
181 }
182
183 spin_unlock(&net->unx.table.locks[hash1]);
184 spin_unlock(&net->unx.table.locks[hash2]);
185 }
186
187 #ifdef CONFIG_SECURITY_NETWORK
188 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189 {
190 UNIXCB(skb).secid = scm->secid;
191 }
192
193 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194 {
195 scm->secid = UNIXCB(skb).secid;
196 }
197
198 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199 {
200 return (scm->secid == UNIXCB(skb).secid);
201 }
202 #else
203 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
204 { }
205
206 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207 { }
208
209 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
210 {
211 return true;
212 }
213 #endif /* CONFIG_SECURITY_NETWORK */
214
215 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
216 {
217 return unix_peer(osk) == sk;
218 }
219
220 static inline int unix_may_send(struct sock *sk, struct sock *osk)
221 {
222 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
223 }
224
225 static inline int unix_recvq_full(const struct sock *sk)
226 {
227 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
228 }
229
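/* Lockless variant of unix_recvq_full() for callers that do not hold the
 * peer's state lock; it uses lockless queue-length and READ_ONCE() reads.
 */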
230 static inline int unix_recvq_full_lockless(const struct sock *sk)
231 {
232 return skb_queue_len_lockless(&sk->sk_receive_queue) >
233 READ_ONCE(sk->sk_max_ack_backlog);
234 }
235
236 struct sock *unix_peer_get(struct sock *s)
237 {
238 struct sock *peer;
239
240 unix_state_lock(s);
241 peer = unix_peer(s);
242 if (peer)
243 sock_hold(peer);
244 unix_state_unlock(s);
245 return peer;
246 }
247 EXPORT_SYMBOL_GPL(unix_peer_get);
248
249 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
250 int addr_len)
251 {
252 struct unix_address *addr;
253
254 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
255 if (!addr)
256 return NULL;
257
258 refcount_set(&addr->refcnt, 1);
259 addr->len = addr_len;
260 memcpy(addr->name, sunaddr, addr_len);
261
262 return addr;
263 }
264
265 static inline void unix_release_addr(struct unix_address *addr)
266 {
267 if (refcount_dec_and_test(&addr->refcnt))
268 kfree(addr);
269 }
270
271 /*
272 * Check unix socket name:
273 * - should be not zero length.
274 * - if started by not zero, should be NULL terminated (FS object)
275 * - if started by zero, it is abstract name.
276 */
277
278 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
279 {
280 if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
281 addr_len > sizeof(*sunaddr))
282 return -EINVAL;
283
284 if (sunaddr->sun_family != AF_UNIX)
285 return -EINVAL;
286
287 return 0;
288 }
289
290 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
291 {
292 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
293 short offset = offsetof(struct sockaddr_storage, __data);
294
295 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
296
297 /* This may look like an off by one error but it is a bit more
298 * subtle. 108 is the longest valid AF_UNIX path for a binding.
299 * sun_path[108] doesn't as such exist. However in kernel space
300 * we are guaranteed that it is a valid memory location in our
301 * kernel address buffer because syscall functions always pass
302 * a pointer of struct sockaddr_storage which has a bigger buffer
303 * than 108. Also, we must terminate sun_path for strlen() in
304 * getname_kernel().
305 */
306 addr->__data[addr_len - offset] = 0;
307
308 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
309 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
310 * know the actual buffer.
311 */
312 return strlen(addr->__data) + offset + 1;
313 }
314
315 static void __unix_remove_socket(struct sock *sk)
316 {
317 sk_del_node_init(sk);
318 }
319
320 static void __unix_insert_socket(struct net *net, struct sock *sk)
321 {
322 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
323 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
324 }
325
326 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
327 struct unix_address *addr, unsigned int hash)
328 {
329 __unix_remove_socket(sk);
330 smp_store_release(&unix_sk(sk)->addr, addr);
331
332 sk->sk_hash = hash;
333 __unix_insert_socket(net, sk);
334 }
335
336 static void unix_remove_socket(struct net *net, struct sock *sk)
337 {
338 spin_lock(&net->unx.table.locks[sk->sk_hash]);
339 __unix_remove_socket(sk);
340 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
341 }
342
343 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
344 {
345 spin_lock(&net->unx.table.locks[sk->sk_hash]);
346 __unix_insert_socket(net, sk);
347 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
348 }
349
350 static void unix_insert_bsd_socket(struct sock *sk)
351 {
352 spin_lock(&bsd_socket_locks[sk->sk_hash]);
353 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
354 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
355 }
356
357 static void unix_remove_bsd_socket(struct sock *sk)
358 {
359 if (!hlist_unhashed(&sk->sk_bind_node)) {
360 spin_lock(&bsd_socket_locks[sk->sk_hash]);
361 __sk_del_bind_node(sk);
362 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
363
364 sk_node_init(&sk->sk_bind_node);
365 }
366 }
367
368 static struct sock *__unix_find_socket_byname(struct net *net,
369 struct sockaddr_un *sunname,
370 int len, unsigned int hash)
371 {
372 struct sock *s;
373
374 sk_for_each(s, &net->unx.table.buckets[hash]) {
375 struct unix_sock *u = unix_sk(s);
376
377 if (u->addr->len == len &&
378 !memcmp(u->addr->name, sunname, len))
379 return s;
380 }
381 return NULL;
382 }
383
384 static inline struct sock *unix_find_socket_byname(struct net *net,
385 struct sockaddr_un *sunname,
386 int len, unsigned int hash)
387 {
388 struct sock *s;
389
390 spin_lock(&net->unx.table.locks[hash]);
391 s = __unix_find_socket_byname(net, sunname, len, hash);
392 if (s)
393 sock_hold(s);
394 spin_unlock(&net->unx.table.locks[hash]);
395 return s;
396 }
397
398 static struct sock *unix_find_socket_byinode(struct inode *i)
399 {
400 unsigned int hash = unix_bsd_hash(i);
401 struct sock *s;
402
403 spin_lock(&bsd_socket_locks[hash]);
404 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
405 struct dentry *dentry = unix_sk(s)->path.dentry;
406
407 if (dentry && d_backing_inode(dentry) == i) {
408 sock_hold(s);
409 spin_unlock(&bsd_socket_locks[hash]);
410 return s;
411 }
412 }
413 spin_unlock(&bsd_socket_locks[hash]);
414 return NULL;
415 }
416
417 /* Support code for asymmetrically connected dgram sockets
418 *
419 * If a datagram socket is connected to a socket not itself connected
420 * to the first socket (eg, /dev/log), clients may only enqueue more
421 * messages if the present receive queue of the server socket is not
422 * "too large". This means there's a second writeability condition
423 * poll and sendmsg need to test. The dgram recv code will do a wake
424 * up on the peer_wait wait queue of a socket upon reception of a
425 * datagram which needs to be propagated to sleeping would-be writers
426 * since these might not have sent anything so far. This can't be
427 * accomplished via poll_wait because the lifetime of the server
428 * socket might be less than that of its clients if these break their
429 * association with it or if the server socket is closed while clients
430 * are still connected to it and there's no way to inform "a polling
431 * implementation" that it should let go of a certain wait queue
432 *
433 * In order to propagate a wake up, a wait_queue_entry_t of the client
434 * socket is enqueued on the peer_wait queue of the server socket
435 * whose wake function does a wake_up on the ordinary client socket
436 * wait queue. This connection is established whenever a write (or
437 * poll for write) hit the flow control condition and broken when the
438 * association to the server socket is dissolved or after a wake up
439 * was relayed.
440 */
441
442 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
443 void *key)
444 {
445 struct unix_sock *u;
446 wait_queue_head_t *u_sleep;
447
448 u = container_of(q, struct unix_sock, peer_wake);
449
450 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
451 q);
452 u->peer_wake.private = NULL;
453
454 /* relaying can only happen while the wq still exists */
455 u_sleep = sk_sleep(&u->sk);
456 if (u_sleep)
457 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
458
459 return 0;
460 }
461
462 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
463 {
464 struct unix_sock *u, *u_other;
465 int rc;
466
467 u = unix_sk(sk);
468 u_other = unix_sk(other);
469 rc = 0;
470 spin_lock(&u_other->peer_wait.lock);
471
472 if (!u->peer_wake.private) {
473 u->peer_wake.private = other;
474 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
475
476 rc = 1;
477 }
478
479 spin_unlock(&u_other->peer_wait.lock);
480 return rc;
481 }
482
483 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
484 struct sock *other)
485 {
486 struct unix_sock *u, *u_other;
487
488 u = unix_sk(sk);
489 u_other = unix_sk(other);
490 spin_lock(&u_other->peer_wait.lock);
491
492 if (u->peer_wake.private == other) {
493 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
494 u->peer_wake.private = NULL;
495 }
496
497 spin_unlock(&u_other->peer_wait.lock);
498 }
499
500 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
501 struct sock *other)
502 {
503 unix_dgram_peer_wake_disconnect(sk, other);
504 wake_up_interruptible_poll(sk_sleep(sk),
505 EPOLLOUT |
506 EPOLLWRNORM |
507 EPOLLWRBAND);
508 }
509
510 /* preconditions:
511 * - unix_peer(sk) == other
512 * - association is stable
513 */
514 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
515 {
516 int connected;
517
518 connected = unix_dgram_peer_wake_connect(sk, other);
519
520 /* If other is SOCK_DEAD, we want to make sure we signal
521 * POLLOUT, such that a subsequent write() can get a
522 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
523 * to other and it's full, we will hang waiting for POLLOUT.
524 */
525 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
526 return 1;
527
528 if (connected)
529 unix_dgram_peer_wake_disconnect(sk, other);
530
531 return 0;
532 }
533
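/* A socket is considered writable while it is not listening and its queued
 * write memory is at most a quarter of sk_sndbuf (wmem_alloc << 2 <= sndbuf).
 */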
534 static int unix_writable(const struct sock *sk)
535 {
536 return sk->sk_state != TCP_LISTEN &&
537 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
538 }
539
540 static void unix_write_space(struct sock *sk)
541 {
542 struct socket_wq *wq;
543
544 rcu_read_lock();
545 if (unix_writable(sk)) {
546 wq = rcu_dereference(sk->sk_wq);
547 if (skwq_has_sleeper(wq))
548 wake_up_interruptible_sync_poll(&wq->wait,
549 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
550 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
551 }
552 rcu_read_unlock();
553 }
554
555 /* When dgram socket disconnects (or changes its peer), we clear its receive
556 * queue of packets arrived from previous peer. First, it allows to do
557 * flow control based only on wmem_alloc; second, sk connected to peer
558 * may receive messages only from that peer. */
559 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
560 {
561 if (!skb_queue_empty(&sk->sk_receive_queue)) {
562 skb_queue_purge(&sk->sk_receive_queue);
563 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
564
565 /* If one link of bidirectional dgram pipe is disconnected,
566 * we signal error. Messages are lost. Do not make this,
567 * when peer was not connected to us.
568 */
569 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
570 WRITE_ONCE(other->sk_err, ECONNRESET);
571 sk_error_report(other);
572 }
573 }
574 other->sk_state = TCP_CLOSE;
575 }
576
577 static void unix_sock_destructor(struct sock *sk)
578 {
579 struct unix_sock *u = unix_sk(sk);
580
581 skb_queue_purge(&sk->sk_receive_queue);
582
583 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
584 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
585 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
586 if (!sock_flag(sk, SOCK_DEAD)) {
587 pr_info("Attempt to release alive unix socket: %p\n", sk);
588 return;
589 }
590
591 if (u->addr)
592 unix_release_addr(u->addr);
593
594 atomic_long_dec(&unix_nr_socks);
595 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
596 #ifdef UNIX_REFCNT_DEBUG
597 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
598 atomic_long_read(&unix_nr_socks));
599 #endif
600 }
601
602 static void unix_release_sock(struct sock *sk, int embrion)
603 {
604 struct unix_sock *u = unix_sk(sk);
605 struct sock *skpair;
606 struct sk_buff *skb;
607 struct path path;
608 int state;
609
610 unix_remove_socket(sock_net(sk), sk);
611 unix_remove_bsd_socket(sk);
612
613 /* Clear state */
614 unix_state_lock(sk);
615 sock_orphan(sk);
616 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
617 path = u->path;
618 u->path.dentry = NULL;
619 u->path.mnt = NULL;
620 state = sk->sk_state;
621 sk->sk_state = TCP_CLOSE;
622
623 skpair = unix_peer(sk);
624 unix_peer(sk) = NULL;
625
626 unix_state_unlock(sk);
627
628 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
629 if (u->oob_skb) {
630 kfree_skb(u->oob_skb);
631 u->oob_skb = NULL;
632 }
633 #endif
634
635 wake_up_interruptible_all(&u->peer_wait);
636
637 if (skpair != NULL) {
638 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
639 unix_state_lock(skpair);
640 /* No more writes */
641 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
642 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
643 WRITE_ONCE(skpair->sk_err, ECONNRESET);
644 unix_state_unlock(skpair);
645 skpair->sk_state_change(skpair);
646 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
647 }
648
649 unix_dgram_peer_wake_disconnect(sk, skpair);
650 sock_put(skpair); /* It may now die */
651 }
652
653 /* Try to flush out this socket. Throw out buffers at least */
654
655 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
656 if (state == TCP_LISTEN)
657 unix_release_sock(skb->sk, 1);
658 /* passed fds are erased in the kfree_skb hook */
659 UNIXCB(skb).consumed = skb->len;
660 kfree_skb(skb);
661 }
662
663 if (path.dentry)
664 path_put(&path);
665
666 sock_put(sk);
667
668 /* ---- Socket is dead now and most probably destroyed ---- */
669
670 /*
671 * Fixme: BSD difference: In BSD all sockets connected to us get
672 * ECONNRESET and we die on the spot. In Linux we behave
673 * like files and pipes do and wait for the last
674 * dereference.
675 *
676 * Can't we simply set sock->err?
677 *
678 * What the above comment does talk about? --ANK(980817)
679 */
680
681 if (READ_ONCE(unix_tot_inflight))
682 unix_gc(); /* Garbage collect fds */
683 }
684
685 static void init_peercred(struct sock *sk)
686 {
687 const struct cred *old_cred;
688 struct pid *old_pid;
689
690 spin_lock(&sk->sk_peer_lock);
691 old_pid = sk->sk_peer_pid;
692 old_cred = sk->sk_peer_cred;
693 sk->sk_peer_pid = get_pid(task_tgid(current));
694 sk->sk_peer_cred = get_current_cred();
695 spin_unlock(&sk->sk_peer_lock);
696
697 put_pid(old_pid);
698 put_cred(old_cred);
699 }
700
701 static void copy_peercred(struct sock *sk, struct sock *peersk)
702 {
703 const struct cred *old_cred;
704 struct pid *old_pid;
705
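	/* Take both sk_peer_lock spinlocks in pointer order to avoid ABBA deadlock. */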
706 if (sk < peersk) {
707 spin_lock(&sk->sk_peer_lock);
708 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
709 } else {
710 spin_lock(&peersk->sk_peer_lock);
711 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
712 }
713 old_pid = sk->sk_peer_pid;
714 old_cred = sk->sk_peer_cred;
715 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
716 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
717
718 spin_unlock(&sk->sk_peer_lock);
719 spin_unlock(&peersk->sk_peer_lock);
720
721 put_pid(old_pid);
722 put_cred(old_cred);
723 }
724
725 static int unix_listen(struct socket *sock, int backlog)
726 {
727 int err;
728 struct sock *sk = sock->sk;
729 struct unix_sock *u = unix_sk(sk);
730
731 err = -EOPNOTSUPP;
732 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
733 goto out; /* Only stream/seqpacket sockets accept */
734 err = -EINVAL;
735 if (!u->addr)
736 goto out; /* No listens on an unbound socket */
737 unix_state_lock(sk);
738 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
739 goto out_unlock;
740 if (backlog > sk->sk_max_ack_backlog)
741 wake_up_interruptible_all(&u->peer_wait);
742 sk->sk_max_ack_backlog = backlog;
743 sk->sk_state = TCP_LISTEN;
744 /* set credentials so connect can copy them */
745 init_peercred(sk);
746 err = 0;
747
748 out_unlock:
749 unix_state_unlock(sk);
750 out:
751 return err;
752 }
753
754 static int unix_release(struct socket *);
755 static int unix_bind(struct socket *, struct sockaddr *, int);
756 static int unix_stream_connect(struct socket *, struct sockaddr *,
757 int addr_len, int flags);
758 static int unix_socketpair(struct socket *, struct socket *);
759 static int unix_accept(struct socket *, struct socket *, int, bool);
760 static int unix_getname(struct socket *, struct sockaddr *, int);
761 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
762 static __poll_t unix_dgram_poll(struct file *, struct socket *,
763 poll_table *);
764 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
765 #ifdef CONFIG_COMPAT
766 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
767 #endif
768 static int unix_shutdown(struct socket *, int);
769 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
770 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
771 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
772 struct pipe_inode_info *, size_t size,
773 unsigned int flags);
774 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
775 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
776 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
777 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
778 static int unix_dgram_connect(struct socket *, struct sockaddr *,
779 int, int);
780 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
781 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
782 int);
783
784 static int unix_set_peek_off(struct sock *sk, int val)
785 {
786 struct unix_sock *u = unix_sk(sk);
787
788 if (mutex_lock_interruptible(&u->iolock))
789 return -EINTR;
790
791 WRITE_ONCE(sk->sk_peek_off, val);
792 mutex_unlock(&u->iolock);
793
794 return 0;
795 }
796
797 #ifdef CONFIG_PROC_FS
798 static int unix_count_nr_fds(struct sock *sk)
799 {
800 struct sk_buff *skb;
801 struct unix_sock *u;
802 int nr_fds = 0;
803
804 spin_lock(&sk->sk_receive_queue.lock);
805 skb = skb_peek(&sk->sk_receive_queue);
806 while (skb) {
807 u = unix_sk(skb->sk);
808 nr_fds += atomic_read(&u->scm_stat.nr_fds);
809 skb = skb_peek_next(skb, &sk->sk_receive_queue);
810 }
811 spin_unlock(&sk->sk_receive_queue.lock);
812
813 return nr_fds;
814 }
815
816 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
817 {
818 struct sock *sk = sock->sk;
819 unsigned char s_state;
820 struct unix_sock *u;
821 int nr_fds = 0;
822
823 if (sk) {
824 s_state = READ_ONCE(sk->sk_state);
825 u = unix_sk(sk);
826
827 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
828 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
829 * SOCK_DGRAM is ordinary. So, no lock is needed.
830 */
831 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
832 nr_fds = atomic_read(&u->scm_stat.nr_fds);
833 else if (s_state == TCP_LISTEN)
834 nr_fds = unix_count_nr_fds(sk);
835
836 seq_printf(m, "scm_fds: %u\n", nr_fds);
837 }
838 }
839 #else
840 #define unix_show_fdinfo NULL
841 #endif
842
843 static const struct proto_ops unix_stream_ops = {
844 .family = PF_UNIX,
845 .owner = THIS_MODULE,
846 .release = unix_release,
847 .bind = unix_bind,
848 .connect = unix_stream_connect,
849 .socketpair = unix_socketpair,
850 .accept = unix_accept,
851 .getname = unix_getname,
852 .poll = unix_poll,
853 .ioctl = unix_ioctl,
854 #ifdef CONFIG_COMPAT
855 .compat_ioctl = unix_compat_ioctl,
856 #endif
857 .listen = unix_listen,
858 .shutdown = unix_shutdown,
859 .sendmsg = unix_stream_sendmsg,
860 .recvmsg = unix_stream_recvmsg,
861 .read_skb = unix_stream_read_skb,
862 .mmap = sock_no_mmap,
863 .splice_read = unix_stream_splice_read,
864 .set_peek_off = unix_set_peek_off,
865 .show_fdinfo = unix_show_fdinfo,
866 };
867
868 static const struct proto_ops unix_dgram_ops = {
869 .family = PF_UNIX,
870 .owner = THIS_MODULE,
871 .release = unix_release,
872 .bind = unix_bind,
873 .connect = unix_dgram_connect,
874 .socketpair = unix_socketpair,
875 .accept = sock_no_accept,
876 .getname = unix_getname,
877 .poll = unix_dgram_poll,
878 .ioctl = unix_ioctl,
879 #ifdef CONFIG_COMPAT
880 .compat_ioctl = unix_compat_ioctl,
881 #endif
882 .listen = sock_no_listen,
883 .shutdown = unix_shutdown,
884 .sendmsg = unix_dgram_sendmsg,
885 .read_skb = unix_read_skb,
886 .recvmsg = unix_dgram_recvmsg,
887 .mmap = sock_no_mmap,
888 .set_peek_off = unix_set_peek_off,
889 .show_fdinfo = unix_show_fdinfo,
890 };
891
892 static const struct proto_ops unix_seqpacket_ops = {
893 .family = PF_UNIX,
894 .owner = THIS_MODULE,
895 .release = unix_release,
896 .bind = unix_bind,
897 .connect = unix_stream_connect,
898 .socketpair = unix_socketpair,
899 .accept = unix_accept,
900 .getname = unix_getname,
901 .poll = unix_dgram_poll,
902 .ioctl = unix_ioctl,
903 #ifdef CONFIG_COMPAT
904 .compat_ioctl = unix_compat_ioctl,
905 #endif
906 .listen = unix_listen,
907 .shutdown = unix_shutdown,
908 .sendmsg = unix_seqpacket_sendmsg,
909 .recvmsg = unix_seqpacket_recvmsg,
910 .mmap = sock_no_mmap,
911 .set_peek_off = unix_set_peek_off,
912 .show_fdinfo = unix_show_fdinfo,
913 };
914
915 static void unix_close(struct sock *sk, long timeout)
916 {
917 /* Nothing to do here, unix socket does not need a ->close().
918 * This is merely for sockmap.
919 */
920 }
921
922 static void unix_unhash(struct sock *sk)
923 {
924 /* Nothing to do here, unix socket does not need a ->unhash().
925 * This is merely for sockmap.
926 */
927 }
928
929 static bool unix_bpf_bypass_getsockopt(int level, int optname)
930 {
931 if (level == SOL_SOCKET) {
932 switch (optname) {
933 case SO_PEERPIDFD:
934 return true;
935 default:
936 return false;
937 }
938 }
939
940 return false;
941 }
942
943 struct proto unix_dgram_proto = {
944 .name = "UNIX",
945 .owner = THIS_MODULE,
946 .obj_size = sizeof(struct unix_sock),
947 .close = unix_close,
948 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
949 #ifdef CONFIG_BPF_SYSCALL
950 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
951 #endif
952 };
953
954 struct proto unix_stream_proto = {
955 .name = "UNIX-STREAM",
956 .owner = THIS_MODULE,
957 .obj_size = sizeof(struct unix_sock),
958 .close = unix_close,
959 .unhash = unix_unhash,
960 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
961 #ifdef CONFIG_BPF_SYSCALL
962 .psock_update_sk_prot = unix_stream_bpf_update_proto,
963 #endif
964 };
965
966 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
967 {
968 struct unix_sock *u;
969 struct sock *sk;
970 int err;
971
972 atomic_long_inc(&unix_nr_socks);
973 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
974 err = -ENFILE;
975 goto err;
976 }
977
978 if (type == SOCK_STREAM)
979 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
980 else /*dgram and seqpacket */
981 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
982
983 if (!sk) {
984 err = -ENOMEM;
985 goto err;
986 }
987
988 sock_init_data(sock, sk);
989
990 sk->sk_hash = unix_unbound_hash(sk);
991 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
992 sk->sk_write_space = unix_write_space;
993 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
994 sk->sk_destruct = unix_sock_destructor;
995 u = unix_sk(sk);
996 u->path.dentry = NULL;
997 u->path.mnt = NULL;
998 spin_lock_init(&u->lock);
999 atomic_long_set(&u->inflight, 0);
1000 INIT_LIST_HEAD(&u->link);
1001 mutex_init(&u->iolock); /* single task reading lock */
1002 mutex_init(&u->bindlock); /* single task binding lock */
1003 init_waitqueue_head(&u->peer_wait);
1004 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1005 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1006 unix_insert_unbound_socket(net, sk);
1007
1008 sock_prot_inuse_add(net, sk->sk_prot, 1);
1009
1010 return sk;
1011
1012 err:
1013 atomic_long_dec(&unix_nr_socks);
1014 return ERR_PTR(err);
1015 }
1016
1017 static int unix_create(struct net *net, struct socket *sock, int protocol,
1018 int kern)
1019 {
1020 struct sock *sk;
1021
1022 if (protocol && protocol != PF_UNIX)
1023 return -EPROTONOSUPPORT;
1024
1025 sock->state = SS_UNCONNECTED;
1026
1027 switch (sock->type) {
1028 case SOCK_STREAM:
1029 sock->ops = &unix_stream_ops;
1030 break;
1031 /*
1032 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1033 * nothing uses it.
1034 */
1035 case SOCK_RAW:
1036 sock->type = SOCK_DGRAM;
1037 fallthrough;
1038 case SOCK_DGRAM:
1039 sock->ops = &unix_dgram_ops;
1040 break;
1041 case SOCK_SEQPACKET:
1042 sock->ops = &unix_seqpacket_ops;
1043 break;
1044 default:
1045 return -ESOCKTNOSUPPORT;
1046 }
1047
1048 sk = unix_create1(net, sock, kern, sock->type);
1049 if (IS_ERR(sk))
1050 return PTR_ERR(sk);
1051
1052 return 0;
1053 }
1054
1055 static int unix_release(struct socket *sock)
1056 {
1057 struct sock *sk = sock->sk;
1058
1059 if (!sk)
1060 return 0;
1061
1062 sk->sk_prot->close(sk, 0);
1063 unix_release_sock(sk, 0);
1064 sock->sk = NULL;
1065
1066 return 0;
1067 }
1068
1069 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1070 int type)
1071 {
1072 struct inode *inode;
1073 struct path path;
1074 struct sock *sk;
1075 int err;
1076
1077 unix_mkname_bsd(sunaddr, addr_len);
1078 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1079 if (err)
1080 goto fail;
1081
1082 err = path_permission(&path, MAY_WRITE);
1083 if (err)
1084 goto path_put;
1085
1086 err = -ECONNREFUSED;
1087 inode = d_backing_inode(path.dentry);
1088 if (!S_ISSOCK(inode->i_mode))
1089 goto path_put;
1090
1091 sk = unix_find_socket_byinode(inode);
1092 if (!sk)
1093 goto path_put;
1094
1095 err = -EPROTOTYPE;
1096 if (sk->sk_type == type)
1097 touch_atime(&path);
1098 else
1099 goto sock_put;
1100
1101 path_put(&path);
1102
1103 return sk;
1104
1105 sock_put:
1106 sock_put(sk);
1107 path_put:
1108 path_put(&path);
1109 fail:
1110 return ERR_PTR(err);
1111 }
1112
1113 static struct sock *unix_find_abstract(struct net *net,
1114 struct sockaddr_un *sunaddr,
1115 int addr_len, int type)
1116 {
1117 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1118 struct dentry *dentry;
1119 struct sock *sk;
1120
1121 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1122 if (!sk)
1123 return ERR_PTR(-ECONNREFUSED);
1124
1125 dentry = unix_sk(sk)->path.dentry;
1126 if (dentry)
1127 touch_atime(&unix_sk(sk)->path);
1128
1129 return sk;
1130 }
1131
1132 static struct sock *unix_find_other(struct net *net,
1133 struct sockaddr_un *sunaddr,
1134 int addr_len, int type)
1135 {
1136 struct sock *sk;
1137
1138 if (sunaddr->sun_path[0])
1139 sk = unix_find_bsd(sunaddr, addr_len, type);
1140 else
1141 sk = unix_find_abstract(net, sunaddr, addr_len, type);
1142
1143 return sk;
1144 }
1145
1146 static int unix_autobind(struct sock *sk)
1147 {
1148 unsigned int new_hash, old_hash = sk->sk_hash;
1149 struct unix_sock *u = unix_sk(sk);
1150 struct net *net = sock_net(sk);
1151 struct unix_address *addr;
1152 u32 lastnum, ordernum;
1153 int err;
1154
1155 err = mutex_lock_interruptible(&u->bindlock);
1156 if (err)
1157 return err;
1158
1159 if (u->addr)
1160 goto out;
1161
1162 err = -ENOMEM;
1163 addr = kzalloc(sizeof(*addr) +
1164 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1165 if (!addr)
1166 goto out;
1167
1168 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1169 addr->name->sun_family = AF_UNIX;
1170 refcount_set(&addr->refcnt, 1);
1171
1172 ordernum = get_random_u32();
1173 lastnum = ordernum & 0xFFFFF;
1174 retry:
1175 ordernum = (ordernum + 1) & 0xFFFFF;
1176 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1177
1178 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1179 unix_table_double_lock(net, old_hash, new_hash);
1180
1181 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1182 unix_table_double_unlock(net, old_hash, new_hash);
1183
1184 /* __unix_find_socket_byname() may take long time if many names
1185 * are already in use.
1186 */
1187 cond_resched();
1188
1189 if (ordernum == lastnum) {
1190 /* Give up if all names seems to be in use. */
1191 err = -ENOSPC;
1192 unix_release_addr(addr);
1193 goto out;
1194 }
1195
1196 goto retry;
1197 }
1198
1199 __unix_set_addr_hash(net, sk, addr, new_hash);
1200 unix_table_double_unlock(net, old_hash, new_hash);
1201 err = 0;
1202
1203 out: mutex_unlock(&u->bindlock);
1204 return err;
1205 }
1206
1207 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1208 int addr_len)
1209 {
1210 umode_t mode = S_IFSOCK |
1211 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1212 unsigned int new_hash, old_hash = sk->sk_hash;
1213 struct unix_sock *u = unix_sk(sk);
1214 struct net *net = sock_net(sk);
1215 struct mnt_idmap *idmap;
1216 struct unix_address *addr;
1217 struct dentry *dentry;
1218 struct path parent;
1219 int err;
1220
1221 addr_len = unix_mkname_bsd(sunaddr, addr_len);
1222 addr = unix_create_addr(sunaddr, addr_len);
1223 if (!addr)
1224 return -ENOMEM;
1225
1226 /*
1227 * Get the parent directory, calculate the hash for last
1228 * component.
1229 */
1230 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1231 if (IS_ERR(dentry)) {
1232 err = PTR_ERR(dentry);
1233 goto out;
1234 }
1235
1236 /*
1237 * All right, let's create it.
1238 */
1239 idmap = mnt_idmap(parent.mnt);
1240 err = security_path_mknod(&parent, dentry, mode, 0);
1241 if (!err)
1242 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1243 if (err)
1244 goto out_path;
1245 err = mutex_lock_interruptible(&u->bindlock);
1246 if (err)
1247 goto out_unlink;
1248 if (u->addr)
1249 goto out_unlock;
1250
1251 new_hash = unix_bsd_hash(d_backing_inode(dentry));
1252 unix_table_double_lock(net, old_hash, new_hash);
1253 u->path.mnt = mntget(parent.mnt);
1254 u->path.dentry = dget(dentry);
1255 __unix_set_addr_hash(net, sk, addr, new_hash);
1256 unix_table_double_unlock(net, old_hash, new_hash);
1257 unix_insert_bsd_socket(sk);
1258 mutex_unlock(&u->bindlock);
1259 done_path_create(&parent, dentry);
1260 return 0;
1261
1262 out_unlock:
1263 mutex_unlock(&u->bindlock);
1264 err = -EINVAL;
1265 out_unlink:
1266 /* failed after successful mknod? unlink what we'd created... */
1267 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1268 out_path:
1269 done_path_create(&parent, dentry);
1270 out:
1271 unix_release_addr(addr);
1272 return err == -EEXIST ? -EADDRINUSE : err;
1273 }
1274
1275 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1276 int addr_len)
1277 {
1278 unsigned int new_hash, old_hash = sk->sk_hash;
1279 struct unix_sock *u = unix_sk(sk);
1280 struct net *net = sock_net(sk);
1281 struct unix_address *addr;
1282 int err;
1283
1284 addr = unix_create_addr(sunaddr, addr_len);
1285 if (!addr)
1286 return -ENOMEM;
1287
1288 err = mutex_lock_interruptible(&u->bindlock);
1289 if (err)
1290 goto out;
1291
1292 if (u->addr) {
1293 err = -EINVAL;
1294 goto out_mutex;
1295 }
1296
1297 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1298 unix_table_double_lock(net, old_hash, new_hash);
1299
1300 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1301 goto out_spin;
1302
1303 __unix_set_addr_hash(net, sk, addr, new_hash);
1304 unix_table_double_unlock(net, old_hash, new_hash);
1305 mutex_unlock(&u->bindlock);
1306 return 0;
1307
1308 out_spin:
1309 unix_table_double_unlock(net, old_hash, new_hash);
1310 err = -EADDRINUSE;
1311 out_mutex:
1312 mutex_unlock(&u->bindlock);
1313 out:
1314 unix_release_addr(addr);
1315 return err;
1316 }
1317
1318 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1319 {
1320 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1321 struct sock *sk = sock->sk;
1322 int err;
1323
1324 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1325 sunaddr->sun_family == AF_UNIX)
1326 return unix_autobind(sk);
1327
1328 err = unix_validate_addr(sunaddr, addr_len);
1329 if (err)
1330 return err;
1331
1332 if (sunaddr->sun_path[0])
1333 err = unix_bind_bsd(sk, sunaddr, addr_len);
1334 else
1335 err = unix_bind_abstract(sk, sunaddr, addr_len);
1336
1337 return err;
1338 }
1339
1340 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1341 {
1342 if (unlikely(sk1 == sk2) || !sk2) {
1343 unix_state_lock(sk1);
1344 return;
1345 }
1346 if (sk1 > sk2)
1347 swap(sk1, sk2);
1348
1349 unix_state_lock(sk1);
1350 unix_state_lock_nested(sk2, U_LOCK_SECOND);
1351 }
1352
1353 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1354 {
1355 if (unlikely(sk1 == sk2) || !sk2) {
1356 unix_state_unlock(sk1);
1357 return;
1358 }
1359 unix_state_unlock(sk1);
1360 unix_state_unlock(sk2);
1361 }
1362
1363 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1364 int alen, int flags)
1365 {
1366 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1367 struct sock *sk = sock->sk;
1368 struct sock *other;
1369 int err;
1370
1371 err = -EINVAL;
1372 if (alen < offsetofend(struct sockaddr, sa_family))
1373 goto out;
1374
1375 if (addr->sa_family != AF_UNSPEC) {
1376 err = unix_validate_addr(sunaddr, alen);
1377 if (err)
1378 goto out;
1379
1380 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1381 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1382 !unix_sk(sk)->addr) {
1383 err = unix_autobind(sk);
1384 if (err)
1385 goto out;
1386 }
1387
1388 restart:
1389 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1390 if (IS_ERR(other)) {
1391 err = PTR_ERR(other);
1392 goto out;
1393 }
1394
1395 unix_state_double_lock(sk, other);
1396
1397 /* Apparently VFS overslept socket death. Retry. */
1398 if (sock_flag(other, SOCK_DEAD)) {
1399 unix_state_double_unlock(sk, other);
1400 sock_put(other);
1401 goto restart;
1402 }
1403
1404 err = -EPERM;
1405 if (!unix_may_send(sk, other))
1406 goto out_unlock;
1407
1408 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1409 if (err)
1410 goto out_unlock;
1411
1412 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1413 } else {
1414 /*
1415 * 1003.1g breaking connected state with AF_UNSPEC
1416 */
1417 other = NULL;
1418 unix_state_double_lock(sk, other);
1419 }
1420
1421 /*
1422 * If it was connected, reconnect.
1423 */
1424 if (unix_peer(sk)) {
1425 struct sock *old_peer = unix_peer(sk);
1426
1427 unix_peer(sk) = other;
1428 if (!other)
1429 sk->sk_state = TCP_CLOSE;
1430 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1431
1432 unix_state_double_unlock(sk, other);
1433
1434 if (other != old_peer)
1435 unix_dgram_disconnected(sk, old_peer);
1436 sock_put(old_peer);
1437 } else {
1438 unix_peer(sk) = other;
1439 unix_state_double_unlock(sk, other);
1440 }
1441
1442 return 0;
1443
1444 out_unlock:
1445 unix_state_double_unlock(sk, other);
1446 sock_put(other);
1447 out:
1448 return err;
1449 }
1450
1451 static long unix_wait_for_peer(struct sock *other, long timeo)
1452 __releases(&unix_sk(other)->lock)
1453 {
1454 struct unix_sock *u = unix_sk(other);
1455 int sched;
1456 DEFINE_WAIT(wait);
1457
1458 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1459
1460 sched = !sock_flag(other, SOCK_DEAD) &&
1461 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1462 unix_recvq_full_lockless(other);
1463
1464 unix_state_unlock(other);
1465
1466 if (sched)
1467 timeo = schedule_timeout(timeo);
1468
1469 finish_wait(&u->peer_wait, &wait);
1470 return timeo;
1471 }
1472
1473 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1474 int addr_len, int flags)
1475 {
1476 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1477 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1478 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1479 struct net *net = sock_net(sk);
1480 struct sk_buff *skb = NULL;
1481 long timeo;
1482 int err;
1483 int st;
1484
1485 err = unix_validate_addr(sunaddr, addr_len);
1486 if (err)
1487 goto out;
1488
1489 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1490 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1491 err = unix_autobind(sk);
1492 if (err)
1493 goto out;
1494 }
1495
1496 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1497
1498 /* First of all allocate resources.
1499 If we will make it after state is locked,
1500 we will have to recheck all again in any case.
1501 */
1502
1503 /* create new sock for complete connection */
1504 newsk = unix_create1(net, NULL, 0, sock->type);
1505 if (IS_ERR(newsk)) {
1506 err = PTR_ERR(newsk);
1507 newsk = NULL;
1508 goto out;
1509 }
1510
1511 err = -ENOMEM;
1512
1513 /* Allocate skb for sending to listening sock */
1514 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1515 if (skb == NULL)
1516 goto out;
1517
1518 restart:
1519 /* Find listening sock. */
1520 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1521 if (IS_ERR(other)) {
1522 err = PTR_ERR(other);
1523 other = NULL;
1524 goto out;
1525 }
1526
1527 /* Latch state of peer */
1528 unix_state_lock(other);
1529
1530 /* Apparently VFS overslept socket death. Retry. */
1531 if (sock_flag(other, SOCK_DEAD)) {
1532 unix_state_unlock(other);
1533 sock_put(other);
1534 goto restart;
1535 }
1536
1537 err = -ECONNREFUSED;
1538 if (other->sk_state != TCP_LISTEN)
1539 goto out_unlock;
1540 if (other->sk_shutdown & RCV_SHUTDOWN)
1541 goto out_unlock;
1542
1543 if (unix_recvq_full(other)) {
1544 err = -EAGAIN;
1545 if (!timeo)
1546 goto out_unlock;
1547
1548 timeo = unix_wait_for_peer(other, timeo);
1549
1550 err = sock_intr_errno(timeo);
1551 if (signal_pending(current))
1552 goto out;
1553 sock_put(other);
1554 goto restart;
1555 }
1556
1557 /* Latch our state.
1558
1559 It is tricky place. We need to grab our state lock and cannot
1560 drop lock on peer. It is dangerous because deadlock is
1561 possible. Connect to self case and simultaneous
1562 attempt to connect are eliminated by checking socket
1563 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1564 check this before attempt to grab lock.
1565
1566 Well, and we have to recheck the state after socket locked.
1567 */
1568 st = sk->sk_state;
1569
1570 switch (st) {
1571 case TCP_CLOSE:
1572 /* This is ok... continue with connect */
1573 break;
1574 case TCP_ESTABLISHED:
1575 /* Socket is already connected */
1576 err = -EISCONN;
1577 goto out_unlock;
1578 default:
1579 err = -EINVAL;
1580 goto out_unlock;
1581 }
1582
1583 unix_state_lock_nested(sk, U_LOCK_SECOND);
1584
1585 if (sk->sk_state != st) {
1586 unix_state_unlock(sk);
1587 unix_state_unlock(other);
1588 sock_put(other);
1589 goto restart;
1590 }
1591
1592 err = security_unix_stream_connect(sk, other, newsk);
1593 if (err) {
1594 unix_state_unlock(sk);
1595 goto out_unlock;
1596 }
1597
1598 /* The way is open! Fastly set all the necessary fields... */
1599
1600 sock_hold(sk);
1601 unix_peer(newsk) = sk;
1602 newsk->sk_state = TCP_ESTABLISHED;
1603 newsk->sk_type = sk->sk_type;
1604 init_peercred(newsk);
1605 newu = unix_sk(newsk);
1606 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1607 otheru = unix_sk(other);
1608
1609 /* copy address information from listening to new sock
1610 *
1611 * The contents of *(otheru->addr) and otheru->path
1612 * are seen fully set up here, since we have found
1613 * otheru in hash under its lock. Insertion into the
1614 * hash chain we'd found it in had been done in an
1615 * earlier critical area protected by the chain's lock,
1616 * the same one where we'd set *(otheru->addr) contents,
1617 * as well as otheru->path and otheru->addr itself.
1618 *
1619 * Using smp_store_release() here to set newu->addr
1620 * is enough to make those stores, as well as stores
1621 * to newu->path visible to anyone who gets newu->addr
1622 * by smp_load_acquire(). IOW, the same warranties
1623 * as for unix_sock instances bound in unix_bind() or
1624 * in unix_autobind().
1625 */
1626 if (otheru->path.dentry) {
1627 path_get(&otheru->path);
1628 newu->path = otheru->path;
1629 }
1630 refcount_inc(&otheru->addr->refcnt);
1631 smp_store_release(&newu->addr, otheru->addr);
1632
1633 /* Set credentials */
1634 copy_peercred(sk, other);
1635
1636 sock->state = SS_CONNECTED;
1637 sk->sk_state = TCP_ESTABLISHED;
1638 sock_hold(newsk);
1639
1640 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1641 unix_peer(sk) = newsk;
1642
1643 unix_state_unlock(sk);
1644
1645 /* take ten and send info to listening sock */
1646 spin_lock(&other->sk_receive_queue.lock);
1647 __skb_queue_tail(&other->sk_receive_queue, skb);
1648 spin_unlock(&other->sk_receive_queue.lock);
1649 unix_state_unlock(other);
1650 other->sk_data_ready(other);
1651 sock_put(other);
1652 return 0;
1653
1654 out_unlock:
1655 if (other)
1656 unix_state_unlock(other);
1657
1658 out:
1659 kfree_skb(skb);
1660 if (newsk)
1661 unix_release_sock(newsk, 0);
1662 if (other)
1663 sock_put(other);
1664 return err;
1665 }
1666
1667 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1668 {
1669 struct sock *ska = socka->sk, *skb = sockb->sk;
1670
1671 /* Join our sockets back to back */
1672 sock_hold(ska);
1673 sock_hold(skb);
1674 unix_peer(ska) = skb;
1675 unix_peer(skb) = ska;
1676 init_peercred(ska);
1677 init_peercred(skb);
1678
1679 ska->sk_state = TCP_ESTABLISHED;
1680 skb->sk_state = TCP_ESTABLISHED;
1681 socka->state = SS_CONNECTED;
1682 sockb->state = SS_CONNECTED;
1683 return 0;
1684 }
1685
1686 static void unix_sock_inherit_flags(const struct socket *old,
1687 struct socket *new)
1688 {
1689 if (test_bit(SOCK_PASSCRED, &old->flags))
1690 set_bit(SOCK_PASSCRED, &new->flags);
1691 if (test_bit(SOCK_PASSPIDFD, &old->flags))
1692 set_bit(SOCK_PASSPIDFD, &new->flags);
1693 if (test_bit(SOCK_PASSSEC, &old->flags))
1694 set_bit(SOCK_PASSSEC, &new->flags);
1695 }
1696
1697 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1698 bool kern)
1699 {
1700 struct sock *sk = sock->sk;
1701 struct sock *tsk;
1702 struct sk_buff *skb;
1703 int err;
1704
1705 err = -EOPNOTSUPP;
1706 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1707 goto out;
1708
1709 err = -EINVAL;
1710 if (sk->sk_state != TCP_LISTEN)
1711 goto out;
1712
1713 /* If socket state is TCP_LISTEN it cannot change (for now...),
1714 * so that no locks are necessary.
1715 */
1716
1717 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1718 &err);
1719 if (!skb) {
1720 /* This means receive shutdown. */
1721 if (err == 0)
1722 err = -EINVAL;
1723 goto out;
1724 }
1725
1726 tsk = skb->sk;
1727 skb_free_datagram(sk, skb);
1728 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1729
1730 /* attach accepted sock to socket */
1731 unix_state_lock(tsk);
1732 newsock->state = SS_CONNECTED;
1733 unix_sock_inherit_flags(sock, newsock);
1734 sock_graft(tsk, newsock);
1735 unix_state_unlock(tsk);
1736 return 0;
1737
1738 out:
1739 return err;
1740 }
1741
1742
1743 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1744 {
1745 struct sock *sk = sock->sk;
1746 struct unix_address *addr;
1747 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1748 int err = 0;
1749
1750 if (peer) {
1751 sk = unix_peer_get(sk);
1752
1753 err = -ENOTCONN;
1754 if (!sk)
1755 goto out;
1756 err = 0;
1757 } else {
1758 sock_hold(sk);
1759 }
1760
1761 addr = smp_load_acquire(&unix_sk(sk)->addr);
1762 if (!addr) {
1763 sunaddr->sun_family = AF_UNIX;
1764 sunaddr->sun_path[0] = 0;
1765 err = offsetof(struct sockaddr_un, sun_path);
1766 } else {
1767 err = addr->len;
1768 memcpy(sunaddr, addr->name, addr->len);
1769 }
1770 sock_put(sk);
1771 out:
1772 return err;
1773 }
1774
1775 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1776 {
1777 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1778
1779 /*
1780 * Garbage collection of unix sockets starts by selecting a set of
1781 * candidate sockets which have reference only from being in flight
1782 * (total_refs == inflight_refs). This condition is checked once during
1783 * the candidate collection phase, and candidates are marked as such, so
1784 * that non-candidates can later be ignored. While inflight_refs is
1785 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1786 * is an instantaneous decision.
1787 *
1788 * Once a candidate, however, the socket must not be reinstalled into a
1789 * file descriptor while the garbage collection is in progress.
1790 *
1791 * If the above conditions are met, then the directed graph of
1792 * candidates (*) does not change while unix_gc_lock is held.
1793 *
1794 * Any operations that changes the file count through file descriptors
1795 * (dup, close, sendmsg) does not change the graph since candidates are
1796 * not installed in fds.
1797 *
1798 * Dequeuing a candidate via recvmsg would install it into an fd, but
1799 * that takes unix_gc_lock to decrement the inflight count, so it's
1800 * serialized with garbage collection.
1801 *
1802 * MSG_PEEK is special in that it does not change the inflight count,
1803 * yet does install the socket into an fd. The following lock/unlock
1804 * pair is to ensure serialization with garbage collection. It must be
1805 * done between incrementing the file count and installing the file into
1806 * an fd.
1807 *
1808 * If garbage collection starts after the barrier provided by the
1809 * lock/unlock, then it will see the elevated refcount and not mark this
1810 * as a candidate. If a garbage collection is already in progress
1811 * before the file count was incremented, then the lock/unlock pair will
1812 * ensure that garbage collection is finished before progressing to
1813 * installing the fd.
1814 *
1815 * (*) A -> B where B is on the queue of A or B is on the queue of C
1816 * which is on the queue of listening socket A.
1817 */
1818 spin_lock(&unix_gc_lock);
1819 spin_unlock(&unix_gc_lock);
1820 }
1821
1822 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1823 {
1824 int err = 0;
1825
1826 UNIXCB(skb).pid = get_pid(scm->pid);
1827 UNIXCB(skb).uid = scm->creds.uid;
1828 UNIXCB(skb).gid = scm->creds.gid;
1829 UNIXCB(skb).fp = NULL;
1830 unix_get_secdata(scm, skb);
1831 if (scm->fp && send_fds)
1832 err = unix_attach_fds(scm, skb);
1833
1834 skb->destructor = unix_destruct_scm;
1835 return err;
1836 }
1837
1838 static bool unix_passcred_enabled(const struct socket *sock,
1839 const struct sock *other)
1840 {
1841 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1842 test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1843 !other->sk_socket ||
1844 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1845 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1846 }
1847
1848 /*
1849 * Some apps rely on write() giving SCM_CREDENTIALS.
1850 * We include credentials if the source or destination socket
1851 * asserted SOCK_PASSCRED.
1852 */
1853 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1854 const struct sock *other)
1855 {
1856 if (UNIXCB(skb).pid)
1857 return;
1858 if (unix_passcred_enabled(sock, other)) {
1859 UNIXCB(skb).pid = get_pid(task_tgid(current));
1860 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1861 }
1862 }
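/* Illustrative userspace sketch (not part of this file): a receiver that has
 * set SO_PASSCRED gets an SCM_CREDENTIALS cmsg filled in by the path above
 * even when the sender attached no explicit credentials. Standard libc
 * socket API only; error handling trimmed.
 *
 *	int on = 1;
 *	struct ucred cred;
 *	char buf[64], ctrl[CMSG_SPACE(sizeof(struct ucred))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
 *	};
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(sock_fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(sock_fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
 */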
1863
1864 static bool unix_skb_scm_eq(struct sk_buff *skb,
1865 struct scm_cookie *scm)
1866 {
1867 return UNIXCB(skb).pid == scm->pid &&
1868 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1869 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1870 unix_secdata_eq(scm, skb);
1871 }
1872
1873 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1874 {
1875 struct scm_fp_list *fp = UNIXCB(skb).fp;
1876 struct unix_sock *u = unix_sk(sk);
1877
1878 if (unlikely(fp && fp->count))
1879 atomic_add(fp->count, &u->scm_stat.nr_fds);
1880 }
1881
1882 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1883 {
1884 struct scm_fp_list *fp = UNIXCB(skb).fp;
1885 struct unix_sock *u = unix_sk(sk);
1886
1887 if (unlikely(fp && fp->count))
1888 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1889 }
1890
1891 /*
1892 * Send AF_UNIX data.
1893 */
1894
1895 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1896 size_t len)
1897 {
1898 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1899 struct sock *sk = sock->sk, *other = NULL;
1900 struct unix_sock *u = unix_sk(sk);
1901 struct scm_cookie scm;
1902 struct sk_buff *skb;
1903 int data_len = 0;
1904 int sk_locked;
1905 long timeo;
1906 int err;
1907
1908 wait_for_unix_gc();
1909 err = scm_send(sock, msg, &scm, false);
1910 if (err < 0)
1911 return err;
1912
1913 err = -EOPNOTSUPP;
1914 if (msg->msg_flags&MSG_OOB)
1915 goto out;
1916
1917 if (msg->msg_namelen) {
1918 err = unix_validate_addr(sunaddr, msg->msg_namelen);
1919 if (err)
1920 goto out;
1921 } else {
1922 sunaddr = NULL;
1923 err = -ENOTCONN;
1924 other = unix_peer_get(sk);
1925 if (!other)
1926 goto out;
1927 }
1928
1929 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1930 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1931 err = unix_autobind(sk);
1932 if (err)
1933 goto out;
1934 }
1935
1936 err = -EMSGSIZE;
1937 if (len > sk->sk_sndbuf - 32)
1938 goto out;
1939
1940 if (len > SKB_MAX_ALLOC) {
1941 data_len = min_t(size_t,
1942 len - SKB_MAX_ALLOC,
1943 MAX_SKB_FRAGS * PAGE_SIZE);
1944 data_len = PAGE_ALIGN(data_len);
1945
1946 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1947 }
1948
1949 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1950 msg->msg_flags & MSG_DONTWAIT, &err,
1951 PAGE_ALLOC_COSTLY_ORDER);
1952 if (skb == NULL)
1953 goto out;
1954
1955 err = unix_scm_to_skb(&scm, skb, true);
1956 if (err < 0)
1957 goto out_free;
1958
1959 skb_put(skb, len - data_len);
1960 skb->data_len = data_len;
1961 skb->len = len;
1962 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1963 if (err)
1964 goto out_free;
1965
1966 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1967
1968 restart:
1969 if (!other) {
1970 err = -ECONNRESET;
1971 if (sunaddr == NULL)
1972 goto out_free;
1973
1974 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1975 sk->sk_type);
1976 if (IS_ERR(other)) {
1977 err = PTR_ERR(other);
1978 other = NULL;
1979 goto out_free;
1980 }
1981 }
1982
1983 if (sk_filter(other, skb) < 0) {
1984 /* Toss the packet but do not return any error to the sender */
1985 err = len;
1986 goto out_free;
1987 }
1988
1989 sk_locked = 0;
1990 unix_state_lock(other);
1991 restart_locked:
1992 err = -EPERM;
1993 if (!unix_may_send(sk, other))
1994 goto out_unlock;
1995
1996 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1997 /*
1998 * Check with 1003.1g - what should a
1999 * datagram error return here?
2000 */
2001 unix_state_unlock(other);
2002 sock_put(other);
2003
2004 if (!sk_locked)
2005 unix_state_lock(sk);
2006
2007 err = 0;
2008 if (sk->sk_type == SOCK_SEQPACKET) {
2009 /* We are here only when racing with unix_release_sock(),
2010 * which is clearing @other. Unlike SOCK_DGRAM, never
2011 * change the state to TCP_CLOSE.
2012 */
2013 unix_state_unlock(sk);
2014 err = -EPIPE;
2015 } else if (unix_peer(sk) == other) {
2016 unix_peer(sk) = NULL;
2017 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2018
2019 sk->sk_state = TCP_CLOSE;
2020 unix_state_unlock(sk);
2021
2022 unix_dgram_disconnected(sk, other);
2023 sock_put(other);
2024 err = -ECONNREFUSED;
2025 } else {
2026 unix_state_unlock(sk);
2027 }
2028
2029 other = NULL;
2030 if (err)
2031 goto out_free;
2032 goto restart;
2033 }
2034
2035 err = -EPIPE;
2036 if (other->sk_shutdown & RCV_SHUTDOWN)
2037 goto out_unlock;
2038
2039 if (sk->sk_type != SOCK_SEQPACKET) {
2040 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2041 if (err)
2042 goto out_unlock;
2043 }
2044
2045 /* other == sk && unix_peer(other) != sk if
2046 * - unix_peer(sk) == NULL, destination address bound to sk
2047 * - unix_peer(sk) == sk by time of get but disconnected before lock
2048 */
2049 if (other != sk &&
2050 unlikely(unix_peer(other) != sk &&
2051 unix_recvq_full_lockless(other))) {
2052 if (timeo) {
2053 timeo = unix_wait_for_peer(other, timeo);
2054
2055 err = sock_intr_errno(timeo);
2056 if (signal_pending(current))
2057 goto out_free;
2058
2059 goto restart;
2060 }
2061
2062 if (!sk_locked) {
2063 unix_state_unlock(other);
2064 unix_state_double_lock(sk, other);
2065 }
2066
2067 if (unix_peer(sk) != other ||
2068 unix_dgram_peer_wake_me(sk, other)) {
2069 err = -EAGAIN;
2070 sk_locked = 1;
2071 goto out_unlock;
2072 }
2073
2074 if (!sk_locked) {
2075 sk_locked = 1;
2076 goto restart_locked;
2077 }
2078 }
2079
2080 if (unlikely(sk_locked))
2081 unix_state_unlock(sk);
2082
2083 if (sock_flag(other, SOCK_RCVTSTAMP))
2084 __net_timestamp(skb);
2085 maybe_add_creds(skb, sock, other);
2086 scm_stat_add(other, skb);
2087 skb_queue_tail(&other->sk_receive_queue, skb);
2088 unix_state_unlock(other);
2089 other->sk_data_ready(other);
2090 sock_put(other);
2091 scm_destroy(&scm);
2092 return len;
2093
2094 out_unlock:
2095 if (sk_locked)
2096 unix_state_unlock(sk);
2097 unix_state_unlock(other);
2098 out_free:
2099 kfree_skb(skb);
2100 out:
2101 if (other)
2102 sock_put(other);
2103 scm_destroy(&scm);
2104 return err;
2105 }
2106
2107 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2108 * bytes, with a minimum of a full page.
2109 */
2110 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
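/* Worked example, assuming the usual page sizes: with 4 KiB pages,
 * get_order(32768) == 3, so UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes;
 * with 64 KiB pages, get_order(32768) == 0 and the macro degenerates to a
 * single full page (65536 bytes) - the "minimum of a full page" above.
 */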
2111
2112 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2113 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2114 struct scm_cookie *scm, bool fds_sent)
2115 {
2116 struct unix_sock *ousk = unix_sk(other);
2117 struct sk_buff *skb;
2118 int err = 0;
2119
2120 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2121
2122 if (!skb)
2123 return err;
2124
2125 err = unix_scm_to_skb(scm, skb, !fds_sent);
2126 if (err < 0) {
2127 kfree_skb(skb);
2128 return err;
2129 }
2130 skb_put(skb, 1);
2131 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2132
2133 if (err) {
2134 kfree_skb(skb);
2135 return err;
2136 }
2137
2138 unix_state_lock(other);
2139
2140 if (sock_flag(other, SOCK_DEAD) ||
2141 (other->sk_shutdown & RCV_SHUTDOWN)) {
2142 unix_state_unlock(other);
2143 kfree_skb(skb);
2144 return -EPIPE;
2145 }
2146
2147 maybe_add_creds(skb, sock, other);
2148 skb_get(skb);
2149
2150 if (ousk->oob_skb)
2151 consume_skb(ousk->oob_skb);
2152
2153 WRITE_ONCE(ousk->oob_skb, skb);
2154
2155 scm_stat_add(other, skb);
2156 skb_queue_tail(&other->sk_receive_queue, skb);
2157 sk_send_sigurg(other);
2158 unix_state_unlock(other);
2159 other->sk_data_ready(other);
2160
2161 return err;
2162 }
2163 #endif
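/* Illustrative userspace sketch (not part of this file) of the out-of-band
 * semantics implemented by queue_oob() and unix_stream_recv_urg(): the last
 * byte of an MSG_OOB send becomes the single remembered OOB byte, and the
 * reader fetches it with MSG_OOB. Plain libc calls, error handling omitted;
 * sock_fd/peer_fd are the two ends of a connected AF_UNIX stream pair.
 *
 *	char c;
 *
 *	send(peer_fd, "ab", 2, MSG_OOB);	// 'b' becomes the OOB byte
 *	recv(sock_fd, &c, 1, 0);		// reads the in-band 'a'
 *	recv(sock_fd, &c, 1, MSG_OOB);		// reads the OOB 'b'
 */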
2164
2165 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2166 size_t len)
2167 {
2168 struct sock *sk = sock->sk;
2169 struct sock *other = NULL;
2170 int err, size;
2171 struct sk_buff *skb;
2172 int sent = 0;
2173 struct scm_cookie scm;
2174 bool fds_sent = false;
2175 int data_len;
2176
2177 wait_for_unix_gc();
2178 err = scm_send(sock, msg, &scm, false);
2179 if (err < 0)
2180 return err;
2181
2182 err = -EOPNOTSUPP;
2183 if (msg->msg_flags & MSG_OOB) {
2184 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2185 if (len)
2186 len--;
2187 else
2188 #endif
2189 goto out_err;
2190 }
2191
2192 if (msg->msg_namelen) {
2193 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2194 goto out_err;
2195 } else {
2196 err = -ENOTCONN;
2197 other = unix_peer(sk);
2198 if (!other)
2199 goto out_err;
2200 }
2201
2202 if (sk->sk_shutdown & SEND_SHUTDOWN)
2203 goto pipe_err;
2204
2205 while (sent < len) {
2206 size = len - sent;
2207
2208 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2209 skb = sock_alloc_send_pskb(sk, 0, 0,
2210 msg->msg_flags & MSG_DONTWAIT,
2211 &err, 0);
2212 } else {
2213 /* Keep two messages in the pipe so it schedules better */
2214 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2215
2216 /* allow fallback to order-0 allocations */
2217 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2218
2219 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2220
2221 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2222
2223 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2224 msg->msg_flags & MSG_DONTWAIT, &err,
2225 get_order(UNIX_SKB_FRAGS_SZ));
2226 }
2227 if (!skb)
2228 goto out_err;
2229
2230 /* Only send the fds in the first buffer */
2231 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2232 if (err < 0) {
2233 kfree_skb(skb);
2234 goto out_err;
2235 }
2236 fds_sent = true;
2237
2238 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2239 err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2240 sk->sk_allocation);
2241 if (err < 0) {
2242 kfree_skb(skb);
2243 goto out_err;
2244 }
2245 size = err;
2246 refcount_add(size, &sk->sk_wmem_alloc);
2247 } else {
2248 skb_put(skb, size - data_len);
2249 skb->data_len = data_len;
2250 skb->len = size;
2251 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2252 if (err) {
2253 kfree_skb(skb);
2254 goto out_err;
2255 }
2256 }
2257
2258 unix_state_lock(other);
2259
2260 if (sock_flag(other, SOCK_DEAD) ||
2261 (other->sk_shutdown & RCV_SHUTDOWN))
2262 goto pipe_err_free;
2263
2264 maybe_add_creds(skb, sock, other);
2265 scm_stat_add(other, skb);
2266 skb_queue_tail(&other->sk_receive_queue, skb);
2267 unix_state_unlock(other);
2268 other->sk_data_ready(other);
2269 sent += size;
2270 }
2271
2272 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2273 if (msg->msg_flags & MSG_OOB) {
2274 err = queue_oob(sock, msg, other, &scm, fds_sent);
2275 if (err)
2276 goto out_err;
2277 sent++;
2278 }
2279 #endif
2280
2281 scm_destroy(&scm);
2282
2283 return sent;
2284
2285 pipe_err_free:
2286 unix_state_unlock(other);
2287 kfree_skb(skb);
2288 pipe_err:
2289 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2290 send_sig(SIGPIPE, current, 0);
2291 err = -EPIPE;
2292 out_err:
2293 scm_destroy(&scm);
2294 return sent ? : err;
2295 }
2296
2297 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2298 size_t len)
2299 {
2300 int err;
2301 struct sock *sk = sock->sk;
2302
2303 err = sock_error(sk);
2304 if (err)
2305 return err;
2306
2307 if (sk->sk_state != TCP_ESTABLISHED)
2308 return -ENOTCONN;
2309
2310 if (msg->msg_namelen)
2311 msg->msg_namelen = 0;
2312
2313 return unix_dgram_sendmsg(sock, msg, len);
2314 }
2315
2316 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2317 size_t size, int flags)
2318 {
2319 struct sock *sk = sock->sk;
2320
2321 if (sk->sk_state != TCP_ESTABLISHED)
2322 return -ENOTCONN;
2323
2324 return unix_dgram_recvmsg(sock, msg, size, flags);
2325 }
2326
2327 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2328 {
2329 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2330
2331 if (addr) {
2332 msg->msg_namelen = addr->len;
2333 memcpy(msg->msg_name, addr->name, addr->len);
2334 }
2335 }
2336
2337 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2338 int flags)
2339 {
2340 struct scm_cookie scm;
2341 struct socket *sock = sk->sk_socket;
2342 struct unix_sock *u = unix_sk(sk);
2343 struct sk_buff *skb, *last;
2344 long timeo;
2345 int skip;
2346 int err;
2347
2348 err = -EOPNOTSUPP;
2349 if (flags&MSG_OOB)
2350 goto out;
2351
2352 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2353
2354 do {
2355 mutex_lock(&u->iolock);
2356
2357 skip = sk_peek_offset(sk, flags);
2358 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2359 &skip, &err, &last);
2360 if (skb) {
2361 if (!(flags & MSG_PEEK))
2362 scm_stat_del(sk, skb);
2363 break;
2364 }
2365
2366 mutex_unlock(&u->iolock);
2367
2368 if (err != -EAGAIN)
2369 break;
2370 } while (timeo &&
2371 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2372 &err, &timeo, last));
2373
2374 if (!skb) { /* implies iolock unlocked */
2375 unix_state_lock(sk);
2376 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2377 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2378 (sk->sk_shutdown & RCV_SHUTDOWN))
2379 err = 0;
2380 unix_state_unlock(sk);
2381 goto out;
2382 }
2383
2384 if (wq_has_sleeper(&u->peer_wait))
2385 wake_up_interruptible_sync_poll(&u->peer_wait,
2386 EPOLLOUT | EPOLLWRNORM |
2387 EPOLLWRBAND);
2388
2389 if (msg->msg_name)
2390 unix_copy_addr(msg, skb->sk);
2391
2392 if (size > skb->len - skip)
2393 size = skb->len - skip;
2394 else if (size < skb->len - skip)
2395 msg->msg_flags |= MSG_TRUNC;
2396
2397 err = skb_copy_datagram_msg(skb, skip, msg, size);
2398 if (err)
2399 goto out_free;
2400
2401 if (sock_flag(sk, SOCK_RCVTSTAMP))
2402 __sock_recv_timestamp(msg, sk, skb);
2403
2404 memset(&scm, 0, sizeof(scm));
2405
2406 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2407 unix_set_secdata(&scm, skb);
2408
2409 if (!(flags & MSG_PEEK)) {
2410 if (UNIXCB(skb).fp)
2411 unix_detach_fds(&scm, skb);
2412
2413 sk_peek_offset_bwd(sk, skb->len);
2414 } else {
2415 /* It is questionable: on PEEK we could:
2416 - not return fds - good, but too simple 8)
2417 - return fds, and not return them on read (old strategy,
2418 apparently wrong)
2419 - clone fds (I chose it for now, it is the most universal
2420 solution)
2421
2422 POSIX 1003.1g does not actually define this clearly
2423 at all. POSIX 1003.1g doesn't define a lot of things
2424 clearly however!
2425
2426 */
2427
2428 sk_peek_offset_fwd(sk, size);
2429
2430 if (UNIXCB(skb).fp)
2431 unix_peek_fds(&scm, skb);
2432 }
2433 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2434
2435 scm_recv_unix(sock, msg, &scm, flags);
2436
2437 out_free:
2438 skb_free_datagram(sk, skb);
2439 mutex_unlock(&u->iolock);
2440 out:
2441 return err;
2442 }
2443
2444 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2445 int flags)
2446 {
2447 struct sock *sk = sock->sk;
2448
2449 #ifdef CONFIG_BPF_SYSCALL
2450 const struct proto *prot = READ_ONCE(sk->sk_prot);
2451
2452 if (prot != &unix_dgram_proto)
2453 return prot->recvmsg(sk, msg, size, flags, NULL);
2454 #endif
2455 return __unix_dgram_recvmsg(sk, msg, size, flags);
2456 }
2457
2458 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2459 {
2460 struct unix_sock *u = unix_sk(sk);
2461 struct sk_buff *skb;
2462 int err;
2463
2464 mutex_lock(&u->iolock);
2465 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2466 mutex_unlock(&u->iolock);
2467 if (!skb)
2468 return err;
2469
2470 return recv_actor(sk, skb);
2471 }
2472
2473 /*
2474 * Sleep until more data has arrived. But check for races.
2475 */
2476 static long unix_stream_data_wait(struct sock *sk, long timeo,
2477 struct sk_buff *last, unsigned int last_len,
2478 bool freezable)
2479 {
2480 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2481 struct sk_buff *tail;
2482 DEFINE_WAIT(wait);
2483
2484 unix_state_lock(sk);
2485
2486 for (;;) {
2487 prepare_to_wait(sk_sleep(sk), &wait, state);
2488
2489 tail = skb_peek_tail(&sk->sk_receive_queue);
2490 if (tail != last ||
2491 (tail && tail->len != last_len) ||
2492 sk->sk_err ||
2493 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2494 signal_pending(current) ||
2495 !timeo)
2496 break;
2497
2498 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2499 unix_state_unlock(sk);
2500 timeo = schedule_timeout(timeo);
2501 unix_state_lock(sk);
2502
2503 if (sock_flag(sk, SOCK_DEAD))
2504 break;
2505
2506 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2507 }
2508
2509 finish_wait(sk_sleep(sk), &wait);
2510 unix_state_unlock(sk);
2511 return timeo;
2512 }
2513
2514 static unsigned int unix_skb_len(const struct sk_buff *skb)
2515 {
2516 return skb->len - UNIXCB(skb).consumed;
2517 }
2518
2519 struct unix_stream_read_state {
2520 int (*recv_actor)(struct sk_buff *, int, int,
2521 struct unix_stream_read_state *);
2522 struct socket *socket;
2523 struct msghdr *msg;
2524 struct pipe_inode_info *pipe;
2525 size_t size;
2526 int flags;
2527 unsigned int splice_flags;
2528 };
2529
2530 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2531 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2532 {
2533 struct socket *sock = state->socket;
2534 struct sock *sk = sock->sk;
2535 struct unix_sock *u = unix_sk(sk);
2536 int chunk = 1;
2537 struct sk_buff *oob_skb;
2538
2539 mutex_lock(&u->iolock);
2540 unix_state_lock(sk);
2541
2542 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2543 unix_state_unlock(sk);
2544 mutex_unlock(&u->iolock);
2545 return -EINVAL;
2546 }
2547
2548 oob_skb = u->oob_skb;
2549
2550 if (!(state->flags & MSG_PEEK))
2551 WRITE_ONCE(u->oob_skb, NULL);
2552 else
2553 skb_get(oob_skb);
2554 unix_state_unlock(sk);
2555
2556 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2557
2558 if (!(state->flags & MSG_PEEK))
2559 UNIXCB(oob_skb).consumed += 1;
2560
2561 consume_skb(oob_skb);
2562
2563 mutex_unlock(&u->iolock);
2564
2565 if (chunk < 0)
2566 return -EFAULT;
2567
2568 state->msg->msg_flags |= MSG_OOB;
2569 return 1;
2570 }
2571
2572 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2573 int flags, int copied)
2574 {
2575 struct unix_sock *u = unix_sk(sk);
2576
2577 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2578 skb_unlink(skb, &sk->sk_receive_queue);
2579 consume_skb(skb);
2580 skb = NULL;
2581 } else {
2582 if (skb == u->oob_skb) {
2583 if (copied) {
2584 skb = NULL;
2585 } else if (sock_flag(sk, SOCK_URGINLINE)) {
2586 if (!(flags & MSG_PEEK)) {
2587 WRITE_ONCE(u->oob_skb, NULL);
2588 consume_skb(skb);
2589 }
2590 } else if (!(flags & MSG_PEEK)) {
2591 skb_unlink(skb, &sk->sk_receive_queue);
2592 consume_skb(skb);
2593 skb = skb_peek(&sk->sk_receive_queue);
2594 }
2595 }
2596 }
2597 return skb;
2598 }
2599 #endif
2600
2601 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2602 {
2603 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2604 return -ENOTCONN;
2605
2606 return unix_read_skb(sk, recv_actor);
2607 }
2608
2609 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2610 bool freezable)
2611 {
2612 struct scm_cookie scm;
2613 struct socket *sock = state->socket;
2614 struct sock *sk = sock->sk;
2615 struct unix_sock *u = unix_sk(sk);
2616 int copied = 0;
2617 int flags = state->flags;
2618 int noblock = flags & MSG_DONTWAIT;
2619 bool check_creds = false;
2620 int target;
2621 int err = 0;
2622 long timeo;
2623 int skip;
2624 size_t size = state->size;
2625 unsigned int last_len;
2626
2627 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2628 err = -EINVAL;
2629 goto out;
2630 }
2631
2632 if (unlikely(flags & MSG_OOB)) {
2633 err = -EOPNOTSUPP;
2634 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2635 err = unix_stream_recv_urg(state);
2636 #endif
2637 goto out;
2638 }
2639
2640 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2641 timeo = sock_rcvtimeo(sk, noblock);
2642
2643 memset(&scm, 0, sizeof(scm));
2644
2645 /* Lock the socket to prevent queue disordering
2646 * while we sleep in memcpy_tomsg
2647 */
2648 mutex_lock(&u->iolock);
2649
2650 skip = max(sk_peek_offset(sk, flags), 0);
2651
2652 do {
2653 int chunk;
2654 bool drop_skb;
2655 struct sk_buff *skb, *last;
2656
2657 redo:
2658 unix_state_lock(sk);
2659 if (sock_flag(sk, SOCK_DEAD)) {
2660 err = -ECONNRESET;
2661 goto unlock;
2662 }
2663 last = skb = skb_peek(&sk->sk_receive_queue);
2664 last_len = last ? last->len : 0;
2665
2666 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2667 if (skb) {
2668 skb = manage_oob(skb, sk, flags, copied);
2669 if (!skb) {
2670 unix_state_unlock(sk);
2671 if (copied)
2672 break;
2673 goto redo;
2674 }
2675 }
2676 #endif
2677 again:
2678 if (skb == NULL) {
2679 if (copied >= target)
2680 goto unlock;
2681
2682 /*
2683 * POSIX 1003.1g mandates this order.
2684 */
2685
2686 err = sock_error(sk);
2687 if (err)
2688 goto unlock;
2689 if (sk->sk_shutdown & RCV_SHUTDOWN)
2690 goto unlock;
2691
2692 unix_state_unlock(sk);
2693 if (!timeo) {
2694 err = -EAGAIN;
2695 break;
2696 }
2697
2698 mutex_unlock(&u->iolock);
2699
2700 timeo = unix_stream_data_wait(sk, timeo, last,
2701 last_len, freezable);
2702
2703 if (signal_pending(current)) {
2704 err = sock_intr_errno(timeo);
2705 scm_destroy(&scm);
2706 goto out;
2707 }
2708
2709 mutex_lock(&u->iolock);
2710 goto redo;
2711 unlock:
2712 unix_state_unlock(sk);
2713 break;
2714 }
2715
2716 while (skip >= unix_skb_len(skb)) {
2717 skip -= unix_skb_len(skb);
2718 last = skb;
2719 last_len = skb->len;
2720 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2721 if (!skb)
2722 goto again;
2723 }
2724
2725 unix_state_unlock(sk);
2726
2727 if (check_creds) {
2728 /* Never glue messages from different writers */
2729 if (!unix_skb_scm_eq(skb, &scm))
2730 break;
2731 } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2732 test_bit(SOCK_PASSPIDFD, &sock->flags)) {
2733 /* Copy credentials */
2734 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2735 unix_set_secdata(&scm, skb);
2736 check_creds = true;
2737 }
2738
2739 /* Copy address just once */
2740 if (state->msg && state->msg->msg_name) {
2741 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2742 state->msg->msg_name);
2743 unix_copy_addr(state->msg, skb->sk);
2744 sunaddr = NULL;
2745 }
2746
2747 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2748 skb_get(skb);
2749 chunk = state->recv_actor(skb, skip, chunk, state);
2750 drop_skb = !unix_skb_len(skb);
2751 /* skb is only safe to use if !drop_skb */
2752 consume_skb(skb);
2753 if (chunk < 0) {
2754 if (copied == 0)
2755 copied = -EFAULT;
2756 break;
2757 }
2758 copied += chunk;
2759 size -= chunk;
2760
2761 if (drop_skb) {
2762 /* the skb was touched by a concurrent reader;
2763 * we should not expect anything from this skb
2764 * anymore and assume it is invalid - we can be
2765 * sure it was dropped from the socket queue
2766 *
2767 * let's report a short read
2768 */
2769 err = 0;
2770 break;
2771 }
2772
2773 /* Mark read part of skb as used */
2774 if (!(flags & MSG_PEEK)) {
2775 UNIXCB(skb).consumed += chunk;
2776
2777 sk_peek_offset_bwd(sk, chunk);
2778
2779 if (UNIXCB(skb).fp) {
2780 scm_stat_del(sk, skb);
2781 unix_detach_fds(&scm, skb);
2782 }
2783
2784 if (unix_skb_len(skb))
2785 break;
2786
2787 skb_unlink(skb, &sk->sk_receive_queue);
2788 consume_skb(skb);
2789
2790 if (scm.fp)
2791 break;
2792 } else {
2793 /* It is questionable, see note in unix_dgram_recvmsg.
2794 */
2795 if (UNIXCB(skb).fp)
2796 unix_peek_fds(&scm, skb);
2797
2798 sk_peek_offset_fwd(sk, chunk);
2799
2800 if (UNIXCB(skb).fp)
2801 break;
2802
2803 skip = 0;
2804 last = skb;
2805 last_len = skb->len;
2806 unix_state_lock(sk);
2807 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2808 if (skb)
2809 goto again;
2810 unix_state_unlock(sk);
2811 break;
2812 }
2813 } while (size);
2814
2815 mutex_unlock(&u->iolock);
2816 if (state->msg)
2817 scm_recv_unix(sock, state->msg, &scm, flags);
2818 else
2819 scm_destroy(&scm);
2820 out:
2821 return copied ? : err;
2822 }
2823
2824 static int unix_stream_read_actor(struct sk_buff *skb,
2825 int skip, int chunk,
2826 struct unix_stream_read_state *state)
2827 {
2828 int ret;
2829
2830 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2831 state->msg, chunk);
2832 return ret ?: chunk;
2833 }
2834
2835 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2836 size_t size, int flags)
2837 {
2838 struct unix_stream_read_state state = {
2839 .recv_actor = unix_stream_read_actor,
2840 .socket = sk->sk_socket,
2841 .msg = msg,
2842 .size = size,
2843 .flags = flags
2844 };
2845
2846 return unix_stream_read_generic(&state, true);
2847 }
2848
2849 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2850 size_t size, int flags)
2851 {
2852 struct unix_stream_read_state state = {
2853 .recv_actor = unix_stream_read_actor,
2854 .socket = sock,
2855 .msg = msg,
2856 .size = size,
2857 .flags = flags
2858 };
2859
2860 #ifdef CONFIG_BPF_SYSCALL
2861 struct sock *sk = sock->sk;
2862 const struct proto *prot = READ_ONCE(sk->sk_prot);
2863
2864 if (prot != &unix_stream_proto)
2865 return prot->recvmsg(sk, msg, size, flags, NULL);
2866 #endif
2867 return unix_stream_read_generic(&state, true);
2868 }
2869
2870 static int unix_stream_splice_actor(struct sk_buff *skb,
2871 int skip, int chunk,
2872 struct unix_stream_read_state *state)
2873 {
2874 return skb_splice_bits(skb, state->socket->sk,
2875 UNIXCB(skb).consumed + skip,
2876 state->pipe, chunk, state->splice_flags);
2877 }
2878
2879 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2880 struct pipe_inode_info *pipe,
2881 size_t size, unsigned int flags)
2882 {
2883 struct unix_stream_read_state state = {
2884 .recv_actor = unix_stream_splice_actor,
2885 .socket = sock,
2886 .pipe = pipe,
2887 .size = size,
2888 .splice_flags = flags,
2889 };
2890
2891 if (unlikely(*ppos))
2892 return -ESPIPE;
2893
2894 if (sock->file->f_flags & O_NONBLOCK ||
2895 flags & SPLICE_F_NONBLOCK)
2896 state.flags = MSG_DONTWAIT;
2897
2898 return unix_stream_read_generic(&state, false);
2899 }
2900
2901 static int unix_shutdown(struct socket *sock, int mode)
2902 {
2903 struct sock *sk = sock->sk;
2904 struct sock *other;
2905
2906 if (mode < SHUT_RD || mode > SHUT_RDWR)
2907 return -EINVAL;
2908 /* This maps:
2909 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2910 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2911 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2912 */
2913 ++mode;
2914
2915 unix_state_lock(sk);
2916 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2917 other = unix_peer(sk);
2918 if (other)
2919 sock_hold(other);
2920 unix_state_unlock(sk);
2921 sk->sk_state_change(sk);
2922
2923 if (other &&
2924 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2925
2926 int peer_mode = 0;
2927 const struct proto *prot = READ_ONCE(other->sk_prot);
2928
2929 if (prot->unhash)
2930 prot->unhash(other);
2931 if (mode&RCV_SHUTDOWN)
2932 peer_mode |= SEND_SHUTDOWN;
2933 if (mode&SEND_SHUTDOWN)
2934 peer_mode |= RCV_SHUTDOWN;
2935 unix_state_lock(other);
2936 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2937 unix_state_unlock(other);
2938 other->sk_state_change(other);
2939 if (peer_mode == SHUTDOWN_MASK)
2940 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2941 else if (peer_mode & RCV_SHUTDOWN)
2942 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2943 }
2944 if (other)
2945 sock_put(other);
2946
2947 return 0;
2948 }
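/* Illustrative userspace sketch (not part of this file): because of the peer
 * notification above, SHUT_WR on one end of a connected stream pair is
 * observed as EOF (read() returning 0) on the other end.
 *
 *	int sv[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	// read(sv[1], &c, 1) now returns 0 instead of blocking
 */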
2949
2950 long unix_inq_len(struct sock *sk)
2951 {
2952 struct sk_buff *skb;
2953 long amount = 0;
2954
2955 if (sk->sk_state == TCP_LISTEN)
2956 return -EINVAL;
2957
2958 spin_lock(&sk->sk_receive_queue.lock);
2959 if (sk->sk_type == SOCK_STREAM ||
2960 sk->sk_type == SOCK_SEQPACKET) {
2961 skb_queue_walk(&sk->sk_receive_queue, skb)
2962 amount += unix_skb_len(skb);
2963 } else {
2964 skb = skb_peek(&sk->sk_receive_queue);
2965 if (skb)
2966 amount = skb->len;
2967 }
2968 spin_unlock(&sk->sk_receive_queue.lock);
2969
2970 return amount;
2971 }
2972 EXPORT_SYMBOL_GPL(unix_inq_len);
2973
2974 long unix_outq_len(struct sock *sk)
2975 {
2976 return sk_wmem_alloc_get(sk);
2977 }
2978 EXPORT_SYMBOL_GPL(unix_outq_len);
2979
2980 static int unix_open_file(struct sock *sk)
2981 {
2982 struct path path;
2983 struct file *f;
2984 int fd;
2985
2986 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2987 return -EPERM;
2988
2989 if (!smp_load_acquire(&unix_sk(sk)->addr))
2990 return -ENOENT;
2991
2992 path = unix_sk(sk)->path;
2993 if (!path.dentry)
2994 return -ENOENT;
2995
2996 path_get(&path);
2997
2998 fd = get_unused_fd_flags(O_CLOEXEC);
2999 if (fd < 0)
3000 goto out;
3001
3002 f = dentry_open(&path, O_PATH, current_cred());
3003 if (IS_ERR(f)) {
3004 put_unused_fd(fd);
3005 fd = PTR_ERR(f);
3006 goto out;
3007 }
3008
3009 fd_install(fd, f);
3010 out:
3011 path_put(&path);
3012
3013 return fd;
3014 }
3015
3016 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3017 {
3018 struct sock *sk = sock->sk;
3019 long amount = 0;
3020 int err;
3021
3022 switch (cmd) {
3023 case SIOCOUTQ:
3024 amount = unix_outq_len(sk);
3025 err = put_user(amount, (int __user *)arg);
3026 break;
3027 case SIOCINQ:
3028 amount = unix_inq_len(sk);
3029 if (amount < 0)
3030 err = amount;
3031 else
3032 err = put_user(amount, (int __user *)arg);
3033 break;
3034 case SIOCUNIXFILE:
3035 err = unix_open_file(sk);
3036 break;
3037 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3038 case SIOCATMARK:
3039 {
3040 struct sk_buff *skb;
3041 int answ = 0;
3042
3043 skb = skb_peek(&sk->sk_receive_queue);
3044 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3045 answ = 1;
3046 err = put_user(answ, (int __user *)arg);
3047 }
3048 break;
3049 #endif
3050 default:
3051 err = -ENOIOCTLCMD;
3052 break;
3053 }
3054 return err;
3055 }
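/* Illustrative userspace sketch (not part of this file): the SIOCINQ and
 * SIOCOUTQ cases above map to the usual queue-occupancy queries, backed by
 * unix_inq_len() and unix_outq_len(). Plain libc ioctl(), no error handling.
 *
 *	int pending = 0, unsent = 0;
 *
 *	ioctl(sock_fd, SIOCINQ, &pending);	// bytes waiting in the receive queue
 *	ioctl(sock_fd, SIOCOUTQ, &unsent);	// bytes queued but not yet read by the peer
 */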
3056
3057 #ifdef CONFIG_COMPAT
3058 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3059 {
3060 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3061 }
3062 #endif
3063
3064 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3065 {
3066 struct sock *sk = sock->sk;
3067 __poll_t mask;
3068 u8 shutdown;
3069
3070 sock_poll_wait(file, sock, wait);
3071 mask = 0;
3072 shutdown = READ_ONCE(sk->sk_shutdown);
3073
3074 /* exceptional events? */
3075 if (READ_ONCE(sk->sk_err))
3076 mask |= EPOLLERR;
3077 if (shutdown == SHUTDOWN_MASK)
3078 mask |= EPOLLHUP;
3079 if (shutdown & RCV_SHUTDOWN)
3080 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3081
3082 /* readable? */
3083 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3084 mask |= EPOLLIN | EPOLLRDNORM;
3085 if (sk_is_readable(sk))
3086 mask |= EPOLLIN | EPOLLRDNORM;
3087 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3088 if (READ_ONCE(unix_sk(sk)->oob_skb))
3089 mask |= EPOLLPRI;
3090 #endif
3091
3092 /* Connection-based sockets need to check for termination and startup */
3093 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3094 sk->sk_state == TCP_CLOSE)
3095 mask |= EPOLLHUP;
3096
3097 /*
3098 * We also set writable when the other side has shut down the
3099 * connection. This prevents stuck sockets.
3100 */
3101 if (unix_writable(sk))
3102 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3103
3104 return mask;
3105 }
3106
3107 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3108 poll_table *wait)
3109 {
3110 struct sock *sk = sock->sk, *other;
3111 unsigned int writable;
3112 __poll_t mask;
3113 u8 shutdown;
3114
3115 sock_poll_wait(file, sock, wait);
3116 mask = 0;
3117 shutdown = READ_ONCE(sk->sk_shutdown);
3118
3119 /* exceptional events? */
3120 if (READ_ONCE(sk->sk_err) ||
3121 !skb_queue_empty_lockless(&sk->sk_error_queue))
3122 mask |= EPOLLERR |
3123 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3124
3125 if (shutdown & RCV_SHUTDOWN)
3126 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3127 if (shutdown == SHUTDOWN_MASK)
3128 mask |= EPOLLHUP;
3129
3130 /* readable? */
3131 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3132 mask |= EPOLLIN | EPOLLRDNORM;
3133 if (sk_is_readable(sk))
3134 mask |= EPOLLIN | EPOLLRDNORM;
3135
3136 /* Connection-based sockets need to check for termination and startup */
3137 if (sk->sk_type == SOCK_SEQPACKET) {
3138 if (sk->sk_state == TCP_CLOSE)
3139 mask |= EPOLLHUP;
3140 /* connection hasn't started yet? */
3141 if (sk->sk_state == TCP_SYN_SENT)
3142 return mask;
3143 }
3144
3145 /* No write status requested, avoid expensive OUT tests. */
3146 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3147 return mask;
3148
3149 writable = unix_writable(sk);
3150 if (writable) {
3151 unix_state_lock(sk);
3152
3153 other = unix_peer(sk);
3154 if (other && unix_peer(other) != sk &&
3155 unix_recvq_full_lockless(other) &&
3156 unix_dgram_peer_wake_me(sk, other))
3157 writable = 0;
3158
3159 unix_state_unlock(sk);
3160 }
3161
3162 if (writable)
3163 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3164 else
3165 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3166
3167 return mask;
3168 }
3169
3170 #ifdef CONFIG_PROC_FS
3171
3172 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3173
3174 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3175 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3176 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3177
3178 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3179 {
3180 unsigned long offset = get_offset(*pos);
3181 unsigned long bucket = get_bucket(*pos);
3182 unsigned long count = 0;
3183 struct sock *sk;
3184
3185 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3186 sk; sk = sk_next(sk)) {
3187 if (++count == offset)
3188 break;
3189 }
3190
3191 return sk;
3192 }
3193
3194 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3195 {
3196 unsigned long bucket = get_bucket(*pos);
3197 struct net *net = seq_file_net(seq);
3198 struct sock *sk;
3199
3200 while (bucket < UNIX_HASH_SIZE) {
3201 spin_lock(&net->unx.table.locks[bucket]);
3202
3203 sk = unix_from_bucket(seq, pos);
3204 if (sk)
3205 return sk;
3206
3207 spin_unlock(&net->unx.table.locks[bucket]);
3208
3209 *pos = set_bucket_offset(++bucket, 1);
3210 }
3211
3212 return NULL;
3213 }
3214
3215 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3216 loff_t *pos)
3217 {
3218 unsigned long bucket = get_bucket(*pos);
3219
3220 sk = sk_next(sk);
3221 if (sk)
3222 return sk;
3223
3224
3225 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3226
3227 *pos = set_bucket_offset(++bucket, 1);
3228
3229 return unix_get_first(seq, pos);
3230 }
3231
3232 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3233 {
3234 if (!*pos)
3235 return SEQ_START_TOKEN;
3236
3237 return unix_get_first(seq, pos);
3238 }
3239
3240 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3241 {
3242 ++*pos;
3243
3244 if (v == SEQ_START_TOKEN)
3245 return unix_get_first(seq, pos);
3246
3247 return unix_get_next(seq, v, pos);
3248 }
3249
3250 static void unix_seq_stop(struct seq_file *seq, void *v)
3251 {
3252 struct sock *sk = v;
3253
3254 if (sk)
3255 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3256 }
3257
3258 static int unix_seq_show(struct seq_file *seq, void *v)
3259 {
3260
3261 if (v == SEQ_START_TOKEN)
3262 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3263 "Inode Path\n");
3264 else {
3265 struct sock *s = v;
3266 struct unix_sock *u = unix_sk(s);
3267 unix_state_lock(s);
3268
3269 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3270 s,
3271 refcount_read(&s->sk_refcnt),
3272 0,
3273 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3274 s->sk_type,
3275 s->sk_socket ?
3276 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3277 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3278 sock_i_ino(s));
3279
3280 if (u->addr) { /* under a hash table lock here */
3281 int i, len;
3282 seq_putc(seq, ' ');
3283
3284 i = 0;
3285 len = u->addr->len -
3286 offsetof(struct sockaddr_un, sun_path);
3287 if (u->addr->name->sun_path[0]) {
3288 len--;
3289 } else {
3290 seq_putc(seq, '@');
3291 i++;
3292 }
3293 for ( ; i < len; i++)
3294 seq_putc(seq, u->addr->name->sun_path[i] ?:
3295 '@');
3296 }
3297 unix_state_unlock(s);
3298 seq_putc(seq, '\n');
3299 }
3300
3301 return 0;
3302 }
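/* Illustrative example (not part of this file) of one /proc/net/unix line
 * produced by the format above; all numbers are made up. A listening,
 * filesystem-bound stream socket typically looks like:
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 *
 * i.e. pointer (masked by %pK), refcount, protocol (always 0), flags with
 * __SO_ACCEPTCON set, type SOCK_STREAM, state, inode, then the bound path.
 */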
3303
3304 static const struct seq_operations unix_seq_ops = {
3305 .start = unix_seq_start,
3306 .next = unix_seq_next,
3307 .stop = unix_seq_stop,
3308 .show = unix_seq_show,
3309 };
3310
3311 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3312 struct bpf_unix_iter_state {
3313 struct seq_net_private p;
3314 unsigned int cur_sk;
3315 unsigned int end_sk;
3316 unsigned int max_sk;
3317 struct sock **batch;
3318 bool st_bucket_done;
3319 };
3320
3321 struct bpf_iter__unix {
3322 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3323 __bpf_md_ptr(struct unix_sock *, unix_sk);
3324 uid_t uid __aligned(8);
3325 };
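/* Illustrative BPF-side sketch (not part of this file) of a program that
 * attaches to this iterator; its context mirrors struct bpf_iter__unix above.
 * The section name and helpers follow common libbpf conventions, and the
 * includes/field choices are assumptions of the sketch, not a reference.
 *
 *	#include "vmlinux.h"
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("iter/unix")
 *	int dump_unix(struct bpf_iter__unix *ctx)
 *	{
 *		struct unix_sock *unix_sk = ctx->unix_sk;
 *		struct seq_file *seq = ctx->meta->seq;
 *
 *		if (!unix_sk)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(seq, "uid=%u\n", ctx->uid);
 *		return 0;
 *	}
 *
 * Once pinned (e.g. via bpftool iter pin), reading the pin path runs the
 * program once per socket batched by the code below.
 */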
3326
3327 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3328 struct unix_sock *unix_sk, uid_t uid)
3329 {
3330 struct bpf_iter__unix ctx;
3331
3332 meta->seq_num--; /* skip SEQ_START_TOKEN */
3333 ctx.meta = meta;
3334 ctx.unix_sk = unix_sk;
3335 ctx.uid = uid;
3336 return bpf_iter_run_prog(prog, &ctx);
3337 }
3338
3339 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3340
3341 {
3342 struct bpf_unix_iter_state *iter = seq->private;
3343 unsigned int expected = 1;
3344 struct sock *sk;
3345
3346 sock_hold(start_sk);
3347 iter->batch[iter->end_sk++] = start_sk;
3348
3349 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3350 if (iter->end_sk < iter->max_sk) {
3351 sock_hold(sk);
3352 iter->batch[iter->end_sk++] = sk;
3353 }
3354
3355 expected++;
3356 }
3357
3358 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3359
3360 return expected;
3361 }
3362
3363 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3364 {
3365 while (iter->cur_sk < iter->end_sk)
3366 sock_put(iter->batch[iter->cur_sk++]);
3367 }
3368
3369 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3370 unsigned int new_batch_sz)
3371 {
3372 struct sock **new_batch;
3373
3374 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3375 GFP_USER | __GFP_NOWARN);
3376 if (!new_batch)
3377 return -ENOMEM;
3378
3379 bpf_iter_unix_put_batch(iter);
3380 kvfree(iter->batch);
3381 iter->batch = new_batch;
3382 iter->max_sk = new_batch_sz;
3383
3384 return 0;
3385 }
3386
3387 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3388 loff_t *pos)
3389 {
3390 struct bpf_unix_iter_state *iter = seq->private;
3391 unsigned int expected;
3392 bool resized = false;
3393 struct sock *sk;
3394
3395 if (iter->st_bucket_done)
3396 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3397
3398 again:
3399 /* Get a new batch */
3400 iter->cur_sk = 0;
3401 iter->end_sk = 0;
3402
3403 sk = unix_get_first(seq, pos);
3404 if (!sk)
3405 return NULL; /* Done */
3406
3407 expected = bpf_iter_unix_hold_batch(seq, sk);
3408
3409 if (iter->end_sk == expected) {
3410 iter->st_bucket_done = true;
3411 return sk;
3412 }
3413
3414 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3415 resized = true;
3416 goto again;
3417 }
3418
3419 return sk;
3420 }
3421
3422 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3423 {
3424 if (!*pos)
3425 return SEQ_START_TOKEN;
3426
3427 /* bpf iter does not support lseek, so it always
3428 * continues from where it was stop()-ped.
3429 */
3430 return bpf_iter_unix_batch(seq, pos);
3431 }
3432
3433 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3434 {
3435 struct bpf_unix_iter_state *iter = seq->private;
3436 struct sock *sk;
3437
3438 /* Whenever seq_next() is called, the iter->cur_sk is
3439 * done with seq_show(), so advance to the next sk in
3440 * the batch.
3441 */
3442 if (iter->cur_sk < iter->end_sk)
3443 sock_put(iter->batch[iter->cur_sk++]);
3444
3445 ++*pos;
3446
3447 if (iter->cur_sk < iter->end_sk)
3448 sk = iter->batch[iter->cur_sk];
3449 else
3450 sk = bpf_iter_unix_batch(seq, pos);
3451
3452 return sk;
3453 }
3454
3455 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3456 {
3457 struct bpf_iter_meta meta;
3458 struct bpf_prog *prog;
3459 struct sock *sk = v;
3460 uid_t uid;
3461 bool slow;
3462 int ret;
3463
3464 if (v == SEQ_START_TOKEN)
3465 return 0;
3466
3467 slow = lock_sock_fast(sk);
3468
3469 if (unlikely(sk_unhashed(sk))) {
3470 ret = SEQ_SKIP;
3471 goto unlock;
3472 }
3473
3474 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3475 meta.seq = seq;
3476 prog = bpf_iter_get_info(&meta, false);
3477 ret = unix_prog_seq_show(prog, &meta, v, uid);
3478 unlock:
3479 unlock_sock_fast(sk, slow);
3480 return ret;
3481 }
3482
3483 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3484 {
3485 struct bpf_unix_iter_state *iter = seq->private;
3486 struct bpf_iter_meta meta;
3487 struct bpf_prog *prog;
3488
3489 if (!v) {
3490 meta.seq = seq;
3491 prog = bpf_iter_get_info(&meta, true);
3492 if (prog)
3493 (void)unix_prog_seq_show(prog, &meta, v, 0);
3494 }
3495
3496 if (iter->cur_sk < iter->end_sk)
3497 bpf_iter_unix_put_batch(iter);
3498 }
3499
3500 static const struct seq_operations bpf_iter_unix_seq_ops = {
3501 .start = bpf_iter_unix_seq_start,
3502 .next = bpf_iter_unix_seq_next,
3503 .stop = bpf_iter_unix_seq_stop,
3504 .show = bpf_iter_unix_seq_show,
3505 };
3506 #endif
3507 #endif
3508
3509 static const struct net_proto_family unix_family_ops = {
3510 .family = PF_UNIX,
3511 .create = unix_create,
3512 .owner = THIS_MODULE,
3513 };
3514
3515
3516 static int __net_init unix_net_init(struct net *net)
3517 {
3518 int i;
3519
3520 net->unx.sysctl_max_dgram_qlen = 10;
3521 if (unix_sysctl_register(net))
3522 goto out;
3523
3524 #ifdef CONFIG_PROC_FS
3525 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3526 sizeof(struct seq_net_private)))
3527 goto err_sysctl;
3528 #endif
3529
3530 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3531 sizeof(spinlock_t), GFP_KERNEL);
3532 if (!net->unx.table.locks)
3533 goto err_proc;
3534
3535 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3536 sizeof(struct hlist_head),
3537 GFP_KERNEL);
3538 if (!net->unx.table.buckets)
3539 goto free_locks;
3540
3541 for (i = 0; i < UNIX_HASH_SIZE; i++) {
3542 spin_lock_init(&net->unx.table.locks[i]);
3543 INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3544 }
3545
3546 return 0;
3547
3548 free_locks:
3549 kvfree(net->unx.table.locks);
3550 err_proc:
3551 #ifdef CONFIG_PROC_FS
3552 remove_proc_entry("unix", net->proc_net);
3553 err_sysctl:
3554 #endif
3555 unix_sysctl_unregister(net);
3556 out:
3557 return -ENOMEM;
3558 }
3559
3560 static void __net_exit unix_net_exit(struct net *net)
3561 {
3562 kvfree(net->unx.table.buckets);
3563 kvfree(net->unx.table.locks);
3564 unix_sysctl_unregister(net);
3565 remove_proc_entry("unix", net->proc_net);
3566 }
3567
3568 static struct pernet_operations unix_net_ops = {
3569 .init = unix_net_init,
3570 .exit = unix_net_exit,
3571 };
3572
3573 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3574 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3575 struct unix_sock *unix_sk, uid_t uid)
3576
3577 #define INIT_BATCH_SZ 16
3578
3579 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3580 {
3581 struct bpf_unix_iter_state *iter = priv_data;
3582 int err;
3583
3584 err = bpf_iter_init_seq_net(priv_data, aux);
3585 if (err)
3586 return err;
3587
3588 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3589 if (err) {
3590 bpf_iter_fini_seq_net(priv_data);
3591 return err;
3592 }
3593
3594 return 0;
3595 }
3596
3597 static void bpf_iter_fini_unix(void *priv_data)
3598 {
3599 struct bpf_unix_iter_state *iter = priv_data;
3600
3601 bpf_iter_fini_seq_net(priv_data);
3602 kvfree(iter->batch);
3603 }
3604
3605 static const struct bpf_iter_seq_info unix_seq_info = {
3606 .seq_ops = &bpf_iter_unix_seq_ops,
3607 .init_seq_private = bpf_iter_init_unix,
3608 .fini_seq_private = bpf_iter_fini_unix,
3609 .seq_priv_size = sizeof(struct bpf_unix_iter_state),
3610 };
3611
3612 static const struct bpf_func_proto *
3613 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3614 const struct bpf_prog *prog)
3615 {
3616 switch (func_id) {
3617 case BPF_FUNC_setsockopt:
3618 return &bpf_sk_setsockopt_proto;
3619 case BPF_FUNC_getsockopt:
3620 return &bpf_sk_getsockopt_proto;
3621 default:
3622 return NULL;
3623 }
3624 }
3625
3626 static struct bpf_iter_reg unix_reg_info = {
3627 .target = "unix",
3628 .ctx_arg_info_size = 1,
3629 .ctx_arg_info = {
3630 { offsetof(struct bpf_iter__unix, unix_sk),
3631 PTR_TO_BTF_ID_OR_NULL },
3632 },
3633 .get_func_proto = bpf_iter_unix_get_func_proto,
3634 .seq_info = &unix_seq_info,
3635 };
3636
3637 static void __init bpf_iter_register(void)
3638 {
3639 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3640 if (bpf_iter_reg_target(&unix_reg_info))
3641 pr_warn("Warning: could not register bpf iterator unix\n");
3642 }
3643 #endif
3644
3645 static int __init af_unix_init(void)
3646 {
3647 int i, rc = -1;
3648
3649 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3650
3651 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3652 spin_lock_init(&bsd_socket_locks[i]);
3653 INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3654 }
3655
3656 rc = proto_register(&unix_dgram_proto, 1);
3657 if (rc != 0) {
3658 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3659 goto out;
3660 }
3661
3662 rc = proto_register(&unix_stream_proto, 1);
3663 if (rc != 0) {
3664 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3665 proto_unregister(&unix_dgram_proto);
3666 goto out;
3667 }
3668
3669 sock_register(&unix_family_ops);
3670 register_pernet_subsys(&unix_net_ops);
3671 unix_bpf_build_proto();
3672
3673 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3674 bpf_iter_register();
3675 #endif
3676
3677 out:
3678 return rc;
3679 }
3680
3681 static void __exit af_unix_exit(void)
3682 {
3683 sock_unregister(PF_UNIX);
3684 proto_unregister(&unix_dgram_proto);
3685 proto_unregister(&unix_stream_proto);
3686 unregister_pernet_subsys(&unix_net_ops);
3687 }
3688
3689 /* Earlier than device_initcall() so that other drivers invoking
3690 request_module() don't end up in a loop when modprobe tries
3691 to use a UNIX socket. But later than subsys_initcall() because
3692 we depend on stuff initialised there */
3693 fs_initcall(af_unix_init);
3694 module_exit(af_unix_exit);
3695
3696 MODULE_LICENSE("GPL");
3697 MODULE_ALIAS_NETPROTO(PF_UNIX);
3698