1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 *
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko Eißfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid huge amounts
34 * of socks hashed (this is for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and gives the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
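/* Illustrative userspace sketch (not part of this kernel source): binding
 * an abstract name as described above. sun_path[0] stays 0 and the passed
 * address length, not a NUL terminator, delimits the name; "fd" is assumed
 * to be an AF_UNIX socket:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */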
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 #include <linux/btf_ids.h>
117
118 #include "scm.h"
119
120 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
121 EXPORT_SYMBOL_GPL(unix_socket_table);
122 DEFINE_SPINLOCK(unix_table_lock);
123 EXPORT_SYMBOL_GPL(unix_table_lock);
124 static atomic_long_t unix_nr_socks;
125
126
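/* Bound sockets live in the first UNIX_HASH_SIZE buckets of
 * unix_socket_table (abstract names hashed by their bytes, pathname
 * sockets hashed by inode number in unix_bind_bsd()); sockets that are
 * not bound yet are parked in the upper half, bucketed by the address
 * of the sock as computed below.
 */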
127 static struct hlist_head *unix_sockets_unbound(void *addr)
128 {
129 unsigned long hash = (unsigned long)addr;
130
131 hash ^= hash >> 16;
132 hash ^= hash >> 8;
133 hash %= UNIX_HASH_SIZE;
134 return &unix_socket_table[UNIX_HASH_SIZE + hash];
135 }
136
137 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
138
139 #ifdef CONFIG_SECURITY_NETWORK
140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 {
142 UNIXCB(skb).secid = scm->secid;
143 }
144
145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
146 {
147 scm->secid = UNIXCB(skb).secid;
148 }
149
150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
151 {
152 return (scm->secid == UNIXCB(skb).secid);
153 }
154 #else
155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
156 { }
157
158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159 { }
160
161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
162 {
163 return true;
164 }
165 #endif /* CONFIG_SECURITY_NETWORK */
166
167 /*
168 * SMP locking strategy:
169 * hash table is protected with spinlock unix_table_lock
170 * each socket state is protected by separate spin lock.
171 */
172
173 static inline unsigned int unix_hash_fold(__wsum n)
174 {
175 unsigned int hash = (__force unsigned int)csum_fold(n);
176
177 hash ^= hash>>8;
178 return hash&(UNIX_HASH_SIZE-1);
179 }
180
181 #define unix_peer(sk) (unix_sk(sk)->peer)
182
183 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
184 {
185 return unix_peer(osk) == sk;
186 }
187
188 static inline int unix_may_send(struct sock *sk, struct sock *osk)
189 {
190 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
191 }
192
193 static inline int unix_recvq_full(const struct sock *sk)
194 {
195 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
196 }
197
198 static inline int unix_recvq_full_lockless(const struct sock *sk)
199 {
200 return skb_queue_len_lockless(&sk->sk_receive_queue) >
201 READ_ONCE(sk->sk_max_ack_backlog);
202 }
203
204 struct sock *unix_peer_get(struct sock *s)
205 {
206 struct sock *peer;
207
208 unix_state_lock(s);
209 peer = unix_peer(s);
210 if (peer)
211 sock_hold(peer);
212 unix_state_unlock(s);
213 return peer;
214 }
215 EXPORT_SYMBOL_GPL(unix_peer_get);
216
217 static inline void unix_release_addr(struct unix_address *addr)
218 {
219 if (refcount_dec_and_test(&addr->refcnt))
220 kfree(addr);
221 }
222
223 /*
224 * Check unix socket name:
225 * - it should not be zero length.
226 * - if it does not start with a zero byte, it should be NUL terminated (FS object)
227 * - if it starts with a zero byte, it is an abstract name.
228 */
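/* Worked example (illustrative): for a bind() to "/tmp/x" the returned
 * length is strlen("/tmp/x") + 1 + sizeof(short) = 6 + 1 + 2 = 9; an
 * abstract address whose sun_path starts with a zero byte keeps the
 * caller-supplied length unchanged and additionally gets *hashp set from
 * a checksum over the whole address.
 */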
229
230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
231 {
232 *hashp = 0;
233
234 if (len <= sizeof(short) || len > sizeof(*sunaddr))
235 return -EINVAL;
236 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
237 return -EINVAL;
238 if (sunaddr->sun_path[0]) {
239 /*
240 * This may look like an off by one error but it is a bit more
241 * subtle. 108 is the longest valid AF_UNIX path for a binding.
242 * sun_path[108] doesn't as such exist. However in kernel space
243 * we are guaranteed that it is a valid memory location in our
244 * kernel address buffer.
245 */
246 ((char *)sunaddr)[len] = 0;
247 len = strlen(sunaddr->sun_path)+1+sizeof(short);
248 return len;
249 }
250
251 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
252 return len;
253 }
254
255 static void __unix_remove_socket(struct sock *sk)
256 {
257 sk_del_node_init(sk);
258 }
259
260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
261 {
262 WARN_ON(!sk_unhashed(sk));
263 sk_add_node(sk, list);
264 }
265
266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
267 unsigned hash)
268 {
269 __unix_remove_socket(sk);
270 smp_store_release(&unix_sk(sk)->addr, addr);
271 __unix_insert_socket(&unix_socket_table[hash], sk);
272 }
273
274 static inline void unix_remove_socket(struct sock *sk)
275 {
276 spin_lock(&unix_table_lock);
277 __unix_remove_socket(sk);
278 spin_unlock(&unix_table_lock);
279 }
280
281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
282 {
283 spin_lock(&unix_table_lock);
284 __unix_insert_socket(list, sk);
285 spin_unlock(&unix_table_lock);
286 }
287
288 static struct sock *__unix_find_socket_byname(struct net *net,
289 struct sockaddr_un *sunname,
290 int len, unsigned int hash)
291 {
292 struct sock *s;
293
294 sk_for_each(s, &unix_socket_table[hash]) {
295 struct unix_sock *u = unix_sk(s);
296
297 if (!net_eq(sock_net(s), net))
298 continue;
299
300 if (u->addr->len == len &&
301 !memcmp(u->addr->name, sunname, len))
302 return s;
303 }
304 return NULL;
305 }
306
307 static inline struct sock *unix_find_socket_byname(struct net *net,
308 struct sockaddr_un *sunname,
309 int len, unsigned int hash)
310 {
311 struct sock *s;
312
313 spin_lock(&unix_table_lock);
314 s = __unix_find_socket_byname(net, sunname, len, hash);
315 if (s)
316 sock_hold(s);
317 spin_unlock(&unix_table_lock);
318 return s;
319 }
320
321 static struct sock *unix_find_socket_byinode(struct inode *i)
322 {
323 struct sock *s;
324
325 spin_lock(&unix_table_lock);
326 sk_for_each(s,
327 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
328 struct dentry *dentry = unix_sk(s)->path.dentry;
329
330 if (dentry && d_backing_inode(dentry) == i) {
331 sock_hold(s);
332 goto found;
333 }
334 }
335 s = NULL;
336 found:
337 spin_unlock(&unix_table_lock);
338 return s;
339 }
340
341 /* Support code for asymmetrically connected dgram sockets
342 *
343 * If a datagram socket is connected to a socket not itself connected
344 * to the first socket (eg, /dev/log), clients may only enqueue more
345 * messages if the present receive queue of the server socket is not
346 * "too large". This means there's a second writeability condition
347 * poll and sendmsg need to test. The dgram recv code will do a wake
348 * up on the peer_wait wait queue of a socket upon reception of a
349 * datagram which needs to be propagated to sleeping would-be writers
350 * since these might not have sent anything so far. This can't be
351 * accomplished via poll_wait because the lifetime of the server
352 * socket might be less than that of its clients if these break their
353 * association with it or if the server socket is closed while clients
354 * are still connected to it and there's no way to inform "a polling
355 * implementation" that it should let go of a certain wait queue
356 *
357 * In order to propagate a wake up, a wait_queue_entry_t of the client
358 * socket is enqueued on the peer_wait queue of the server socket
359 * whose wake function does a wake_up on the ordinary client socket
360 * wait queue. This connection is established whenever a write (or
361 * poll for write) hit the flow control condition and broken when the
362 * association to the server socket is dissolved or after a wake up
363 * was relayed.
364 */
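/* Illustrative sequence for a typical case (client connected to a busy
 * /dev/log-style receiver): a write or poll-for-write on the client hits
 * the flow-control limit and unix_dgram_peer_wake_me() hooks the client's
 * peer_wake entry onto the receiver's peer_wait queue; when the receiver
 * later dequeues a datagram and wakes peer_wait, unix_dgram_peer_wake_relay()
 * unhooks the entry and wakes the client's own wait queue so its poll()
 * reports writability again.
 */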
365
366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
367 void *key)
368 {
369 struct unix_sock *u;
370 wait_queue_head_t *u_sleep;
371
372 u = container_of(q, struct unix_sock, peer_wake);
373
374 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
375 q);
376 u->peer_wake.private = NULL;
377
378 /* relaying can only happen while the wq still exists */
379 u_sleep = sk_sleep(&u->sk);
380 if (u_sleep)
381 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
382
383 return 0;
384 }
385
386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
387 {
388 struct unix_sock *u, *u_other;
389 int rc;
390
391 u = unix_sk(sk);
392 u_other = unix_sk(other);
393 rc = 0;
394 spin_lock(&u_other->peer_wait.lock);
395
396 if (!u->peer_wake.private) {
397 u->peer_wake.private = other;
398 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
399
400 rc = 1;
401 }
402
403 spin_unlock(&u_other->peer_wait.lock);
404 return rc;
405 }
406
407 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
408 struct sock *other)
409 {
410 struct unix_sock *u, *u_other;
411
412 u = unix_sk(sk);
413 u_other = unix_sk(other);
414 spin_lock(&u_other->peer_wait.lock);
415
416 if (u->peer_wake.private == other) {
417 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
418 u->peer_wake.private = NULL;
419 }
420
421 spin_unlock(&u_other->peer_wait.lock);
422 }
423
424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
425 struct sock *other)
426 {
427 unix_dgram_peer_wake_disconnect(sk, other);
428 wake_up_interruptible_poll(sk_sleep(sk),
429 EPOLLOUT |
430 EPOLLWRNORM |
431 EPOLLWRBAND);
432 }
433
434 /* preconditions:
435 * - unix_peer(sk) == other
436 * - association is stable
437 */
438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
439 {
440 int connected;
441
442 connected = unix_dgram_peer_wake_connect(sk, other);
443
444 /* If other is SOCK_DEAD, we want to make sure we signal
445 * POLLOUT, such that a subsequent write() can get a
446 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
447 * to other and its full, we will hang waiting for POLLOUT.
448 */
449 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
450 return 1;
451
452 if (connected)
453 unix_dgram_peer_wake_disconnect(sk, other);
454
455 return 0;
456 }
457
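/* A socket counts as writable while its queued write memory is at most a
 * quarter of sk_sndbuf; the "<< 2" below encodes that threshold.
 */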
458 static int unix_writable(const struct sock *sk)
459 {
460 return sk->sk_state != TCP_LISTEN &&
461 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
462 }
463
464 static void unix_write_space(struct sock *sk)
465 {
466 struct socket_wq *wq;
467
468 rcu_read_lock();
469 if (unix_writable(sk)) {
470 wq = rcu_dereference(sk->sk_wq);
471 if (skwq_has_sleeper(wq))
472 wake_up_interruptible_sync_poll(&wq->wait,
473 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
474 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
475 }
476 rcu_read_unlock();
477 }
478
479 /* When dgram socket disconnects (or changes its peer), we clear its receive
480 * queue of packets arrived from previous peer. First, it allows us to do
481 * flow control based only on wmem_alloc; second, sk connected to peer
482 * may receive messages only from that peer. */
483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
484 {
485 if (!skb_queue_empty(&sk->sk_receive_queue)) {
486 skb_queue_purge(&sk->sk_receive_queue);
487 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
488
489 /* If one link of bidirectional dgram pipe is disconnected,
490 * we signal an error. Messages are lost. Do not do this
491 * when the peer was not connected to us.
492 */
493 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
494 other->sk_err = ECONNRESET;
495 sk_error_report(other);
496 }
497 }
498 other->sk_state = TCP_CLOSE;
499 }
500
501 static void unix_sock_destructor(struct sock *sk)
502 {
503 struct unix_sock *u = unix_sk(sk);
504
505 skb_queue_purge(&sk->sk_receive_queue);
506
507 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
508 WARN_ON(!sk_unhashed(sk));
509 WARN_ON(sk->sk_socket);
510 if (!sock_flag(sk, SOCK_DEAD)) {
511 pr_info("Attempt to release alive unix socket: %p\n", sk);
512 return;
513 }
514
515 if (u->addr)
516 unix_release_addr(u->addr);
517
518 atomic_long_dec(&unix_nr_socks);
519 local_bh_disable();
520 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
521 local_bh_enable();
522 #ifdef UNIX_REFCNT_DEBUG
523 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
524 atomic_long_read(&unix_nr_socks));
525 #endif
526 }
527
528 static void unix_release_sock(struct sock *sk, int embrion)
529 {
530 struct unix_sock *u = unix_sk(sk);
531 struct path path;
532 struct sock *skpair;
533 struct sk_buff *skb;
534 int state;
535
536 unix_remove_socket(sk);
537
538 /* Clear state */
539 unix_state_lock(sk);
540 sock_orphan(sk);
541 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
542 path = u->path;
543 u->path.dentry = NULL;
544 u->path.mnt = NULL;
545 state = sk->sk_state;
546 sk->sk_state = TCP_CLOSE;
547
548 skpair = unix_peer(sk);
549 unix_peer(sk) = NULL;
550
551 unix_state_unlock(sk);
552
553 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
554 if (u->oob_skb) {
555 kfree_skb(u->oob_skb);
556 u->oob_skb = NULL;
557 }
558 #endif
559
560 wake_up_interruptible_all(&u->peer_wait);
561
562 if (skpair != NULL) {
563 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
564 unix_state_lock(skpair);
565 /* No more writes */
566 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
567 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
568 skpair->sk_err = ECONNRESET;
569 unix_state_unlock(skpair);
570 skpair->sk_state_change(skpair);
571 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
572 }
573
574 unix_dgram_peer_wake_disconnect(sk, skpair);
575 sock_put(skpair); /* It may now die */
576 }
577
578 /* Try to flush out this socket. Throw out buffers at least */
579
580 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
581 if (state == TCP_LISTEN)
582 unix_release_sock(skb->sk, 1);
583 /* passed fds are erased in the kfree_skb hook */
584 UNIXCB(skb).consumed = skb->len;
585 kfree_skb(skb);
586 }
587
588 if (path.dentry)
589 path_put(&path);
590
591 sock_put(sk);
592
593 /* ---- Socket is dead now and most probably destroyed ---- */
594
595 /*
596 * Fixme: BSD difference: In BSD all sockets connected to us get
597 * ECONNRESET and we die on the spot. In Linux we behave
598 * like files and pipes do and wait for the last
599 * dereference.
600 *
601 * Can't we simply set sock->err?
602 *
603 * What the above comment does talk about? --ANK(980817)
604 */
605
606 if (READ_ONCE(unix_tot_inflight))
607 unix_gc(); /* Garbage collect fds */
608 }
609
610 static void init_peercred(struct sock *sk)
611 {
612 const struct cred *old_cred;
613 struct pid *old_pid;
614
615 spin_lock(&sk->sk_peer_lock);
616 old_pid = sk->sk_peer_pid;
617 old_cred = sk->sk_peer_cred;
618 sk->sk_peer_pid = get_pid(task_tgid(current));
619 sk->sk_peer_cred = get_current_cred();
620 spin_unlock(&sk->sk_peer_lock);
621
622 put_pid(old_pid);
623 put_cred(old_cred);
624 }
625
626 static void copy_peercred(struct sock *sk, struct sock *peersk)
627 {
628 const struct cred *old_cred;
629 struct pid *old_pid;
630
631 if (sk < peersk) {
632 spin_lock(&sk->sk_peer_lock);
633 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
634 } else {
635 spin_lock(&peersk->sk_peer_lock);
636 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
637 }
638 old_pid = sk->sk_peer_pid;
639 old_cred = sk->sk_peer_cred;
640 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
641 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
642
643 spin_unlock(&sk->sk_peer_lock);
644 spin_unlock(&peersk->sk_peer_lock);
645
646 put_pid(old_pid);
647 put_cred(old_cred);
648 }
649
650 static int unix_listen(struct socket *sock, int backlog)
651 {
652 int err;
653 struct sock *sk = sock->sk;
654 struct unix_sock *u = unix_sk(sk);
655
656 err = -EOPNOTSUPP;
657 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
658 goto out; /* Only stream/seqpacket sockets accept */
659 err = -EINVAL;
660 if (!u->addr)
661 goto out; /* No listens on an unbound socket */
662 unix_state_lock(sk);
663 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
664 goto out_unlock;
665 if (backlog > sk->sk_max_ack_backlog)
666 wake_up_interruptible_all(&u->peer_wait);
667 sk->sk_max_ack_backlog = backlog;
668 sk->sk_state = TCP_LISTEN;
669 /* set credentials so connect can copy them */
670 init_peercred(sk);
671 err = 0;
672
673 out_unlock:
674 unix_state_unlock(sk);
675 out:
676 return err;
677 }
678
679 static int unix_release(struct socket *);
680 static int unix_bind(struct socket *, struct sockaddr *, int);
681 static int unix_stream_connect(struct socket *, struct sockaddr *,
682 int addr_len, int flags);
683 static int unix_socketpair(struct socket *, struct socket *);
684 static int unix_accept(struct socket *, struct socket *, int, bool);
685 static int unix_getname(struct socket *, struct sockaddr *, int);
686 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
687 static __poll_t unix_dgram_poll(struct file *, struct socket *,
688 poll_table *);
689 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
690 #ifdef CONFIG_COMPAT
691 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
692 #endif
693 static int unix_shutdown(struct socket *, int);
694 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
695 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
696 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
697 size_t size, int flags);
698 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
699 struct pipe_inode_info *, size_t size,
700 unsigned int flags);
701 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
702 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
703 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
704 sk_read_actor_t recv_actor);
705 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
706 sk_read_actor_t recv_actor);
707 static int unix_dgram_connect(struct socket *, struct sockaddr *,
708 int, int);
709 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
710 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
711 int);
712
713 static int unix_set_peek_off(struct sock *sk, int val)
714 {
715 struct unix_sock *u = unix_sk(sk);
716
717 if (mutex_lock_interruptible(&u->iolock))
718 return -EINTR;
719
720 WRITE_ONCE(sk->sk_peek_off, val);
721 mutex_unlock(&u->iolock);
722
723 return 0;
724 }
725
726 #ifdef CONFIG_PROC_FS
727 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
728 {
729 struct sock *sk = sock->sk;
730 struct unix_sock *u;
731
732 if (sk) {
733 u = unix_sk(sock->sk);
734 seq_printf(m, "scm_fds: %u\n",
735 atomic_read(&u->scm_stat.nr_fds));
736 }
737 }
738 #else
739 #define unix_show_fdinfo NULL
740 #endif
741
742 static const struct proto_ops unix_stream_ops = {
743 .family = PF_UNIX,
744 .owner = THIS_MODULE,
745 .release = unix_release,
746 .bind = unix_bind,
747 .connect = unix_stream_connect,
748 .socketpair = unix_socketpair,
749 .accept = unix_accept,
750 .getname = unix_getname,
751 .poll = unix_poll,
752 .ioctl = unix_ioctl,
753 #ifdef CONFIG_COMPAT
754 .compat_ioctl = unix_compat_ioctl,
755 #endif
756 .listen = unix_listen,
757 .shutdown = unix_shutdown,
758 .sendmsg = unix_stream_sendmsg,
759 .recvmsg = unix_stream_recvmsg,
760 .read_sock = unix_stream_read_sock,
761 .mmap = sock_no_mmap,
762 .sendpage = unix_stream_sendpage,
763 .splice_read = unix_stream_splice_read,
764 .set_peek_off = unix_set_peek_off,
765 .show_fdinfo = unix_show_fdinfo,
766 };
767
768 static const struct proto_ops unix_dgram_ops = {
769 .family = PF_UNIX,
770 .owner = THIS_MODULE,
771 .release = unix_release,
772 .bind = unix_bind,
773 .connect = unix_dgram_connect,
774 .socketpair = unix_socketpair,
775 .accept = sock_no_accept,
776 .getname = unix_getname,
777 .poll = unix_dgram_poll,
778 .ioctl = unix_ioctl,
779 #ifdef CONFIG_COMPAT
780 .compat_ioctl = unix_compat_ioctl,
781 #endif
782 .listen = sock_no_listen,
783 .shutdown = unix_shutdown,
784 .sendmsg = unix_dgram_sendmsg,
785 .read_sock = unix_read_sock,
786 .recvmsg = unix_dgram_recvmsg,
787 .mmap = sock_no_mmap,
788 .sendpage = sock_no_sendpage,
789 .set_peek_off = unix_set_peek_off,
790 .show_fdinfo = unix_show_fdinfo,
791 };
792
793 static const struct proto_ops unix_seqpacket_ops = {
794 .family = PF_UNIX,
795 .owner = THIS_MODULE,
796 .release = unix_release,
797 .bind = unix_bind,
798 .connect = unix_stream_connect,
799 .socketpair = unix_socketpair,
800 .accept = unix_accept,
801 .getname = unix_getname,
802 .poll = unix_dgram_poll,
803 .ioctl = unix_ioctl,
804 #ifdef CONFIG_COMPAT
805 .compat_ioctl = unix_compat_ioctl,
806 #endif
807 .listen = unix_listen,
808 .shutdown = unix_shutdown,
809 .sendmsg = unix_seqpacket_sendmsg,
810 .recvmsg = unix_seqpacket_recvmsg,
811 .mmap = sock_no_mmap,
812 .sendpage = sock_no_sendpage,
813 .set_peek_off = unix_set_peek_off,
814 .show_fdinfo = unix_show_fdinfo,
815 };
816
817 static void unix_close(struct sock *sk, long timeout)
818 {
819 /* Nothing to do here, unix socket does not need a ->close().
820 * This is merely for sockmap.
821 */
822 }
823
824 static void unix_unhash(struct sock *sk)
825 {
826 /* Nothing to do here, unix socket does not need a ->unhash().
827 * This is merely for sockmap.
828 */
829 }
830
831 struct proto unix_dgram_proto = {
832 .name = "UNIX",
833 .owner = THIS_MODULE,
834 .obj_size = sizeof(struct unix_sock),
835 .close = unix_close,
836 #ifdef CONFIG_BPF_SYSCALL
837 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
838 #endif
839 };
840
841 struct proto unix_stream_proto = {
842 .name = "UNIX-STREAM",
843 .owner = THIS_MODULE,
844 .obj_size = sizeof(struct unix_sock),
845 .close = unix_close,
846 .unhash = unix_unhash,
847 #ifdef CONFIG_BPF_SYSCALL
848 .psock_update_sk_prot = unix_stream_bpf_update_proto,
849 #endif
850 };
851
852 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
853 {
854 struct unix_sock *u;
855 struct sock *sk;
856 int err;
857
858 atomic_long_inc(&unix_nr_socks);
859 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
860 err = -ENFILE;
861 goto err;
862 }
863
864 if (type == SOCK_STREAM)
865 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
866 else /*dgram and seqpacket */
867 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
868
869 if (!sk) {
870 err = -ENOMEM;
871 goto err;
872 }
873
874 sock_init_data(sock, sk);
875
876 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
877 sk->sk_write_space = unix_write_space;
878 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
879 sk->sk_destruct = unix_sock_destructor;
880 u = unix_sk(sk);
881 u->path.dentry = NULL;
882 u->path.mnt = NULL;
883 spin_lock_init(&u->lock);
884 atomic_long_set(&u->inflight, 0);
885 INIT_LIST_HEAD(&u->link);
886 mutex_init(&u->iolock); /* single task reading lock */
887 mutex_init(&u->bindlock); /* single task binding lock */
888 init_waitqueue_head(&u->peer_wait);
889 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
890 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
891 unix_insert_socket(unix_sockets_unbound(sk), sk);
892
893 local_bh_disable();
894 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
895 local_bh_enable();
896
897 return sk;
898
899 err:
900 atomic_long_dec(&unix_nr_socks);
901 return ERR_PTR(err);
902 }
903
904 static int unix_create(struct net *net, struct socket *sock, int protocol,
905 int kern)
906 {
907 struct sock *sk;
908
909 if (protocol && protocol != PF_UNIX)
910 return -EPROTONOSUPPORT;
911
912 sock->state = SS_UNCONNECTED;
913
914 switch (sock->type) {
915 case SOCK_STREAM:
916 sock->ops = &unix_stream_ops;
917 break;
918 /*
919 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
920 * nothing uses it.
921 */
922 case SOCK_RAW:
923 sock->type = SOCK_DGRAM;
924 fallthrough;
925 case SOCK_DGRAM:
926 sock->ops = &unix_dgram_ops;
927 break;
928 case SOCK_SEQPACKET:
929 sock->ops = &unix_seqpacket_ops;
930 break;
931 default:
932 return -ESOCKTNOSUPPORT;
933 }
934
935 sk = unix_create1(net, sock, kern, sock->type);
936 if (IS_ERR(sk))
937 return PTR_ERR(sk);
938
939 return 0;
940 }
941
942 static int unix_release(struct socket *sock)
943 {
944 struct sock *sk = sock->sk;
945
946 if (!sk)
947 return 0;
948
949 sk->sk_prot->close(sk, 0);
950 unix_release_sock(sk, 0);
951 sock->sk = NULL;
952
953 return 0;
954 }
955
956 static int unix_autobind(struct socket *sock)
957 {
958 struct sock *sk = sock->sk;
959 struct net *net = sock_net(sk);
960 struct unix_sock *u = unix_sk(sk);
961 static u32 ordernum = 1;
962 struct unix_address *addr;
963 int err;
964 unsigned int retries = 0;
965
966 err = mutex_lock_interruptible(&u->bindlock);
967 if (err)
968 return err;
969
970 if (u->addr)
971 goto out;
972
973 err = -ENOMEM;
974 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
975 if (!addr)
976 goto out;
977
978 addr->name->sun_family = AF_UNIX;
979 refcount_set(&addr->refcnt, 1);
980
981 retry:
982 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
983 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
984 addr->hash ^= sk->sk_type;
985
986 spin_lock(&unix_table_lock);
987 ordernum = (ordernum+1)&0xFFFFF;
988
989 if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
990 spin_unlock(&unix_table_lock);
991 /*
992 * __unix_find_socket_byname() may take a long time if many names
993 * are already in use.
994 */
995 cond_resched();
996 /* Give up if all names seem to be in use. */
997 if (retries++ == 0xFFFFF) {
998 err = -ENOSPC;
999 kfree(addr);
1000 goto out;
1001 }
1002 goto retry;
1003 }
1004
1005 __unix_set_addr(sk, addr, addr->hash);
1006 spin_unlock(&unix_table_lock);
1007 err = 0;
1008
1009 out: mutex_unlock(&u->bindlock);
1010 return err;
1011 }
1012
1013 static struct sock *unix_find_other(struct net *net,
1014 struct sockaddr_un *sunname, int len,
1015 int type, unsigned int hash, int *error)
1016 {
1017 struct sock *u;
1018 struct path path;
1019 int err = 0;
1020
1021 if (sunname->sun_path[0]) {
1022 struct inode *inode;
1023 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1024 if (err)
1025 goto fail;
1026 inode = d_backing_inode(path.dentry);
1027 err = path_permission(&path, MAY_WRITE);
1028 if (err)
1029 goto put_fail;
1030
1031 err = -ECONNREFUSED;
1032 if (!S_ISSOCK(inode->i_mode))
1033 goto put_fail;
1034 u = unix_find_socket_byinode(inode);
1035 if (!u)
1036 goto put_fail;
1037
1038 if (u->sk_type == type)
1039 touch_atime(&path);
1040
1041 path_put(&path);
1042
1043 err = -EPROTOTYPE;
1044 if (u->sk_type != type) {
1045 sock_put(u);
1046 goto fail;
1047 }
1048 } else {
1049 err = -ECONNREFUSED;
1050 u = unix_find_socket_byname(net, sunname, len, type ^ hash);
1051 if (u) {
1052 struct dentry *dentry;
1053 dentry = unix_sk(u)->path.dentry;
1054 if (dentry)
1055 touch_atime(&unix_sk(u)->path);
1056 } else
1057 goto fail;
1058 }
1059 return u;
1060
1061 put_fail:
1062 path_put(&path);
1063 fail:
1064 *error = err;
1065 return NULL;
1066 }
1067
1068 static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
1069 {
1070 struct unix_sock *u = unix_sk(sk);
1071 umode_t mode = S_IFSOCK |
1072 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1073 struct user_namespace *ns; // barf...
1074 struct path parent;
1075 struct dentry *dentry;
1076 unsigned int hash;
1077 int err;
1078
1079 /*
1080 * Get the parent directory, calculate the hash for last
1081 * component.
1082 */
1083 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1084 if (IS_ERR(dentry))
1085 return PTR_ERR(dentry);
1086 ns = mnt_user_ns(parent.mnt);
1087
1088 /*
1089 * All right, let's create it.
1090 */
1091 err = security_path_mknod(&parent, dentry, mode, 0);
1092 if (!err)
1093 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
1094 if (err)
1095 goto out;
1096 err = mutex_lock_interruptible(&u->bindlock);
1097 if (err)
1098 goto out_unlink;
1099 if (u->addr)
1100 goto out_unlock;
1101
1102 addr->hash = UNIX_HASH_SIZE;
1103 hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1104 spin_lock(&unix_table_lock);
1105 u->path.mnt = mntget(parent.mnt);
1106 u->path.dentry = dget(dentry);
1107 __unix_set_addr(sk, addr, hash);
1108 spin_unlock(&unix_table_lock);
1109 mutex_unlock(&u->bindlock);
1110 done_path_create(&parent, dentry);
1111 return 0;
1112
1113 out_unlock:
1114 mutex_unlock(&u->bindlock);
1115 err = -EINVAL;
1116 out_unlink:
1117 /* failed after successful mknod? unlink what we'd created... */
1118 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1119 out:
1120 done_path_create(&parent, dentry);
1121 return err;
1122 }
1123
1124 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
1125 {
1126 struct unix_sock *u = unix_sk(sk);
1127 int err;
1128
1129 err = mutex_lock_interruptible(&u->bindlock);
1130 if (err)
1131 return err;
1132
1133 if (u->addr) {
1134 mutex_unlock(&u->bindlock);
1135 return -EINVAL;
1136 }
1137
1138 spin_lock(&unix_table_lock);
1139 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1140 addr->hash)) {
1141 spin_unlock(&unix_table_lock);
1142 mutex_unlock(&u->bindlock);
1143 return -EADDRINUSE;
1144 }
1145 __unix_set_addr(sk, addr, addr->hash);
1146 spin_unlock(&unix_table_lock);
1147 mutex_unlock(&u->bindlock);
1148 return 0;
1149 }
1150
1151 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1152 {
1153 struct sock *sk = sock->sk;
1154 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1155 char *sun_path = sunaddr->sun_path;
1156 int err;
1157 unsigned int hash;
1158 struct unix_address *addr;
1159
1160 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1161 sunaddr->sun_family != AF_UNIX)
1162 return -EINVAL;
1163
1164 if (addr_len == sizeof(short))
1165 return unix_autobind(sock);
1166
1167 err = unix_mkname(sunaddr, addr_len, &hash);
1168 if (err < 0)
1169 return err;
1170 addr_len = err;
1171 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1172 if (!addr)
1173 return -ENOMEM;
1174
1175 memcpy(addr->name, sunaddr, addr_len);
1176 addr->len = addr_len;
1177 addr->hash = hash ^ sk->sk_type;
1178 refcount_set(&addr->refcnt, 1);
1179
1180 if (sun_path[0])
1181 err = unix_bind_bsd(sk, addr);
1182 else
1183 err = unix_bind_abstract(sk, addr);
1184 if (err)
1185 unix_release_addr(addr);
1186 return err == -EEXIST ? -EADDRINUSE : err;
1187 }
1188
1189 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1190 {
1191 if (unlikely(sk1 == sk2) || !sk2) {
1192 unix_state_lock(sk1);
1193 return;
1194 }
1195 if (sk1 > sk2)
1196 swap(sk1, sk2);
1197
1198 unix_state_lock(sk1);
1199 unix_state_lock_nested(sk2, U_LOCK_SECOND);
1200 }
1201
1202 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1203 {
1204 if (unlikely(sk1 == sk2) || !sk2) {
1205 unix_state_unlock(sk1);
1206 return;
1207 }
1208 unix_state_unlock(sk1);
1209 unix_state_unlock(sk2);
1210 }
1211
1212 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1213 int alen, int flags)
1214 {
1215 struct sock *sk = sock->sk;
1216 struct net *net = sock_net(sk);
1217 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1218 struct sock *other;
1219 unsigned int hash;
1220 int err;
1221
1222 err = -EINVAL;
1223 if (alen < offsetofend(struct sockaddr, sa_family))
1224 goto out;
1225
1226 if (addr->sa_family != AF_UNSPEC) {
1227 err = unix_mkname(sunaddr, alen, &hash);
1228 if (err < 0)
1229 goto out;
1230 alen = err;
1231
1232 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1233 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1234 goto out;
1235
1236 restart:
1237 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1238 if (!other)
1239 goto out;
1240
1241 unix_state_double_lock(sk, other);
1242
1243 /* Apparently VFS overslept socket death. Retry. */
1244 if (sock_flag(other, SOCK_DEAD)) {
1245 unix_state_double_unlock(sk, other);
1246 sock_put(other);
1247 goto restart;
1248 }
1249
1250 err = -EPERM;
1251 if (!unix_may_send(sk, other))
1252 goto out_unlock;
1253
1254 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1255 if (err)
1256 goto out_unlock;
1257
1258 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1259 } else {
1260 /*
1261 * 1003.1g breaking connected state with AF_UNSPEC
1262 */
1263 other = NULL;
1264 unix_state_double_lock(sk, other);
1265 }
1266
1267 /*
1268 * If it was connected, reconnect.
1269 */
1270 if (unix_peer(sk)) {
1271 struct sock *old_peer = unix_peer(sk);
1272
1273 unix_peer(sk) = other;
1274 if (!other)
1275 sk->sk_state = TCP_CLOSE;
1276 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1277
1278 unix_state_double_unlock(sk, other);
1279
1280 if (other != old_peer)
1281 unix_dgram_disconnected(sk, old_peer);
1282 sock_put(old_peer);
1283 } else {
1284 unix_peer(sk) = other;
1285 unix_state_double_unlock(sk, other);
1286 }
1287
1288 return 0;
1289
1290 out_unlock:
1291 unix_state_double_unlock(sk, other);
1292 sock_put(other);
1293 out:
1294 return err;
1295 }
1296
1297 static long unix_wait_for_peer(struct sock *other, long timeo)
1298 __releases(&unix_sk(other)->lock)
1299 {
1300 struct unix_sock *u = unix_sk(other);
1301 int sched;
1302 DEFINE_WAIT(wait);
1303
1304 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1305
1306 sched = !sock_flag(other, SOCK_DEAD) &&
1307 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1308 unix_recvq_full_lockless(other);
1309
1310 unix_state_unlock(other);
1311
1312 if (sched)
1313 timeo = schedule_timeout(timeo);
1314
1315 finish_wait(&u->peer_wait, &wait);
1316 return timeo;
1317 }
1318
1319 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1320 int addr_len, int flags)
1321 {
1322 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1323 struct sock *sk = sock->sk;
1324 struct net *net = sock_net(sk);
1325 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1326 struct sock *newsk = NULL;
1327 struct sock *other = NULL;
1328 struct sk_buff *skb = NULL;
1329 unsigned int hash;
1330 int st;
1331 int err;
1332 long timeo;
1333
1334 err = unix_mkname(sunaddr, addr_len, &hash);
1335 if (err < 0)
1336 goto out;
1337 addr_len = err;
1338
1339 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1340 (err = unix_autobind(sock)) != 0)
1341 goto out;
1342
1343 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1344
1345 /* First of all allocate resources.
1346 If we will make it after state is locked,
1347 we will have to recheck all again in any case.
1348 */
1349
1350 /* create new sock for complete connection */
1351 newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
1352 if (IS_ERR(newsk)) {
1353 err = PTR_ERR(newsk);
1354 newsk = NULL;
1355 goto out;
1356 }
1357
1358 err = -ENOMEM;
1359
1360 /* Allocate skb for sending to listening sock */
1361 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1362 if (skb == NULL)
1363 goto out;
1364
1365 restart:
1366 /* Find listening sock. */
1367 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1368 if (!other)
1369 goto out;
1370
1371 /* Latch state of peer */
1372 unix_state_lock(other);
1373
1374 /* Apparently VFS overslept socket death. Retry. */
1375 if (sock_flag(other, SOCK_DEAD)) {
1376 unix_state_unlock(other);
1377 sock_put(other);
1378 goto restart;
1379 }
1380
1381 err = -ECONNREFUSED;
1382 if (other->sk_state != TCP_LISTEN)
1383 goto out_unlock;
1384 if (other->sk_shutdown & RCV_SHUTDOWN)
1385 goto out_unlock;
1386
1387 if (unix_recvq_full(other)) {
1388 err = -EAGAIN;
1389 if (!timeo)
1390 goto out_unlock;
1391
1392 timeo = unix_wait_for_peer(other, timeo);
1393
1394 err = sock_intr_errno(timeo);
1395 if (signal_pending(current))
1396 goto out;
1397 sock_put(other);
1398 goto restart;
1399 }
1400
1401 /* Latch our state.
1402
1403 It is tricky place. We need to grab our state lock and cannot
1404 drop lock on peer. It is dangerous because deadlock is
1405 possible. Connect to self case and simultaneous
1406 attempt to connect are eliminated by checking socket
1407 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1408 check this before attempt to grab lock.
1409
1410 Well, and we have to recheck the state after socket locked.
1411 */
1412 st = sk->sk_state;
1413
1414 switch (st) {
1415 case TCP_CLOSE:
1416 /* This is ok... continue with connect */
1417 break;
1418 case TCP_ESTABLISHED:
1419 /* Socket is already connected */
1420 err = -EISCONN;
1421 goto out_unlock;
1422 default:
1423 err = -EINVAL;
1424 goto out_unlock;
1425 }
1426
1427 unix_state_lock_nested(sk, U_LOCK_SECOND);
1428
1429 if (sk->sk_state != st) {
1430 unix_state_unlock(sk);
1431 unix_state_unlock(other);
1432 sock_put(other);
1433 goto restart;
1434 }
1435
1436 err = security_unix_stream_connect(sk, other, newsk);
1437 if (err) {
1438 unix_state_unlock(sk);
1439 goto out_unlock;
1440 }
1441
1442 /* The way is open! Fastly set all the necessary fields... */
1443
1444 sock_hold(sk);
1445 unix_peer(newsk) = sk;
1446 newsk->sk_state = TCP_ESTABLISHED;
1447 newsk->sk_type = sk->sk_type;
1448 init_peercred(newsk);
1449 newu = unix_sk(newsk);
1450 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1451 otheru = unix_sk(other);
1452
1453 /* copy address information from listening to new sock
1454 *
1455 * The contents of *(otheru->addr) and otheru->path
1456 * are seen fully set up here, since we have found
1457 * otheru in hash under unix_table_lock. Insertion
1458 * into the hash chain we'd found it in had been done
1459 * in an earlier critical area protected by unix_table_lock,
1460 * the same one where we'd set *(otheru->addr) contents,
1461 * as well as otheru->path and otheru->addr itself.
1462 *
1463 * Using smp_store_release() here to set newu->addr
1464 * is enough to make those stores, as well as stores
1465 * to newu->path visible to anyone who gets newu->addr
1466 * by smp_load_acquire(). IOW, the same guarantees
1467 * as for unix_sock instances bound in unix_bind() or
1468 * in unix_autobind().
1469 */
1470 if (otheru->path.dentry) {
1471 path_get(&otheru->path);
1472 newu->path = otheru->path;
1473 }
1474 refcount_inc(&otheru->addr->refcnt);
1475 smp_store_release(&newu->addr, otheru->addr);
1476
1477 /* Set credentials */
1478 copy_peercred(sk, other);
1479
1480 sock->state = SS_CONNECTED;
1481 sk->sk_state = TCP_ESTABLISHED;
1482 sock_hold(newsk);
1483
1484 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1485 unix_peer(sk) = newsk;
1486
1487 unix_state_unlock(sk);
1488
1489 /* take ten and send info to listening sock */
1490 spin_lock(&other->sk_receive_queue.lock);
1491 __skb_queue_tail(&other->sk_receive_queue, skb);
1492 spin_unlock(&other->sk_receive_queue.lock);
1493 unix_state_unlock(other);
1494 other->sk_data_ready(other);
1495 sock_put(other);
1496 return 0;
1497
1498 out_unlock:
1499 if (other)
1500 unix_state_unlock(other);
1501
1502 out:
1503 kfree_skb(skb);
1504 if (newsk)
1505 unix_release_sock(newsk, 0);
1506 if (other)
1507 sock_put(other);
1508 return err;
1509 }
1510
1511 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1512 {
1513 struct sock *ska = socka->sk, *skb = sockb->sk;
1514
1515 /* Join our sockets back to back */
1516 sock_hold(ska);
1517 sock_hold(skb);
1518 unix_peer(ska) = skb;
1519 unix_peer(skb) = ska;
1520 init_peercred(ska);
1521 init_peercred(skb);
1522
1523 ska->sk_state = TCP_ESTABLISHED;
1524 skb->sk_state = TCP_ESTABLISHED;
1525 socka->state = SS_CONNECTED;
1526 sockb->state = SS_CONNECTED;
1527 return 0;
1528 }
1529
1530 static void unix_sock_inherit_flags(const struct socket *old,
1531 struct socket *new)
1532 {
1533 if (test_bit(SOCK_PASSCRED, &old->flags))
1534 set_bit(SOCK_PASSCRED, &new->flags);
1535 if (test_bit(SOCK_PASSSEC, &old->flags))
1536 set_bit(SOCK_PASSSEC, &new->flags);
1537 }
1538
1539 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1540 bool kern)
1541 {
1542 struct sock *sk = sock->sk;
1543 struct sock *tsk;
1544 struct sk_buff *skb;
1545 int err;
1546
1547 err = -EOPNOTSUPP;
1548 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1549 goto out;
1550
1551 err = -EINVAL;
1552 if (sk->sk_state != TCP_LISTEN)
1553 goto out;
1554
1555 /* If socket state is TCP_LISTEN it cannot change (for now...),
1556 * so that no locks are necessary.
1557 */
1558
1559 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1560 if (!skb) {
1561 /* This means receive shutdown. */
1562 if (err == 0)
1563 err = -EINVAL;
1564 goto out;
1565 }
1566
1567 tsk = skb->sk;
1568 skb_free_datagram(sk, skb);
1569 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1570
1571 /* attach accepted sock to socket */
1572 unix_state_lock(tsk);
1573 newsock->state = SS_CONNECTED;
1574 unix_sock_inherit_flags(sock, newsock);
1575 sock_graft(tsk, newsock);
1576 unix_state_unlock(tsk);
1577 return 0;
1578
1579 out:
1580 return err;
1581 }
1582
1583
1584 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1585 {
1586 struct sock *sk = sock->sk;
1587 struct unix_address *addr;
1588 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1589 int err = 0;
1590
1591 if (peer) {
1592 sk = unix_peer_get(sk);
1593
1594 err = -ENOTCONN;
1595 if (!sk)
1596 goto out;
1597 err = 0;
1598 } else {
1599 sock_hold(sk);
1600 }
1601
1602 addr = smp_load_acquire(&unix_sk(sk)->addr);
1603 if (!addr) {
1604 sunaddr->sun_family = AF_UNIX;
1605 sunaddr->sun_path[0] = 0;
1606 err = sizeof(short);
1607 } else {
1608 err = addr->len;
1609 memcpy(sunaddr, addr->name, addr->len);
1610 }
1611 sock_put(sk);
1612 out:
1613 return err;
1614 }
1615
1616 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1617 {
1618 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1619
1620 /*
1621 * Garbage collection of unix sockets starts by selecting a set of
1622 * candidate sockets which have reference only from being in flight
1623 * (total_refs == inflight_refs). This condition is checked once during
1624 * the candidate collection phase, and candidates are marked as such, so
1625 * that non-candidates can later be ignored. While inflight_refs is
1626 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1627 * is an instantaneous decision.
1628 *
1629 * Once a candidate, however, the socket must not be reinstalled into a
1630 * file descriptor while the garbage collection is in progress.
1631 *
1632 * If the above conditions are met, then the directed graph of
1633 * candidates (*) does not change while unix_gc_lock is held.
1634 *
1635 * Any operation that changes the file count through file descriptors
1636 * (dup, close, sendmsg) does not change the graph since candidates are
1637 * not installed in fds.
1638 *
1639 * Dequeueing a candidate via recvmsg would install it into an fd, but
1640 * that takes unix_gc_lock to decrement the inflight count, so it's
1641 * serialized with garbage collection.
1642 *
1643 * MSG_PEEK is special in that it does not change the inflight count,
1644 * yet does install the socket into an fd. The following lock/unlock
1645 * pair is to ensure serialization with garbage collection. It must be
1646 * done between incrementing the file count and installing the file into
1647 * an fd.
1648 *
1649 * If garbage collection starts after the barrier provided by the
1650 * lock/unlock, then it will see the elevated refcount and not mark this
1651 * as a candidate. If a garbage collection is already in progress
1652 * before the file count was incremented, then the lock/unlock pair will
1653 * ensure that garbage collection is finished before progressing to
1654 * installing the fd.
1655 *
1656 * (*) A -> B where B is on the queue of A or B is on the queue of C
1657 * which is on the queue of listening socket A.
1658 */
1659 spin_lock(&unix_gc_lock);
1660 spin_unlock(&unix_gc_lock);
1661 }
1662
1663 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1664 {
1665 int err = 0;
1666
1667 UNIXCB(skb).pid = get_pid(scm->pid);
1668 UNIXCB(skb).uid = scm->creds.uid;
1669 UNIXCB(skb).gid = scm->creds.gid;
1670 UNIXCB(skb).fp = NULL;
1671 unix_get_secdata(scm, skb);
1672 if (scm->fp && send_fds)
1673 err = unix_attach_fds(scm, skb);
1674
1675 skb->destructor = unix_destruct_scm;
1676 return err;
1677 }
1678
1679 static bool unix_passcred_enabled(const struct socket *sock,
1680 const struct sock *other)
1681 {
1682 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1683 !other->sk_socket ||
1684 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1685 }
1686
1687 /*
1688 * Some apps rely on write() giving SCM_CREDENTIALS
1689 * We include credentials if source or destination socket
1690 * asserted SOCK_PASSCRED.
1691 */
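/* Illustrative userspace sketch (not part of this kernel source): the
 * receiving side only has to assert SO_PASSCRED to see SCM_CREDENTIALS
 * (struct ucred) control messages on recvmsg(), which the helpers below
 * make work even when the sender never attached credentials itself:
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 */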
1692 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1693 const struct sock *other)
1694 {
1695 if (UNIXCB(skb).pid)
1696 return;
1697 if (unix_passcred_enabled(sock, other)) {
1698 UNIXCB(skb).pid = get_pid(task_tgid(current));
1699 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1700 }
1701 }
1702
1703 static int maybe_init_creds(struct scm_cookie *scm,
1704 struct socket *socket,
1705 const struct sock *other)
1706 {
1707 int err;
1708 struct msghdr msg = { .msg_controllen = 0 };
1709
1710 err = scm_send(socket, &msg, scm, false);
1711 if (err)
1712 return err;
1713
1714 if (unix_passcred_enabled(socket, other)) {
1715 scm->pid = get_pid(task_tgid(current));
1716 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1717 }
1718 return err;
1719 }
1720
1721 static bool unix_skb_scm_eq(struct sk_buff *skb,
1722 struct scm_cookie *scm)
1723 {
1724 const struct unix_skb_parms *u = &UNIXCB(skb);
1725
1726 return u->pid == scm->pid &&
1727 uid_eq(u->uid, scm->creds.uid) &&
1728 gid_eq(u->gid, scm->creds.gid) &&
1729 unix_secdata_eq(scm, skb);
1730 }
1731
1732 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1733 {
1734 struct scm_fp_list *fp = UNIXCB(skb).fp;
1735 struct unix_sock *u = unix_sk(sk);
1736
1737 if (unlikely(fp && fp->count))
1738 atomic_add(fp->count, &u->scm_stat.nr_fds);
1739 }
1740
1741 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1742 {
1743 struct scm_fp_list *fp = UNIXCB(skb).fp;
1744 struct unix_sock *u = unix_sk(sk);
1745
1746 if (unlikely(fp && fp->count))
1747 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1748 }
1749
1750 /*
1751 * Send AF_UNIX data.
1752 */
1753
1754 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1755 size_t len)
1756 {
1757 struct sock *sk = sock->sk;
1758 struct net *net = sock_net(sk);
1759 struct unix_sock *u = unix_sk(sk);
1760 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1761 struct sock *other = NULL;
1762 int namelen = 0; /* fake GCC */
1763 int err;
1764 unsigned int hash;
1765 struct sk_buff *skb;
1766 long timeo;
1767 struct scm_cookie scm;
1768 int data_len = 0;
1769 int sk_locked;
1770
1771 wait_for_unix_gc();
1772 err = scm_send(sock, msg, &scm, false);
1773 if (err < 0)
1774 return err;
1775
1776 err = -EOPNOTSUPP;
1777 if (msg->msg_flags&MSG_OOB)
1778 goto out;
1779
1780 if (msg->msg_namelen) {
1781 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1782 if (err < 0)
1783 goto out;
1784 namelen = err;
1785 } else {
1786 sunaddr = NULL;
1787 err = -ENOTCONN;
1788 other = unix_peer_get(sk);
1789 if (!other)
1790 goto out;
1791 }
1792
1793 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1794 && (err = unix_autobind(sock)) != 0)
1795 goto out;
1796
1797 err = -EMSGSIZE;
1798 if (len > sk->sk_sndbuf - 32)
1799 goto out;
1800
1801 if (len > SKB_MAX_ALLOC) {
1802 data_len = min_t(size_t,
1803 len - SKB_MAX_ALLOC,
1804 MAX_SKB_FRAGS * PAGE_SIZE);
1805 data_len = PAGE_ALIGN(data_len);
1806
1807 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1808 }
1809
1810 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1811 msg->msg_flags & MSG_DONTWAIT, &err,
1812 PAGE_ALLOC_COSTLY_ORDER);
1813 if (skb == NULL)
1814 goto out;
1815
1816 err = unix_scm_to_skb(&scm, skb, true);
1817 if (err < 0)
1818 goto out_free;
1819
1820 skb_put(skb, len - data_len);
1821 skb->data_len = data_len;
1822 skb->len = len;
1823 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1824 if (err)
1825 goto out_free;
1826
1827 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1828
1829 restart:
1830 if (!other) {
1831 err = -ECONNRESET;
1832 if (sunaddr == NULL)
1833 goto out_free;
1834
1835 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1836 hash, &err);
1837 if (other == NULL)
1838 goto out_free;
1839 }
1840
1841 if (sk_filter(other, skb) < 0) {
1842 /* Toss the packet but do not return any error to the sender */
1843 err = len;
1844 goto out_free;
1845 }
1846
1847 sk_locked = 0;
1848 unix_state_lock(other);
1849 restart_locked:
1850 err = -EPERM;
1851 if (!unix_may_send(sk, other))
1852 goto out_unlock;
1853
1854 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1855 /*
1856 * Check with POSIX 1003.1g - what should a
1857 * datagram error return here?
1858 */
1859 unix_state_unlock(other);
1860 sock_put(other);
1861
1862 if (!sk_locked)
1863 unix_state_lock(sk);
1864
1865 err = 0;
1866 if (sk->sk_type == SOCK_SEQPACKET) {
1867 /* We are here only when racing with unix_release_sock(),
1868 * which is clearing @other. Unlike the SOCK_DGRAM case,
1869 * never change the state to TCP_CLOSE.
1870 */
1871 unix_state_unlock(sk);
1872 err = -EPIPE;
1873 } else if (unix_peer(sk) == other) {
1874 unix_peer(sk) = NULL;
1875 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1876
1877 sk->sk_state = TCP_CLOSE;
1878 unix_state_unlock(sk);
1879
1880 unix_dgram_disconnected(sk, other);
1881 sock_put(other);
1882 err = -ECONNREFUSED;
1883 } else {
1884 unix_state_unlock(sk);
1885 }
1886
1887 other = NULL;
1888 if (err)
1889 goto out_free;
1890 goto restart;
1891 }
1892
1893 err = -EPIPE;
1894 if (other->sk_shutdown & RCV_SHUTDOWN)
1895 goto out_unlock;
1896
1897 if (sk->sk_type != SOCK_SEQPACKET) {
1898 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1899 if (err)
1900 goto out_unlock;
1901 }
1902
1903 /* other == sk && unix_peer(other) != sk if
1904 * - unix_peer(sk) == NULL, destination address bound to sk
1905 * - unix_peer(sk) == sk by time of get but disconnected before lock
1906 */
1907 if (other != sk &&
1908 unlikely(unix_peer(other) != sk &&
1909 unix_recvq_full_lockless(other))) {
1910 if (timeo) {
1911 timeo = unix_wait_for_peer(other, timeo);
1912
1913 err = sock_intr_errno(timeo);
1914 if (signal_pending(current))
1915 goto out_free;
1916
1917 goto restart;
1918 }
1919
1920 if (!sk_locked) {
1921 unix_state_unlock(other);
1922 unix_state_double_lock(sk, other);
1923 }
1924
1925 if (unix_peer(sk) != other ||
1926 unix_dgram_peer_wake_me(sk, other)) {
1927 err = -EAGAIN;
1928 sk_locked = 1;
1929 goto out_unlock;
1930 }
1931
1932 if (!sk_locked) {
1933 sk_locked = 1;
1934 goto restart_locked;
1935 }
1936 }
1937
1938 if (unlikely(sk_locked))
1939 unix_state_unlock(sk);
1940
1941 if (sock_flag(other, SOCK_RCVTSTAMP))
1942 __net_timestamp(skb);
1943 maybe_add_creds(skb, sock, other);
1944 scm_stat_add(other, skb);
1945 skb_queue_tail(&other->sk_receive_queue, skb);
1946 unix_state_unlock(other);
1947 other->sk_data_ready(other);
1948 sock_put(other);
1949 scm_destroy(&scm);
1950 return len;
1951
1952 out_unlock:
1953 if (sk_locked)
1954 unix_state_unlock(sk);
1955 unix_state_unlock(other);
1956 out_free:
1957 kfree_skb(skb);
1958 out:
1959 if (other)
1960 sock_put(other);
1961 scm_destroy(&scm);
1962 return err;
1963 }
1964
1965 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1966 * bytes, and a minimum of a full page.
1967 */
1968 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1969
1970 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
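/* Queue a single out-of-band byte on @other's receive queue and remember it
 * in unix_sk(other)->oob_skb so that MSG_OOB receives and SIOCATMARK can
 * find it later.
 */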
1971 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
1972 struct scm_cookie *scm, bool fds_sent)
1973 {
1974 struct unix_sock *ousk = unix_sk(other);
1975 struct sk_buff *skb;
1976 int err = 0;
1977
1978 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1979
1980 if (!skb)
1981 return err;
1982
1983 err = unix_scm_to_skb(scm, skb, !fds_sent);
1984 if (err < 0) {
1985 kfree_skb(skb);
1986 return err;
1987 }
1988 skb_put(skb, 1);
1989 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1990
1991 if (err) {
1992 kfree_skb(skb);
1993 return err;
1994 }
1995
1996 unix_state_lock(other);
1997
1998 if (sock_flag(other, SOCK_DEAD) ||
1999 (other->sk_shutdown & RCV_SHUTDOWN)) {
2000 unix_state_unlock(other);
2001 kfree_skb(skb);
2002 return -EPIPE;
2003 }
2004
2005 maybe_add_creds(skb, sock, other);
2006 skb_get(skb);
2007
2008 if (ousk->oob_skb)
2009 consume_skb(ousk->oob_skb);
2010
2011 WRITE_ONCE(ousk->oob_skb, skb);
2012
2013 scm_stat_add(other, skb);
2014 skb_queue_tail(&other->sk_receive_queue, skb);
2015 sk_send_sigurg(other);
2016 unix_state_unlock(other);
2017 other->sk_data_ready(other);
2018
2019 return err;
2020 }
2021 #endif
2022
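/* Stream send: the payload is split into skbs of at most half the send
 * buffer (capped by SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ), credentials are
 * attached to every skb but file descriptors only to the first, and each
 * skb is appended to the peer's receive queue as it is filled.
 */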
2023 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2024 size_t len)
2025 {
2026 struct sock *sk = sock->sk;
2027 struct sock *other = NULL;
2028 int err, size;
2029 struct sk_buff *skb;
2030 int sent = 0;
2031 struct scm_cookie scm;
2032 bool fds_sent = false;
2033 int data_len;
2034
2035 wait_for_unix_gc();
2036 err = scm_send(sock, msg, &scm, false);
2037 if (err < 0)
2038 return err;
2039
2040 err = -EOPNOTSUPP;
2041 if (msg->msg_flags & MSG_OOB) {
2042 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2043 if (len)
2044 len--;
2045 else
2046 #endif
2047 goto out_err;
2048 }
2049
2050 if (msg->msg_namelen) {
2051 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2052 goto out_err;
2053 } else {
2054 err = -ENOTCONN;
2055 other = unix_peer(sk);
2056 if (!other)
2057 goto out_err;
2058 }
2059
2060 if (sk->sk_shutdown & SEND_SHUTDOWN)
2061 goto pipe_err;
2062
2063 while (sent < len) {
2064 size = len - sent;
2065
2066 /* Keep two messages in the pipe so it schedules better */
2067 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2068
2069 /* allow fallback to order-0 allocations */
2070 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2071
2072 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2073
2074 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2075
2076 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2077 msg->msg_flags & MSG_DONTWAIT, &err,
2078 get_order(UNIX_SKB_FRAGS_SZ));
2079 if (!skb)
2080 goto out_err;
2081
2082 /* Only send the fds in the first buffer */
2083 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2084 if (err < 0) {
2085 kfree_skb(skb);
2086 goto out_err;
2087 }
2088 fds_sent = true;
2089
2090 skb_put(skb, size - data_len);
2091 skb->data_len = data_len;
2092 skb->len = size;
2093 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2094 if (err) {
2095 kfree_skb(skb);
2096 goto out_err;
2097 }
2098
2099 unix_state_lock(other);
2100
2101 if (sock_flag(other, SOCK_DEAD) ||
2102 (other->sk_shutdown & RCV_SHUTDOWN))
2103 goto pipe_err_free;
2104
2105 maybe_add_creds(skb, sock, other);
2106 scm_stat_add(other, skb);
2107 skb_queue_tail(&other->sk_receive_queue, skb);
2108 unix_state_unlock(other);
2109 other->sk_data_ready(other);
2110 sent += size;
2111 }
2112
2113 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2114 if (msg->msg_flags & MSG_OOB) {
2115 err = queue_oob(sock, msg, other, &scm, fds_sent);
2116 if (err)
2117 goto out_err;
2118 sent++;
2119 }
2120 #endif
2121
2122 scm_destroy(&scm);
2123
2124 return sent;
2125
2126 pipe_err_free:
2127 unix_state_unlock(other);
2128 kfree_skb(skb);
2129 pipe_err:
2130 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2131 send_sig(SIGPIPE, current, 0);
2132 err = -EPIPE;
2133 out_err:
2134 scm_destroy(&scm);
2135 return sent ? : err;
2136 }
2137
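/* sendpage() for stream sockets: append the page fragment to the tail skb on
 * the peer's receive queue if that skb came from the same sender (same pid,
 * credentials and security context), otherwise queue a freshly allocated skb
 * to carry the fragments.
 */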
2138 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2139 int offset, size_t size, int flags)
2140 {
2141 int err;
2142 bool send_sigpipe = false;
2143 bool init_scm = true;
2144 struct scm_cookie scm;
2145 struct sock *other, *sk = socket->sk;
2146 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2147
2148 if (flags & MSG_OOB)
2149 return -EOPNOTSUPP;
2150
2151 other = unix_peer(sk);
2152 if (!other || sk->sk_state != TCP_ESTABLISHED)
2153 return -ENOTCONN;
2154
2155 if (false) {
2156 alloc_skb:
2157 spin_unlock(&other->sk_receive_queue.lock);
2158 unix_state_unlock(other);
2159 mutex_unlock(&unix_sk(other)->iolock);
2160 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2161 &err, 0);
2162 if (!newskb)
2163 goto err;
2164 }
2165
2166 /* we must acquire iolock as we modify already present
2167 * skbs in the sk_receive_queue and mess with skb->len
2168 */
2169 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2170 if (err) {
2171 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2172 goto err;
2173 }
2174
2175 if (sk->sk_shutdown & SEND_SHUTDOWN) {
2176 err = -EPIPE;
2177 send_sigpipe = true;
2178 goto err_unlock;
2179 }
2180
2181 unix_state_lock(other);
2182
2183 if (sock_flag(other, SOCK_DEAD) ||
2184 other->sk_shutdown & RCV_SHUTDOWN) {
2185 err = -EPIPE;
2186 send_sigpipe = true;
2187 goto err_state_unlock;
2188 }
2189
2190 if (init_scm) {
2191 err = maybe_init_creds(&scm, socket, other);
2192 if (err)
2193 goto err_state_unlock;
2194 init_scm = false;
2195 }
2196
2197 spin_lock(&other->sk_receive_queue.lock);
2198 skb = skb_peek_tail(&other->sk_receive_queue);
2199 if (tail && tail == skb) {
2200 skb = newskb;
2201 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2202 if (newskb) {
2203 skb = newskb;
2204 } else {
2205 tail = skb;
2206 goto alloc_skb;
2207 }
2208 } else if (newskb) {
2209 /* fast path: the tail skb was written by this sender, so
2210 * the speculatively allocated newskb is not needed and can
2211 * simply be released
2212 */
2213 consume_skb(newskb);
2214 newskb = NULL;
2215 }
2216
2217 if (skb_append_pagefrags(skb, page, offset, size)) {
2218 tail = skb;
2219 goto alloc_skb;
2220 }
2221
2222 skb->len += size;
2223 skb->data_len += size;
2224 skb->truesize += size;
2225 refcount_add(size, &sk->sk_wmem_alloc);
2226
2227 if (newskb) {
2228 unix_scm_to_skb(&scm, skb, false);
2229 __skb_queue_tail(&other->sk_receive_queue, newskb);
2230 }
2231
2232 spin_unlock(&other->sk_receive_queue.lock);
2233 unix_state_unlock(other);
2234 mutex_unlock(&unix_sk(other)->iolock);
2235
2236 other->sk_data_ready(other);
2237 scm_destroy(&scm);
2238 return size;
2239
2240 err_state_unlock:
2241 unix_state_unlock(other);
2242 err_unlock:
2243 mutex_unlock(&unix_sk(other)->iolock);
2244 err:
2245 kfree_skb(newskb);
2246 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2247 send_sig(SIGPIPE, current, 0);
2248 if (!init_scm)
2249 scm_destroy(&scm);
2250 return err;
2251 }
2252
2253 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2254 size_t len)
2255 {
2256 int err;
2257 struct sock *sk = sock->sk;
2258
2259 err = sock_error(sk);
2260 if (err)
2261 return err;
2262
2263 if (sk->sk_state != TCP_ESTABLISHED)
2264 return -ENOTCONN;
2265
2266 if (msg->msg_namelen)
2267 msg->msg_namelen = 0;
2268
2269 return unix_dgram_sendmsg(sock, msg, len);
2270 }
2271
2272 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2273 size_t size, int flags)
2274 {
2275 struct sock *sk = sock->sk;
2276
2277 if (sk->sk_state != TCP_ESTABLISHED)
2278 return -ENOTCONN;
2279
2280 return unix_dgram_recvmsg(sock, msg, size, flags);
2281 }
2282
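/* Report the sending socket's bound address (if any) back to the caller of
 * recvmsg() via msg_name.
 */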
2283 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2284 {
2285 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2286
2287 if (addr) {
2288 msg->msg_namelen = addr->len;
2289 memcpy(msg->msg_name, addr->name, addr->len);
2290 }
2291 }
2292
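/* Receive one datagram.  With MSG_PEEK the skb stays on the queue and any
 * passed file descriptors are duplicated for the caller; otherwise the skb
 * is consumed and its descriptors are detached into the caller's SCM data.
 */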
2293 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2294 int flags)
2295 {
2296 struct scm_cookie scm;
2297 struct socket *sock = sk->sk_socket;
2298 struct unix_sock *u = unix_sk(sk);
2299 struct sk_buff *skb, *last;
2300 long timeo;
2301 int skip;
2302 int err;
2303
2304 err = -EOPNOTSUPP;
2305 if (flags&MSG_OOB)
2306 goto out;
2307
2308 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2309
2310 do {
2311 mutex_lock(&u->iolock);
2312
2313 skip = sk_peek_offset(sk, flags);
2314 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2315 &skip, &err, &last);
2316 if (skb) {
2317 if (!(flags & MSG_PEEK))
2318 scm_stat_del(sk, skb);
2319 break;
2320 }
2321
2322 mutex_unlock(&u->iolock);
2323
2324 if (err != -EAGAIN)
2325 break;
2326 } while (timeo &&
2327 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2328 &err, &timeo, last));
2329
2330 if (!skb) { /* implies iolock unlocked */
2331 unix_state_lock(sk);
2332 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2333 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2334 (sk->sk_shutdown & RCV_SHUTDOWN))
2335 err = 0;
2336 unix_state_unlock(sk);
2337 goto out;
2338 }
2339
2340 if (wq_has_sleeper(&u->peer_wait))
2341 wake_up_interruptible_sync_poll(&u->peer_wait,
2342 EPOLLOUT | EPOLLWRNORM |
2343 EPOLLWRBAND);
2344
2345 if (msg->msg_name)
2346 unix_copy_addr(msg, skb->sk);
2347
2348 if (size > skb->len - skip)
2349 size = skb->len - skip;
2350 else if (size < skb->len - skip)
2351 msg->msg_flags |= MSG_TRUNC;
2352
2353 err = skb_copy_datagram_msg(skb, skip, msg, size);
2354 if (err)
2355 goto out_free;
2356
2357 if (sock_flag(sk, SOCK_RCVTSTAMP))
2358 __sock_recv_timestamp(msg, sk, skb);
2359
2360 memset(&scm, 0, sizeof(scm));
2361
2362 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2363 unix_set_secdata(&scm, skb);
2364
2365 if (!(flags & MSG_PEEK)) {
2366 if (UNIXCB(skb).fp)
2367 unix_detach_fds(&scm, skb);
2368
2369 sk_peek_offset_bwd(sk, skb->len);
2370 } else {
2371 /* It is questionable: on PEEK we could:
2372 - do not return fds - good, but too simple 8)
2373 - return fds, and do not return them on read (old strategy,
2374 apparently wrong)
2375 - clone fds (I chose it for now, it is the most universal
2376 solution)
2377
2378 POSIX 1003.1g does not actually define this clearly
2379 at all. POSIX 1003.1g doesn't define a lot of things
2380 clearly however!
2381
2382 */
2383
2384 sk_peek_offset_fwd(sk, size);
2385
2386 if (UNIXCB(skb).fp)
2387 unix_peek_fds(&scm, skb);
2388 }
2389 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2390
2391 scm_recv(sock, msg, &scm, flags);
2392
2393 out_free:
2394 skb_free_datagram(sk, skb);
2395 mutex_unlock(&u->iolock);
2396 out:
2397 return err;
2398 }
2399
2400 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2401 int flags)
2402 {
2403 struct sock *sk = sock->sk;
2404
2405 #ifdef CONFIG_BPF_SYSCALL
2406 const struct proto *prot = READ_ONCE(sk->sk_prot);
2407
2408 if (prot != &unix_dgram_proto)
2409 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2410 flags & ~MSG_DONTWAIT, NULL);
2411 #endif
2412 return __unix_dgram_recvmsg(sk, msg, size, flags);
2413 }
2414
2415 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2416 sk_read_actor_t recv_actor)
2417 {
2418 int copied = 0;
2419
2420 while (1) {
2421 struct unix_sock *u = unix_sk(sk);
2422 struct sk_buff *skb;
2423 int used, err;
2424
2425 mutex_lock(&u->iolock);
2426 skb = skb_recv_datagram(sk, 0, 1, &err);
2427 mutex_unlock(&u->iolock);
2428 if (!skb)
2429 return err;
2430
2431 used = recv_actor(desc, skb, 0, skb->len);
2432 if (used <= 0) {
2433 if (!copied)
2434 copied = used;
2435 kfree_skb(skb);
2436 break;
2437 } else if (used <= skb->len) {
2438 copied += used;
2439 }
2440
2441 kfree_skb(skb);
2442 if (!desc->count)
2443 break;
2444 }
2445
2446 return copied;
2447 }
2448
2449 /*
2450 * Sleep until more data has arrived. But check for races..
2451 */
2452 static long unix_stream_data_wait(struct sock *sk, long timeo,
2453 struct sk_buff *last, unsigned int last_len,
2454 bool freezable)
2455 {
2456 struct sk_buff *tail;
2457 DEFINE_WAIT(wait);
2458
2459 unix_state_lock(sk);
2460
2461 for (;;) {
2462 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2463
2464 tail = skb_peek_tail(&sk->sk_receive_queue);
2465 if (tail != last ||
2466 (tail && tail->len != last_len) ||
2467 sk->sk_err ||
2468 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2469 signal_pending(current) ||
2470 !timeo)
2471 break;
2472
2473 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2474 unix_state_unlock(sk);
2475 if (freezable)
2476 timeo = freezable_schedule_timeout(timeo);
2477 else
2478 timeo = schedule_timeout(timeo);
2479 unix_state_lock(sk);
2480
2481 if (sock_flag(sk, SOCK_DEAD))
2482 break;
2483
2484 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2485 }
2486
2487 finish_wait(sk_sleep(sk), &wait);
2488 unix_state_unlock(sk);
2489 return timeo;
2490 }
2491
2492 static unsigned int unix_skb_len(const struct sk_buff *skb)
2493 {
2494 return skb->len - UNIXCB(skb).consumed;
2495 }
2496
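/* Shared state for the stream receive paths (recvmsg and splice_read);
 * recv_actor copies or splices one chunk out of an skb and returns how many
 * bytes it consumed, or a negative value on error.
 */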
2497 struct unix_stream_read_state {
2498 int (*recv_actor)(struct sk_buff *, int, int,
2499 struct unix_stream_read_state *);
2500 struct socket *socket;
2501 struct msghdr *msg;
2502 struct pipe_inode_info *pipe;
2503 size_t size;
2504 int flags;
2505 unsigned int splice_flags;
2506 };
2507
2508 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
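/* Receive the pending out-of-band byte (MSG_OOB).  Fails with -EINVAL when
 * the byte would be delivered inline (SO_OOBINLINE) or no OOB data is
 * pending; with MSG_PEEK the byte is left pending for a later read.
 */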
2509 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2510 {
2511 struct socket *sock = state->socket;
2512 struct sock *sk = sock->sk;
2513 struct unix_sock *u = unix_sk(sk);
2514 int chunk = 1;
2515 struct sk_buff *oob_skb;
2516
2517 mutex_lock(&u->iolock);
2518 unix_state_lock(sk);
2519
2520 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2521 unix_state_unlock(sk);
2522 mutex_unlock(&u->iolock);
2523 return -EINVAL;
2524 }
2525
2526 oob_skb = u->oob_skb;
2527
2528 if (!(state->flags & MSG_PEEK))
2529 WRITE_ONCE(u->oob_skb, NULL);
2530 else
2531 skb_get(oob_skb);
2532 unix_state_unlock(sk);
2533
2534 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2535
2536 if (!(state->flags & MSG_PEEK))
2537 UNIXCB(oob_skb).consumed += 1;
2538
2539 consume_skb(oob_skb);
2540
2541 mutex_unlock(&u->iolock);
2542
2543 if (chunk < 0)
2544 return -EFAULT;
2545
2546 state->msg->msg_flags |= MSG_OOB;
2547 return 1;
2548 }
2549
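/* Decide which skb a stream read may consume next while an OOB byte is
 * pending: stop before the mark once some data has been copied, deliver the
 * OOB byte inline when SO_OOBINLINE is set, and otherwise skip past it.
 */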
2550 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2551 int flags, int copied)
2552 {
2553 struct unix_sock *u = unix_sk(sk);
2554
2555 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2556 skb_unlink(skb, &sk->sk_receive_queue);
2557 consume_skb(skb);
2558 skb = NULL;
2559 } else {
2560 if (skb == u->oob_skb) {
2561 if (copied) {
2562 skb = NULL;
2563 } else if (sock_flag(sk, SOCK_URGINLINE)) {
2564 if (!(flags & MSG_PEEK)) {
2565 WRITE_ONCE(u->oob_skb, NULL);
2566 consume_skb(skb);
2567 }
2568 } else if (!(flags & MSG_PEEK)) {
2569 skb_unlink(skb, &sk->sk_receive_queue);
2570 consume_skb(skb);
2571 skb = skb_peek(&sk->sk_receive_queue);
2572 }
2573 }
2574 }
2575 return skb;
2576 }
2577 #endif
2578
2579 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2580 sk_read_actor_t recv_actor)
2581 {
2582 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2583 return -ENOTCONN;
2584
2585 return unix_read_sock(sk, desc, recv_actor);
2586 }
2587
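/* Common stream receive loop: walk the receive queue, glue consecutive skbs
 * written by the same sender into one read, and stop early when the
 * credentials change or an skb carrying file descriptors has been consumed.
 */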
2588 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2589 bool freezable)
2590 {
2591 struct scm_cookie scm;
2592 struct socket *sock = state->socket;
2593 struct sock *sk = sock->sk;
2594 struct unix_sock *u = unix_sk(sk);
2595 int copied = 0;
2596 int flags = state->flags;
2597 int noblock = flags & MSG_DONTWAIT;
2598 bool check_creds = false;
2599 int target;
2600 int err = 0;
2601 long timeo;
2602 int skip;
2603 size_t size = state->size;
2604 unsigned int last_len;
2605
2606 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2607 err = -EINVAL;
2608 goto out;
2609 }
2610
2611 if (unlikely(flags & MSG_OOB)) {
2612 err = -EOPNOTSUPP;
2613 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2614 err = unix_stream_recv_urg(state);
2615 #endif
2616 goto out;
2617 }
2618
2619 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2620 timeo = sock_rcvtimeo(sk, noblock);
2621
2622 memset(&scm, 0, sizeof(scm));
2623
2624 /* Lock the socket to prevent the receive queue from being
2625 * reordered while we sleep copying data out to the message
2626 */
2627 mutex_lock(&u->iolock);
2628
2629 skip = max(sk_peek_offset(sk, flags), 0);
2630
2631 do {
2632 int chunk;
2633 bool drop_skb;
2634 struct sk_buff *skb, *last;
2635
2636 redo:
2637 unix_state_lock(sk);
2638 if (sock_flag(sk, SOCK_DEAD)) {
2639 err = -ECONNRESET;
2640 goto unlock;
2641 }
2642 last = skb = skb_peek(&sk->sk_receive_queue);
2643 last_len = last ? last->len : 0;
2644
2645 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2646 if (skb) {
2647 skb = manage_oob(skb, sk, flags, copied);
2648 if (!skb) {
2649 unix_state_unlock(sk);
2650 if (copied)
2651 break;
2652 goto redo;
2653 }
2654 }
2655 #endif
2656 again:
2657 if (skb == NULL) {
2658 if (copied >= target)
2659 goto unlock;
2660
2661 /*
2662 * POSIX 1003.1g mandates this order.
2663 */
2664
2665 err = sock_error(sk);
2666 if (err)
2667 goto unlock;
2668 if (sk->sk_shutdown & RCV_SHUTDOWN)
2669 goto unlock;
2670
2671 unix_state_unlock(sk);
2672 if (!timeo) {
2673 err = -EAGAIN;
2674 break;
2675 }
2676
2677 mutex_unlock(&u->iolock);
2678
2679 timeo = unix_stream_data_wait(sk, timeo, last,
2680 last_len, freezable);
2681
2682 if (signal_pending(current)) {
2683 err = sock_intr_errno(timeo);
2684 scm_destroy(&scm);
2685 goto out;
2686 }
2687
2688 mutex_lock(&u->iolock);
2689 goto redo;
2690 unlock:
2691 unix_state_unlock(sk);
2692 break;
2693 }
2694
2695 while (skip >= unix_skb_len(skb)) {
2696 skip -= unix_skb_len(skb);
2697 last = skb;
2698 last_len = skb->len;
2699 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2700 if (!skb)
2701 goto again;
2702 }
2703
2704 unix_state_unlock(sk);
2705
2706 if (check_creds) {
2707 /* Never glue messages from different writers */
2708 if (!unix_skb_scm_eq(skb, &scm))
2709 break;
2710 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2711 /* Copy credentials */
2712 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2713 unix_set_secdata(&scm, skb);
2714 check_creds = true;
2715 }
2716
2717 /* Copy address just once */
2718 if (state->msg && state->msg->msg_name) {
2719 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2720 state->msg->msg_name);
2721 unix_copy_addr(state->msg, skb->sk);
2722 sunaddr = NULL;
2723 }
2724
2725 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2726 skb_get(skb);
2727 chunk = state->recv_actor(skb, skip, chunk, state);
2728 drop_skb = !unix_skb_len(skb);
2729 /* skb is only safe to use if !drop_skb */
2730 consume_skb(skb);
2731 if (chunk < 0) {
2732 if (copied == 0)
2733 copied = -EFAULT;
2734 break;
2735 }
2736 copied += chunk;
2737 size -= chunk;
2738
2739 if (drop_skb) {
2740 /* the skb was touched by a concurrent reader;
2741 * we should not expect anything from this skb
2742 * anymore and assume it invalid - we can be
2743 * sure it was dropped from the socket queue
2744 *
2745 * let's report a short read
2746 */
2747 err = 0;
2748 break;
2749 }
2750
2751 /* Mark read part of skb as used */
2752 if (!(flags & MSG_PEEK)) {
2753 UNIXCB(skb).consumed += chunk;
2754
2755 sk_peek_offset_bwd(sk, chunk);
2756
2757 if (UNIXCB(skb).fp) {
2758 scm_stat_del(sk, skb);
2759 unix_detach_fds(&scm, skb);
2760 }
2761
2762 if (unix_skb_len(skb))
2763 break;
2764
2765 skb_unlink(skb, &sk->sk_receive_queue);
2766 consume_skb(skb);
2767
2768 if (scm.fp)
2769 break;
2770 } else {
2771 /* It is questionable, see note in unix_dgram_recvmsg.
2772 */
2773 if (UNIXCB(skb).fp)
2774 unix_peek_fds(&scm, skb);
2775
2776 sk_peek_offset_fwd(sk, chunk);
2777
2778 if (UNIXCB(skb).fp)
2779 break;
2780
2781 skip = 0;
2782 last = skb;
2783 last_len = skb->len;
2784 unix_state_lock(sk);
2785 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2786 if (skb)
2787 goto again;
2788 unix_state_unlock(sk);
2789 break;
2790 }
2791 } while (size);
2792
2793 mutex_unlock(&u->iolock);
2794 if (state->msg)
2795 scm_recv(sock, state->msg, &scm, flags);
2796 else
2797 scm_destroy(&scm);
2798 out:
2799 return copied ? : err;
2800 }
2801
2802 static int unix_stream_read_actor(struct sk_buff *skb,
2803 int skip, int chunk,
2804 struct unix_stream_read_state *state)
2805 {
2806 int ret;
2807
2808 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2809 state->msg, chunk);
2810 return ret ?: chunk;
2811 }
2812
2813 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2814 size_t size, int flags)
2815 {
2816 struct unix_stream_read_state state = {
2817 .recv_actor = unix_stream_read_actor,
2818 .socket = sk->sk_socket,
2819 .msg = msg,
2820 .size = size,
2821 .flags = flags
2822 };
2823
2824 return unix_stream_read_generic(&state, true);
2825 }
2826
2827 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2828 size_t size, int flags)
2829 {
2830 struct unix_stream_read_state state = {
2831 .recv_actor = unix_stream_read_actor,
2832 .socket = sock,
2833 .msg = msg,
2834 .size = size,
2835 .flags = flags
2836 };
2837
2838 #ifdef CONFIG_BPF_SYSCALL
2839 struct sock *sk = sock->sk;
2840 const struct proto *prot = READ_ONCE(sk->sk_prot);
2841
2842 if (prot != &unix_stream_proto)
2843 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2844 flags & ~MSG_DONTWAIT, NULL);
2845 #endif
2846 return unix_stream_read_generic(&state, true);
2847 }
2848
2849 static int unix_stream_splice_actor(struct sk_buff *skb,
2850 int skip, int chunk,
2851 struct unix_stream_read_state *state)
2852 {
2853 return skb_splice_bits(skb, state->socket->sk,
2854 UNIXCB(skb).consumed + skip,
2855 state->pipe, chunk, state->splice_flags);
2856 }
2857
2858 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2859 struct pipe_inode_info *pipe,
2860 size_t size, unsigned int flags)
2861 {
2862 struct unix_stream_read_state state = {
2863 .recv_actor = unix_stream_splice_actor,
2864 .socket = sock,
2865 .pipe = pipe,
2866 .size = size,
2867 .splice_flags = flags,
2868 };
2869
2870 if (unlikely(*ppos))
2871 return -ESPIPE;
2872
2873 if (sock->file->f_flags & O_NONBLOCK ||
2874 flags & SPLICE_F_NONBLOCK)
2875 state.flags = MSG_DONTWAIT;
2876
2877 return unix_stream_read_generic(&state, false);
2878 }
2879
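/* Shut down one or both directions of @sk and, for connected stream and
 * seqpacket sockets, mirror the shutdown onto the peer (our RCV_SHUTDOWN
 * becomes the peer's SEND_SHUTDOWN and vice versa) before waking it.
 */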
2880 static int unix_shutdown(struct socket *sock, int mode)
2881 {
2882 struct sock *sk = sock->sk;
2883 struct sock *other;
2884
2885 if (mode < SHUT_RD || mode > SHUT_RDWR)
2886 return -EINVAL;
2887 /* This maps:
2888 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2889 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2890 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2891 */
2892 ++mode;
2893
2894 unix_state_lock(sk);
2895 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2896 other = unix_peer(sk);
2897 if (other)
2898 sock_hold(other);
2899 unix_state_unlock(sk);
2900 sk->sk_state_change(sk);
2901
2902 if (other &&
2903 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2904
2905 int peer_mode = 0;
2906 const struct proto *prot = READ_ONCE(other->sk_prot);
2907
2908 if (prot->unhash)
2909 prot->unhash(other);
2910 if (mode&RCV_SHUTDOWN)
2911 peer_mode |= SEND_SHUTDOWN;
2912 if (mode&SEND_SHUTDOWN)
2913 peer_mode |= RCV_SHUTDOWN;
2914 unix_state_lock(other);
2915 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2916 unix_state_unlock(other);
2917 other->sk_state_change(other);
2918 if (peer_mode == SHUTDOWN_MASK)
2919 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2920 else if (peer_mode & RCV_SHUTDOWN)
2921 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2922 }
2923 if (other)
2924 sock_put(other);
2925
2926 return 0;
2927 }
2928
2929 long unix_inq_len(struct sock *sk)
2930 {
2931 struct sk_buff *skb;
2932 long amount = 0;
2933
2934 if (sk->sk_state == TCP_LISTEN)
2935 return -EINVAL;
2936
2937 spin_lock(&sk->sk_receive_queue.lock);
2938 if (sk->sk_type == SOCK_STREAM ||
2939 sk->sk_type == SOCK_SEQPACKET) {
2940 skb_queue_walk(&sk->sk_receive_queue, skb)
2941 amount += unix_skb_len(skb);
2942 } else {
2943 skb = skb_peek(&sk->sk_receive_queue);
2944 if (skb)
2945 amount = skb->len;
2946 }
2947 spin_unlock(&sk->sk_receive_queue.lock);
2948
2949 return amount;
2950 }
2951 EXPORT_SYMBOL_GPL(unix_inq_len);
2952
2953 long unix_outq_len(struct sock *sk)
2954 {
2955 return sk_wmem_alloc_get(sk);
2956 }
2957 EXPORT_SYMBOL_GPL(unix_outq_len);
2958
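/* SIOCUNIXFILE: open the filesystem object this socket is bound to and
 * return it as an O_PATH file descriptor.  Requires CAP_NET_ADMIN over the
 * socket's network namespace; abstract or unbound sockets yield -ENOENT.
 * Illustrative userspace usage (not part of this file):
 *
 *	int pfd = ioctl(sock_fd, SIOCUNIXFILE);
 */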
2959 static int unix_open_file(struct sock *sk)
2960 {
2961 struct path path;
2962 struct file *f;
2963 int fd;
2964
2965 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2966 return -EPERM;
2967
2968 if (!smp_load_acquire(&unix_sk(sk)->addr))
2969 return -ENOENT;
2970
2971 path = unix_sk(sk)->path;
2972 if (!path.dentry)
2973 return -ENOENT;
2974
2975 path_get(&path);
2976
2977 fd = get_unused_fd_flags(O_CLOEXEC);
2978 if (fd < 0)
2979 goto out;
2980
2981 f = dentry_open(&path, O_PATH, current_cred());
2982 if (IS_ERR(f)) {
2983 put_unused_fd(fd);
2984 fd = PTR_ERR(f);
2985 goto out;
2986 }
2987
2988 fd_install(fd, f);
2989 out:
2990 path_put(&path);
2991
2992 return fd;
2993 }
2994
2995 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2996 {
2997 struct sock *sk = sock->sk;
2998 long amount = 0;
2999 int err;
3000
3001 switch (cmd) {
3002 case SIOCOUTQ:
3003 amount = unix_outq_len(sk);
3004 err = put_user(amount, (int __user *)arg);
3005 break;
3006 case SIOCINQ:
3007 amount = unix_inq_len(sk);
3008 if (amount < 0)
3009 err = amount;
3010 else
3011 err = put_user(amount, (int __user *)arg);
3012 break;
3013 case SIOCUNIXFILE:
3014 err = unix_open_file(sk);
3015 break;
3016 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
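/* SIOCATMARK: report whether the next byte to be read is the out-of-band
 * mark (illustrative userspace check: ioctl(fd, SIOCATMARK, &answ)).
 */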
3017 case SIOCATMARK:
3018 {
3019 struct sk_buff *skb;
3020 int answ = 0;
3021
3022 skb = skb_peek(&sk->sk_receive_queue);
3023 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3024 answ = 1;
3025 err = put_user(answ, (int __user *)arg);
3026 }
3027 break;
3028 #endif
3029 default:
3030 err = -ENOIOCTLCMD;
3031 break;
3032 }
3033 return err;
3034 }
3035
3036 #ifdef CONFIG_COMPAT
3037 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3038 {
3039 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3040 }
3041 #endif
3042
3043 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3044 {
3045 struct sock *sk = sock->sk;
3046 __poll_t mask;
3047 u8 shutdown;
3048
3049 sock_poll_wait(file, sock, wait);
3050 mask = 0;
3051 shutdown = READ_ONCE(sk->sk_shutdown);
3052
3053 /* exceptional events? */
3054 if (sk->sk_err)
3055 mask |= EPOLLERR;
3056 if (shutdown == SHUTDOWN_MASK)
3057 mask |= EPOLLHUP;
3058 if (shutdown & RCV_SHUTDOWN)
3059 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3060
3061 /* readable? */
3062 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3063 mask |= EPOLLIN | EPOLLRDNORM;
3064 if (sk_is_readable(sk))
3065 mask |= EPOLLIN | EPOLLRDNORM;
3066 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3067 if (READ_ONCE(unix_sk(sk)->oob_skb))
3068 mask |= EPOLLPRI;
3069 #endif
3070
3071 /* Connection-based need to check for termination and startup */
3072 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3073 sk->sk_state == TCP_CLOSE)
3074 mask |= EPOLLHUP;
3075
3076 /*
3077 * we set writable also when the other side has shut down the
3078 * connection. This prevents stuck sockets.
3079 */
3080 if (unix_writable(sk))
3081 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3082
3083 return mask;
3084 }
3085
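/* Like unix_poll(), but a datagram socket whose peer has not connected back
 * to it is only reported writable while the peer's receive queue has room;
 * otherwise the poller is registered on the peer's wait queue so it is woken
 * when space becomes available.
 */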
3086 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3087 poll_table *wait)
3088 {
3089 struct sock *sk = sock->sk, *other;
3090 unsigned int writable;
3091 __poll_t mask;
3092 u8 shutdown;
3093
3094 sock_poll_wait(file, sock, wait);
3095 mask = 0;
3096 shutdown = READ_ONCE(sk->sk_shutdown);
3097
3098 /* exceptional events? */
3099 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3100 mask |= EPOLLERR |
3101 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3102
3103 if (shutdown & RCV_SHUTDOWN)
3104 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3105 if (shutdown == SHUTDOWN_MASK)
3106 mask |= EPOLLHUP;
3107
3108 /* readable? */
3109 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3110 mask |= EPOLLIN | EPOLLRDNORM;
3111 if (sk_is_readable(sk))
3112 mask |= EPOLLIN | EPOLLRDNORM;
3113
3114 /* Connection-based need to check for termination and startup */
3115 if (sk->sk_type == SOCK_SEQPACKET) {
3116 if (sk->sk_state == TCP_CLOSE)
3117 mask |= EPOLLHUP;
3118 /* connection hasn't started yet? */
3119 if (sk->sk_state == TCP_SYN_SENT)
3120 return mask;
3121 }
3122
3123 /* No write status requested, avoid expensive OUT tests. */
3124 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3125 return mask;
3126
3127 writable = unix_writable(sk);
3128 if (writable) {
3129 unix_state_lock(sk);
3130
3131 other = unix_peer(sk);
3132 if (other && unix_peer(other) != sk &&
3133 unix_recvq_full_lockless(other) &&
3134 unix_dgram_peer_wake_me(sk, other))
3135 writable = 0;
3136
3137 unix_state_unlock(sk);
3138 }
3139
3140 if (writable)
3141 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3142 else
3143 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3144
3145 return mask;
3146 }
3147
3148 #ifdef CONFIG_PROC_FS
3149
3150 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3151
3152 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3153 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
3154 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3155
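/* /proc/net/unix iteration state: the hash bucket lives in the upper bits of
 * *pos and the 1-based offset within that bucket in the lower bits.
 */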
3156 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3157 {
3158 unsigned long offset = get_offset(*pos);
3159 unsigned long bucket = get_bucket(*pos);
3160 struct sock *sk;
3161 unsigned long count = 0;
3162
3163 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3164 if (sock_net(sk) != seq_file_net(seq))
3165 continue;
3166 if (++count == offset)
3167 break;
3168 }
3169
3170 return sk;
3171 }
3172
3173 static struct sock *unix_next_socket(struct seq_file *seq,
3174 struct sock *sk,
3175 loff_t *pos)
3176 {
3177 unsigned long bucket;
3178
3179 while (sk > (struct sock *)SEQ_START_TOKEN) {
3180 sk = sk_next(sk);
3181 if (!sk)
3182 goto next_bucket;
3183 if (sock_net(sk) == seq_file_net(seq))
3184 return sk;
3185 }
3186
3187 do {
3188 sk = unix_from_bucket(seq, pos);
3189 if (sk)
3190 return sk;
3191
3192 next_bucket:
3193 bucket = get_bucket(*pos) + 1;
3194 *pos = set_bucket_offset(bucket, 1);
3195 } while (bucket < ARRAY_SIZE(unix_socket_table));
3196
3197 return NULL;
3198 }
3199
3200 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3201 __acquires(unix_table_lock)
3202 {
3203 spin_lock(&unix_table_lock);
3204
3205 if (!*pos)
3206 return SEQ_START_TOKEN;
3207
3208 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3209 return NULL;
3210
3211 return unix_next_socket(seq, NULL, pos);
3212 }
3213
3214 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3215 {
3216 ++*pos;
3217 return unix_next_socket(seq, v, pos);
3218 }
3219
3220 static void unix_seq_stop(struct seq_file *seq, void *v)
3221 __releases(unix_table_lock)
3222 {
3223 spin_unlock(&unix_table_lock);
3224 }
3225
3226 static int unix_seq_show(struct seq_file *seq, void *v)
3227 {
3228
3229 if (v == SEQ_START_TOKEN)
3230 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3231 "Inode Path\n");
3232 else {
3233 struct sock *s = v;
3234 struct unix_sock *u = unix_sk(s);
3235 unix_state_lock(s);
3236
3237 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3238 s,
3239 refcount_read(&s->sk_refcnt),
3240 0,
3241 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3242 s->sk_type,
3243 s->sk_socket ?
3244 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3245 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3246 sock_i_ino(s));
3247
3248 if (u->addr) { // under unix_table_lock here
3249 int i, len;
3250 seq_putc(seq, ' ');
3251
3252 i = 0;
3253 len = u->addr->len - sizeof(short);
3254 if (!UNIX_ABSTRACT(s))
3255 len--;
3256 else {
3257 seq_putc(seq, '@');
3258 i++;
3259 }
3260 for ( ; i < len; i++)
3261 seq_putc(seq, u->addr->name->sun_path[i] ?:
3262 '@');
3263 }
3264 unix_state_unlock(s);
3265 seq_putc(seq, '\n');
3266 }
3267
3268 return 0;
3269 }
3270
3271 static const struct seq_operations unix_seq_ops = {
3272 .start = unix_seq_start,
3273 .next = unix_seq_next,
3274 .stop = unix_seq_stop,
3275 .show = unix_seq_show,
3276 };
3277
3278 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3279 struct bpf_iter__unix {
3280 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3281 __bpf_md_ptr(struct unix_sock *, unix_sk);
3282 uid_t uid __aligned(8);
3283 };
3284
3285 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3286 struct unix_sock *unix_sk, uid_t uid)
3287 {
3288 struct bpf_iter__unix ctx;
3289
3290 meta->seq_num--; /* skip SEQ_START_TOKEN */
3291 ctx.meta = meta;
3292 ctx.unix_sk = unix_sk;
3293 ctx.uid = uid;
3294 return bpf_iter_run_prog(prog, &ctx);
3295 }
3296
3297 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3298 {
3299 struct bpf_iter_meta meta;
3300 struct bpf_prog *prog;
3301 struct sock *sk = v;
3302 uid_t uid;
3303
3304 if (v == SEQ_START_TOKEN)
3305 return 0;
3306
3307 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3308 meta.seq = seq;
3309 prog = bpf_iter_get_info(&meta, false);
3310 return unix_prog_seq_show(prog, &meta, v, uid);
3311 }
3312
3313 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3314 {
3315 struct bpf_iter_meta meta;
3316 struct bpf_prog *prog;
3317
3318 if (!v) {
3319 meta.seq = seq;
3320 prog = bpf_iter_get_info(&meta, true);
3321 if (prog)
3322 (void)unix_prog_seq_show(prog, &meta, v, 0);
3323 }
3324
3325 unix_seq_stop(seq, v);
3326 }
3327
3328 static const struct seq_operations bpf_iter_unix_seq_ops = {
3329 .start = unix_seq_start,
3330 .next = unix_seq_next,
3331 .stop = bpf_iter_unix_seq_stop,
3332 .show = bpf_iter_unix_seq_show,
3333 };
3334 #endif
3335 #endif
3336
3337 static const struct net_proto_family unix_family_ops = {
3338 .family = PF_UNIX,
3339 .create = unix_create,
3340 .owner = THIS_MODULE,
3341 };
3342
3343
3344 static int __net_init unix_net_init(struct net *net)
3345 {
3346 int error = -ENOMEM;
3347
3348 net->unx.sysctl_max_dgram_qlen = 10;
3349 if (unix_sysctl_register(net))
3350 goto out;
3351
3352 #ifdef CONFIG_PROC_FS
3353 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3354 sizeof(struct seq_net_private))) {
3355 unix_sysctl_unregister(net);
3356 goto out;
3357 }
3358 #endif
3359 error = 0;
3360 out:
3361 return error;
3362 }
3363
3364 static void __net_exit unix_net_exit(struct net *net)
3365 {
3366 unix_sysctl_unregister(net);
3367 remove_proc_entry("unix", net->proc_net);
3368 }
3369
3370 static struct pernet_operations unix_net_ops = {
3371 .init = unix_net_init,
3372 .exit = unix_net_exit,
3373 };
3374
3375 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3376 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3377 struct unix_sock *unix_sk, uid_t uid)
3378
3379 static const struct bpf_iter_seq_info unix_seq_info = {
3380 .seq_ops = &bpf_iter_unix_seq_ops,
3381 .init_seq_private = bpf_iter_init_seq_net,
3382 .fini_seq_private = bpf_iter_fini_seq_net,
3383 .seq_priv_size = sizeof(struct seq_net_private),
3384 };
3385
3386 static struct bpf_iter_reg unix_reg_info = {
3387 .target = "unix",
3388 .ctx_arg_info_size = 1,
3389 .ctx_arg_info = {
3390 { offsetof(struct bpf_iter__unix, unix_sk),
3391 PTR_TO_BTF_ID_OR_NULL },
3392 },
3393 .seq_info = &unix_seq_info,
3394 };
3395
3396 static void __init bpf_iter_register(void)
3397 {
3398 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3399 if (bpf_iter_reg_target(&unix_reg_info))
3400 pr_warn("Warning: could not register bpf iterator unix\n");
3401 }
3402 #endif
3403
3404 static int __init af_unix_init(void)
3405 {
3406 int rc = -1;
3407
3408 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3409
3410 rc = proto_register(&unix_dgram_proto, 1);
3411 if (rc != 0) {
3412 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3413 goto out;
3414 }
3415
3416 rc = proto_register(&unix_stream_proto, 1);
3417 if (rc != 0) {
3418 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3419 proto_unregister(&unix_dgram_proto);
3420 goto out;
3421 }
3422
3423 sock_register(&unix_family_ops);
3424 register_pernet_subsys(&unix_net_ops);
3425 unix_bpf_build_proto();
3426
3427 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3428 bpf_iter_register();
3429 #endif
3430
3431 out:
3432 return rc;
3433 }
3434
3435 static void __exit af_unix_exit(void)
3436 {
3437 sock_unregister(PF_UNIX);
3438 proto_unregister(&unix_dgram_proto);
3439 proto_unregister(&unix_stream_proto);
3440 unregister_pernet_subsys(&unix_net_ops);
3441 }
3442
3443 /* Earlier than device_initcall() so that other drivers invoking
3444 request_module() don't end up in a loop when modprobe tries
3445 to use a UNIX socket. But later than subsys_initcall() because
3446 we depend on stuff initialised there */
3447 fs_initcall(af_unix_init);
3448 module_exit(af_unix_exit);
3449
3450 MODULE_LICENSE("GPL");
3451 MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);
3452 MODULE_ALIAS_NETPROTO(PF_UNIX);
3453