1 /*
2 * NETLINK Kernel-user communication protocol.
3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 * Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
14 * added netlink_proto_exit
15 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
16 * use nlk_sk, as sk->protinfo is on a diet 8)
17 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
18 * - inc module use count of module that owns
19 * the kernel socket in case userspace opens
20 * socket of same protocol
21 * - remove all module support, since netlink is
22 * mandatory if CONFIG_NET=y these days
23 */
24
25 #include <linux/module.h>
26
27 #include <linux/capability.h>
28 #include <linux/kernel.h>
29 #include <linux/init.h>
30 #include <linux/signal.h>
31 #include <linux/sched.h>
32 #include <linux/errno.h>
33 #include <linux/string.h>
34 #include <linux/stat.h>
35 #include <linux/socket.h>
36 #include <linux/un.h>
37 #include <linux/fcntl.h>
38 #include <linux/termios.h>
39 #include <linux/sockios.h>
40 #include <linux/net.h>
41 #include <linux/fs.h>
42 #include <linux/slab.h>
43 #include <asm/uaccess.h>
44 #include <linux/skbuff.h>
45 #include <linux/netdevice.h>
46 #include <linux/rtnetlink.h>
47 #include <linux/proc_fs.h>
48 #include <linux/seq_file.h>
49 #include <linux/notifier.h>
50 #include <linux/security.h>
51 #include <linux/jhash.h>
52 #include <linux/jiffies.h>
53 #include <linux/random.h>
54 #include <linux/bitops.h>
55 #include <linux/mm.h>
56 #include <linux/types.h>
57 #include <linux/audit.h>
58 #include <linux/mutex.h>
59 #include <linux/vmalloc.h>
60 #include <linux/if_arp.h>
61 #include <linux/rhashtable.h>
62 #include <asm/cacheflush.h>
63 #include <linux/hash.h>
64 #include <linux/genetlink.h>
65
66 #include <net/net_namespace.h>
67 #include <net/sock.h>
68 #include <net/scm.h>
69 #include <net/netlink.h>
70
71 #include "af_netlink.h"
72
73 struct listeners {
74 struct rcu_head rcu;
75 unsigned long masks[0];
76 };
77
78 /* state bits */
79 #define NETLINK_S_CONGESTED 0x0
80
81 /* flags */
82 #define NETLINK_F_KERNEL_SOCKET 0x1
83 #define NETLINK_F_RECV_PKTINFO 0x2
84 #define NETLINK_F_BROADCAST_SEND_ERROR 0x4
85 #define NETLINK_F_RECV_NO_ENOBUFS 0x8
86 #define NETLINK_F_LISTEN_ALL_NSID 0x10
87 #define NETLINK_F_CAP_ACK 0x20
88
89 static inline int netlink_is_kernel(struct sock *sk)
90 {
91 return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
92 }
93
94 struct netlink_table *nl_table __read_mostly;
95 EXPORT_SYMBOL_GPL(nl_table);
96
97 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
98
99 static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
100
101 static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
102 "nlk_cb_mutex-ROUTE",
103 "nlk_cb_mutex-1",
104 "nlk_cb_mutex-USERSOCK",
105 "nlk_cb_mutex-FIREWALL",
106 "nlk_cb_mutex-SOCK_DIAG",
107 "nlk_cb_mutex-NFLOG",
108 "nlk_cb_mutex-XFRM",
109 "nlk_cb_mutex-SELINUX",
110 "nlk_cb_mutex-ISCSI",
111 "nlk_cb_mutex-AUDIT",
112 "nlk_cb_mutex-FIB_LOOKUP",
113 "nlk_cb_mutex-CONNECTOR",
114 "nlk_cb_mutex-NETFILTER",
115 "nlk_cb_mutex-IP6_FW",
116 "nlk_cb_mutex-DNRTMSG",
117 "nlk_cb_mutex-KOBJECT_UEVENT",
118 "nlk_cb_mutex-GENERIC",
119 "nlk_cb_mutex-17",
120 "nlk_cb_mutex-SCSITRANSPORT",
121 "nlk_cb_mutex-ECRYPTFS",
122 "nlk_cb_mutex-RDMA",
123 "nlk_cb_mutex-CRYPTO",
124 "nlk_cb_mutex-SMC",
125 "nlk_cb_mutex-23",
126 "nlk_cb_mutex-24",
127 "nlk_cb_mutex-25",
128 "nlk_cb_mutex-26",
129 "nlk_cb_mutex-27",
130 "nlk_cb_mutex-28",
131 "nlk_cb_mutex-29",
132 "nlk_cb_mutex-30",
133 "nlk_cb_mutex-31",
134 "nlk_cb_mutex-MAX_LINKS"
135 };
136
137 static int netlink_dump(struct sock *sk);
138 static void netlink_skb_destructor(struct sk_buff *skb);
139
140 /* nl_table locking explained:
141 * Lookup and traversal are protected with an RCU read-side lock. Insertion
142 * and removal are protected with per bucket lock while using RCU list
143 * modification primitives and may run in parallel to RCU protected lookups.
144 * Destruction of the Netlink socket may only occur *after* nl_table_lock has
145 * been acquired, either during or after the socket has been removed from
146 * the list and after an RCU grace period.
147 */
148 DEFINE_RWLOCK(nl_table_lock);
149 EXPORT_SYMBOL_GPL(nl_table_lock);
150 static atomic_t nl_table_users = ATOMIC_INIT(0);
151
152 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
153
154 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
155
156 static DEFINE_SPINLOCK(netlink_tap_lock);
157 static struct list_head netlink_tap_all __read_mostly;
158
159 static const struct rhashtable_params netlink_rhashtable_params;
160
161 static inline u32 netlink_group_mask(u32 group)
162 {
163 return group ? 1 << (group - 1) : 0;
164 }
165
166 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
167 gfp_t gfp_mask)
168 {
169 unsigned int len = skb_end_offset(skb);
170 struct sk_buff *new;
171
172 new = alloc_skb(len, gfp_mask);
173 if (new == NULL)
174 return NULL;
175
176 NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
177 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
178 NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
179
180 memcpy(skb_put(new, len), skb->data, len);
181 return new;
182 }
183
184 int netlink_add_tap(struct netlink_tap *nt)
185 {
186 if (unlikely(nt->dev->type != ARPHRD_NETLINK))
187 return -EINVAL;
188
189 spin_lock(&netlink_tap_lock);
190 list_add_rcu(&nt->list, &netlink_tap_all);
191 spin_unlock(&netlink_tap_lock);
192
193 __module_get(nt->module);
194
195 return 0;
196 }
197 EXPORT_SYMBOL_GPL(netlink_add_tap);
198
199 static int __netlink_remove_tap(struct netlink_tap *nt)
200 {
201 bool found = false;
202 struct netlink_tap *tmp;
203
204 spin_lock(&netlink_tap_lock);
205
206 list_for_each_entry(tmp, &netlink_tap_all, list) {
207 if (nt == tmp) {
208 list_del_rcu(&nt->list);
209 found = true;
210 goto out;
211 }
212 }
213
214 pr_warn("__netlink_remove_tap: %p not found\n", nt);
215 out:
216 spin_unlock(&netlink_tap_lock);
217
218 if (found)
219 module_put(nt->module);
220
221 return found ? 0 : -ENODEV;
222 }
223
224 int netlink_remove_tap(struct netlink_tap *nt)
225 {
226 int ret;
227
228 ret = __netlink_remove_tap(nt);
229 synchronize_net();
230
231 return ret;
232 }
233 EXPORT_SYMBOL_GPL(netlink_remove_tap);
234
235 static bool netlink_filter_tap(const struct sk_buff *skb)
236 {
237 struct sock *sk = skb->sk;
238
239 /* We take the more conservative approach and
240 * whitelist socket protocols that may pass.
241 */
242 switch (sk->sk_protocol) {
243 case NETLINK_ROUTE:
244 case NETLINK_USERSOCK:
245 case NETLINK_SOCK_DIAG:
246 case NETLINK_NFLOG:
247 case NETLINK_XFRM:
248 case NETLINK_FIB_LOOKUP:
249 case NETLINK_NETFILTER:
250 case NETLINK_GENERIC:
251 return true;
252 }
253
254 return false;
255 }
256
257 static int __netlink_deliver_tap_skb(struct sk_buff *skb,
258 struct net_device *dev)
259 {
260 struct sk_buff *nskb;
261 struct sock *sk = skb->sk;
262 int ret = -ENOMEM;
263
264 if (!net_eq(dev_net(dev), sock_net(sk)))
265 return 0;
266
267 dev_hold(dev);
268
269 if (is_vmalloc_addr(skb->head))
270 nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
271 else
272 nskb = skb_clone(skb, GFP_ATOMIC);
273 if (nskb) {
274 nskb->dev = dev;
275 nskb->protocol = htons((u16) sk->sk_protocol);
276 nskb->pkt_type = netlink_is_kernel(sk) ?
277 PACKET_KERNEL : PACKET_USER;
278 skb_reset_network_header(nskb);
279 ret = dev_queue_xmit(nskb);
280 if (unlikely(ret > 0))
281 ret = net_xmit_errno(ret);
282 }
283
284 dev_put(dev);
285 return ret;
286 }
287
288 static void __netlink_deliver_tap(struct sk_buff *skb)
289 {
290 int ret;
291 struct netlink_tap *tmp;
292
293 if (!netlink_filter_tap(skb))
294 return;
295
296 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
297 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
298 if (unlikely(ret))
299 break;
300 }
301 }
302
303 static void netlink_deliver_tap(struct sk_buff *skb)
304 {
305 rcu_read_lock();
306
307 if (unlikely(!list_empty(&netlink_tap_all)))
308 __netlink_deliver_tap(skb);
309
310 rcu_read_unlock();
311 }
312
313 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
314 struct sk_buff *skb)
315 {
316 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
317 netlink_deliver_tap(skb);
318 }
319
320 static void netlink_overrun(struct sock *sk)
321 {
322 struct netlink_sock *nlk = nlk_sk(sk);
323
324 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
325 if (!test_and_set_bit(NETLINK_S_CONGESTED,
326 &nlk_sk(sk)->state)) {
327 sk->sk_err = ENOBUFS;
328 sk->sk_error_report(sk);
329 }
330 }
331 atomic_inc(&sk->sk_drops);
332 }
333
334 static void netlink_rcv_wake(struct sock *sk)
335 {
336 struct netlink_sock *nlk = nlk_sk(sk);
337
338 if (skb_queue_empty(&sk->sk_receive_queue))
339 clear_bit(NETLINK_S_CONGESTED, &nlk->state);
340 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
341 wake_up_interruptible(&nlk->wait);
342 }
343
344 static void netlink_skb_destructor(struct sk_buff *skb)
345 {
346 if (is_vmalloc_addr(skb->head)) {
347 if (!skb->cloned ||
348 !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
349 vfree(skb->head);
350
351 skb->head = NULL;
352 }
353 if (skb->sk != NULL)
354 sock_rfree(skb);
355 }
356
357 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
358 {
359 WARN_ON(skb->sk != NULL);
360 skb->sk = sk;
361 skb->destructor = netlink_skb_destructor;
362 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
363 sk_mem_charge(sk, skb->truesize);
364 }
365
366 static void netlink_sock_destruct(struct sock *sk)
367 {
368 struct netlink_sock *nlk = nlk_sk(sk);
369
370 if (nlk->cb_running) {
371 if (nlk->cb.done)
372 nlk->cb.done(&nlk->cb);
373 module_put(nlk->cb.module);
374 kfree_skb(nlk->cb.skb);
375 }
376
377 skb_queue_purge(&sk->sk_receive_queue);
378
379 if (!sock_flag(sk, SOCK_DEAD)) {
380 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
381 return;
382 }
383
384 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
385 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
386 WARN_ON(nlk_sk(sk)->groups);
387 }
388
389 static void netlink_sock_destruct_work(struct work_struct *work)
390 {
391 struct netlink_sock *nlk = container_of(work, struct netlink_sock,
392 work);
393
394 sk_free(&nlk->sk);
395 }
396
397 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
398 * SMP. Look, when several writers sleep and a reader wakes them up, all but one
399 * immediately hit the write lock and grab all the CPUs. Exclusive sleep solves
400 * this, _but_ remember, it adds useless work on UP machines.
401 */
402
403 void netlink_table_grab(void)
404 __acquires(nl_table_lock)
405 {
406 might_sleep();
407
408 write_lock_irq(&nl_table_lock);
409
410 if (atomic_read(&nl_table_users)) {
411 DECLARE_WAITQUEUE(wait, current);
412
413 add_wait_queue_exclusive(&nl_table_wait, &wait);
414 for (;;) {
415 set_current_state(TASK_UNINTERRUPTIBLE);
416 if (atomic_read(&nl_table_users) == 0)
417 break;
418 write_unlock_irq(&nl_table_lock);
419 schedule();
420 write_lock_irq(&nl_table_lock);
421 }
422
423 __set_current_state(TASK_RUNNING);
424 remove_wait_queue(&nl_table_wait, &wait);
425 }
426 }
427
428 void netlink_table_ungrab(void)
429 __releases(nl_table_lock)
430 {
431 write_unlock_irq(&nl_table_lock);
432 wake_up(&nl_table_wait);
433 }
434
435 static inline void
436 netlink_lock_table(void)
437 {
438 /* read_lock() synchronizes us to netlink_table_grab */
439
440 read_lock(&nl_table_lock);
441 atomic_inc(&nl_table_users);
442 read_unlock(&nl_table_lock);
443 }
444
445 static inline void
446 netlink_unlock_table(void)
447 {
448 if (atomic_dec_and_test(&nl_table_users))
449 wake_up(&nl_table_wait);
450 }
451
452 struct netlink_compare_arg
453 {
454 possible_net_t pnet;
455 u32 portid;
456 };
457
458 /* Doing sizeof directly may yield 4 extra bytes on 64-bit. */
459 #define netlink_compare_arg_len \
460 (offsetof(struct netlink_compare_arg, portid) + sizeof(u32))
461
462 static inline int netlink_compare(struct rhashtable_compare_arg *arg,
463 const void *ptr)
464 {
465 const struct netlink_compare_arg *x = arg->key;
466 const struct netlink_sock *nlk = ptr;
467
468 return nlk->portid != x->portid ||
469 !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
470 }
471
472 static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
473 struct net *net, u32 portid)
474 {
475 memset(arg, 0, sizeof(*arg));
476 write_pnet(&arg->pnet, net);
477 arg->portid = portid;
478 }
479
480 static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
481 struct net *net)
482 {
483 struct netlink_compare_arg arg;
484
485 netlink_compare_arg_init(&arg, net, portid);
486 return rhashtable_lookup_fast(&table->hash, &arg,
487 netlink_rhashtable_params);
488 }
489
490 static int __netlink_insert(struct netlink_table *table, struct sock *sk)
491 {
492 struct netlink_compare_arg arg;
493
494 netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
495 return rhashtable_lookup_insert_key(&table->hash, &arg,
496 &nlk_sk(sk)->node,
497 netlink_rhashtable_params);
498 }
499
500 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
501 {
502 struct netlink_table *table = &nl_table[protocol];
503 struct sock *sk;
504
505 rcu_read_lock();
506 sk = __netlink_lookup(table, portid, net);
507 if (sk)
508 sock_hold(sk);
509 rcu_read_unlock();
510
511 return sk;
512 }
513
514 static const struct proto_ops netlink_ops;
515
516 static void
517 netlink_update_listeners(struct sock *sk)
518 {
519 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
520 unsigned long mask;
521 unsigned int i;
522 struct listeners *listeners;
523
524 listeners = nl_deref_protected(tbl->listeners);
525 if (!listeners)
526 return;
527
528 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
529 mask = 0;
530 sk_for_each_bound(sk, &tbl->mc_list) {
531 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
532 mask |= nlk_sk(sk)->groups[i];
533 }
534 listeners->masks[i] = mask;
535 }
536 /* this function is only called with the netlink table "grabbed", which
537 * makes sure updates are visible before bind or setsockopt return. */
538 }
539
540 static int netlink_insert(struct sock *sk, u32 portid)
541 {
542 struct netlink_table *table = &nl_table[sk->sk_protocol];
543 int err;
544
545 lock_sock(sk);
546
547 err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
548 if (nlk_sk(sk)->bound)
549 goto err;
550
551 err = -ENOMEM;
552 if (BITS_PER_LONG > 32 &&
553 unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
554 goto err;
555
556 nlk_sk(sk)->portid = portid;
557 sock_hold(sk);
558
559 err = __netlink_insert(table, sk);
560 if (err) {
561 /* In case the hashtable backend returns with -EBUSY
562 * from here, it must not escape to the caller.
563 */
564 if (unlikely(err == -EBUSY))
565 err = -EOVERFLOW;
566 if (err == -EEXIST)
567 err = -EADDRINUSE;
568 sock_put(sk);
569 goto err;
570 }
571
572 /* We need to ensure that the socket is hashed and visible. */
573 smp_wmb();
574 nlk_sk(sk)->bound = portid;
575
576 err:
577 release_sock(sk);
578 return err;
579 }
580
581 static void netlink_remove(struct sock *sk)
582 {
583 struct netlink_table *table;
584
585 table = &nl_table[sk->sk_protocol];
586 if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
587 netlink_rhashtable_params)) {
588 WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
589 __sock_put(sk);
590 }
591
592 netlink_table_grab();
593 if (nlk_sk(sk)->subscriptions) {
594 __sk_del_bind_node(sk);
595 netlink_update_listeners(sk);
596 }
597 if (sk->sk_protocol == NETLINK_GENERIC)
598 atomic_inc(&genl_sk_destructing_cnt);
599 netlink_table_ungrab();
600 }
601
602 static struct proto netlink_proto = {
603 .name = "NETLINK",
604 .owner = THIS_MODULE,
605 .obj_size = sizeof(struct netlink_sock),
606 };
607
608 static int __netlink_create(struct net *net, struct socket *sock,
609 struct mutex *cb_mutex, int protocol,
610 int kern)
611 {
612 struct sock *sk;
613 struct netlink_sock *nlk;
614
615 sock->ops = &netlink_ops;
616
617 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
618 if (!sk)
619 return -ENOMEM;
620
621 sock_init_data(sock, sk);
622
623 nlk = nlk_sk(sk);
624 if (cb_mutex) {
625 nlk->cb_mutex = cb_mutex;
626 } else {
627 nlk->cb_mutex = &nlk->cb_def_mutex;
628 mutex_init(nlk->cb_mutex);
629 lockdep_set_class_and_name(nlk->cb_mutex,
630 nlk_cb_mutex_keys + protocol,
631 nlk_cb_mutex_key_strings[protocol]);
632 }
633 init_waitqueue_head(&nlk->wait);
634
635 sk->sk_destruct = netlink_sock_destruct;
636 sk->sk_protocol = protocol;
637 return 0;
638 }
639
640 static int netlink_create(struct net *net, struct socket *sock, int protocol,
641 int kern)
642 {
643 struct module *module = NULL;
644 struct mutex *cb_mutex;
645 struct netlink_sock *nlk;
646 int (*bind)(struct net *net, int group);
647 void (*unbind)(struct net *net, int group);
648 int err = 0;
649
650 sock->state = SS_UNCONNECTED;
651
652 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
653 return -ESOCKTNOSUPPORT;
654
655 if (protocol < 0 || protocol >= MAX_LINKS)
656 return -EPROTONOSUPPORT;
657
658 netlink_lock_table();
659 #ifdef CONFIG_MODULES
660 if (!nl_table[protocol].registered) {
661 netlink_unlock_table();
662 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
663 netlink_lock_table();
664 }
665 #endif
666 if (nl_table[protocol].registered &&
667 try_module_get(nl_table[protocol].module))
668 module = nl_table[protocol].module;
669 else
670 err = -EPROTONOSUPPORT;
671 cb_mutex = nl_table[protocol].cb_mutex;
672 bind = nl_table[protocol].bind;
673 unbind = nl_table[protocol].unbind;
674 netlink_unlock_table();
675
676 if (err < 0)
677 goto out;
678
679 err = __netlink_create(net, sock, cb_mutex, protocol, kern);
680 if (err < 0)
681 goto out_module;
682
683 local_bh_disable();
684 sock_prot_inuse_add(net, &netlink_proto, 1);
685 local_bh_enable();
686
687 nlk = nlk_sk(sock->sk);
688 nlk->module = module;
689 nlk->netlink_bind = bind;
690 nlk->netlink_unbind = unbind;
691 out:
692 return err;
693
694 out_module:
695 module_put(module);
696 goto out;
697 }
698
699 static void deferred_put_nlk_sk(struct rcu_head *head)
700 {
701 struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
702 struct sock *sk = &nlk->sk;
703
704 if (!atomic_dec_and_test(&sk->sk_refcnt))
705 return;
706
707 if (nlk->cb_running && nlk->cb.done) {
708 INIT_WORK(&nlk->work, netlink_sock_destruct_work);
709 schedule_work(&nlk->work);
710 return;
711 }
712
713 sk_free(sk);
714 }
715
716 static int netlink_release(struct socket *sock)
717 {
718 struct sock *sk = sock->sk;
719 struct netlink_sock *nlk;
720
721 if (!sk)
722 return 0;
723
724 netlink_remove(sk);
725 sock_orphan(sk);
726 nlk = nlk_sk(sk);
727
728 /*
729 * OK. Socket is unlinked, any packets that arrive now
730 * will be purged.
731 */
732
733 /* must not acquire netlink_table_lock in any way again before unbind
734 * and notifying genetlink is done as otherwise it might deadlock
735 */
736 if (nlk->netlink_unbind) {
737 int i;
738
739 for (i = 0; i < nlk->ngroups; i++)
740 if (test_bit(i, nlk->groups))
741 nlk->netlink_unbind(sock_net(sk), i + 1);
742 }
743 if (sk->sk_protocol == NETLINK_GENERIC &&
744 atomic_dec_return(&genl_sk_destructing_cnt) == 0)
745 wake_up(&genl_sk_destructing_waitq);
746
747 sock->sk = NULL;
748 wake_up_interruptible_all(&nlk->wait);
749
750 skb_queue_purge(&sk->sk_write_queue);
751
752 if (nlk->portid && nlk->bound) {
753 struct netlink_notify n = {
754 .net = sock_net(sk),
755 .protocol = sk->sk_protocol,
756 .portid = nlk->portid,
757 };
758 atomic_notifier_call_chain(&netlink_chain,
759 NETLINK_URELEASE, &n);
760 }
761
762 module_put(nlk->module);
763
764 if (netlink_is_kernel(sk)) {
765 netlink_table_grab();
766 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
767 if (--nl_table[sk->sk_protocol].registered == 0) {
768 struct listeners *old;
769
770 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
771 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
772 kfree_rcu(old, rcu);
773 nl_table[sk->sk_protocol].module = NULL;
774 nl_table[sk->sk_protocol].bind = NULL;
775 nl_table[sk->sk_protocol].unbind = NULL;
776 nl_table[sk->sk_protocol].flags = 0;
777 nl_table[sk->sk_protocol].registered = 0;
778 }
779 netlink_table_ungrab();
780 }
781
782 kfree(nlk->groups);
783 nlk->groups = NULL;
784
785 local_bh_disable();
786 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
787 local_bh_enable();
788 call_rcu(&nlk->rcu, deferred_put_nlk_sk);
789 return 0;
790 }
791
792 static int netlink_autobind(struct socket *sock)
793 {
794 struct sock *sk = sock->sk;
795 struct net *net = sock_net(sk);
796 struct netlink_table *table = &nl_table[sk->sk_protocol];
797 s32 portid = task_tgid_vnr(current);
798 int err;
799 s32 rover = -4096;
800 bool ok;
801
802 retry:
803 cond_resched();
804 rcu_read_lock();
805 ok = !__netlink_lookup(table, portid, net);
806 rcu_read_unlock();
807 if (!ok) {
808 /* Bind collision, search negative portid values. */
809 if (rover == -4096)
810 /* rover will be in range [S32_MIN, -4097] */
811 rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
812 else if (rover >= -4096)
813 rover = -4097;
814 portid = rover--;
815 goto retry;
816 }
817
818 err = netlink_insert(sk, portid);
819 if (err == -EADDRINUSE)
820 goto retry;
821
822 /* If 2 threads race to autobind, that is fine. */
823 if (err == -EBUSY)
824 err = 0;
825
826 return err;
827 }
828
829 /**
830 * __netlink_ns_capable - General netlink message capability test
831 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
832 * @user_ns: The user namespace of the capability to use
833 * @cap: The capability to use
834 *
835 * Test to see if the opener of the socket we received the message
836 * from had the capability @cap when the netlink socket was created
837 * and the sender of the message has it in the user namespace @user_ns.
838 */
839 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
840 struct user_namespace *user_ns, int cap)
841 {
842 return ((nsp->flags & NETLINK_SKB_DST) ||
843 file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
844 ns_capable(user_ns, cap);
845 }
846 EXPORT_SYMBOL(__netlink_ns_capable);
847
848 /**
849 * netlink_ns_capable - General netlink message capability test
850 * @skb: socket buffer holding a netlink command from userspace
851 * @user_ns: The user namespace of the capability to use
852 * @cap: The capability to use
853 *
854 * Test to see if the opener of the socket we received the message
855 * from had the capability @cap when the netlink socket was created
856 * and the sender of the message has it in the user namespace @user_ns.
857 */
858 bool netlink_ns_capable(const struct sk_buff *skb,
859 struct user_namespace *user_ns, int cap)
860 {
861 return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
862 }
863 EXPORT_SYMBOL(netlink_ns_capable);
864
865 /**
866 * netlink_capable - Netlink global message capability test
867 * @skb: socket buffer holding a netlink command from userspace
868 * @cap: The capability to use
869 *
870 * Test to see if the opener of the socket we received the message
871 * from had the capability @cap when the netlink socket was created
872 * and the sender of the message has it in all user namespaces.
873 */
874 bool netlink_capable(const struct sk_buff *skb, int cap)
875 {
876 return netlink_ns_capable(skb, &init_user_ns, cap);
877 }
878 EXPORT_SYMBOL(netlink_capable);
879
880 /**
881 * netlink_net_capable - Netlink network namespace message capability test
882 * @skb: socket buffer holding a netlink command from userspace
883 * @cap: The capability to use
884 *
885 * Test to see if the opener of the socket we received the message
886 * from had the capability @cap when the netlink socket was created
887 * and the sender of the message has it over the network namespace of
888 * the socket we received the message from.
889 */
890 bool netlink_net_capable(const struct sk_buff *skb, int cap)
891 {
892 return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
893 }
894 EXPORT_SYMBOL(netlink_net_capable);
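/* Usage sketch (illustrative only, not part of this file): a message handler
 * typically uses the helpers above to gate privileged commands on the sending
 * socket.  The handler name and the choice of CAP_NET_ADMIN are hypothetical.
 *
 *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 *			return -EPERM;
 *		return 0;
 *	}
 */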
895
896 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
897 {
898 return (nl_table[sock->sk->sk_protocol].flags & flag) ||
899 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
900 }
901
902 static void
903 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
904 {
905 struct netlink_sock *nlk = nlk_sk(sk);
906
907 if (nlk->subscriptions && !subscriptions)
908 __sk_del_bind_node(sk);
909 else if (!nlk->subscriptions && subscriptions)
910 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
911 nlk->subscriptions = subscriptions;
912 }
913
914 static int netlink_realloc_groups(struct sock *sk)
915 {
916 struct netlink_sock *nlk = nlk_sk(sk);
917 unsigned int groups;
918 unsigned long *new_groups;
919 int err = 0;
920
921 netlink_table_grab();
922
923 groups = nl_table[sk->sk_protocol].groups;
924 if (!nl_table[sk->sk_protocol].registered) {
925 err = -ENOENT;
926 goto out_unlock;
927 }
928
929 if (nlk->ngroups >= groups)
930 goto out_unlock;
931
932 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
933 if (new_groups == NULL) {
934 err = -ENOMEM;
935 goto out_unlock;
936 }
937 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
938 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
939
940 nlk->groups = new_groups;
941 nlk->ngroups = groups;
942 out_unlock:
943 netlink_table_ungrab();
944 return err;
945 }
946
947 static void netlink_undo_bind(int group, long unsigned int groups,
948 struct sock *sk)
949 {
950 struct netlink_sock *nlk = nlk_sk(sk);
951 int undo;
952
953 if (!nlk->netlink_unbind)
954 return;
955
956 for (undo = 0; undo < group; undo++)
957 if (test_bit(undo, &groups))
958 nlk->netlink_unbind(sock_net(sk), undo + 1);
959 }
960
961 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
962 int addr_len)
963 {
964 struct sock *sk = sock->sk;
965 struct net *net = sock_net(sk);
966 struct netlink_sock *nlk = nlk_sk(sk);
967 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
968 int err;
969 long unsigned int groups = nladdr->nl_groups;
970 bool bound;
971
972 if (addr_len < sizeof(struct sockaddr_nl))
973 return -EINVAL;
974
975 if (nladdr->nl_family != AF_NETLINK)
976 return -EINVAL;
977
978 /* Only the superuser is allowed to listen to multicasts */
979 if (groups) {
980 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
981 return -EPERM;
982 err = netlink_realloc_groups(sk);
983 if (err)
984 return err;
985 }
986
987 bound = nlk->bound;
988 if (bound) {
989 /* Ensure nlk->portid is up-to-date. */
990 smp_rmb();
991
992 if (nladdr->nl_pid != nlk->portid)
993 return -EINVAL;
994 }
995
996 if (nlk->netlink_bind && groups) {
997 int group;
998
999 for (group = 0; group < nlk->ngroups; group++) {
1000 if (!test_bit(group, &groups))
1001 continue;
1002 err = nlk->netlink_bind(net, group + 1);
1003 if (!err)
1004 continue;
1005 netlink_undo_bind(group, groups, sk);
1006 return err;
1007 }
1008 }
1009
1010 /* No need for barriers here as we return to user-space without
1011 * using any of the bound attributes.
1012 */
1013 if (!bound) {
1014 err = nladdr->nl_pid ?
1015 netlink_insert(sk, nladdr->nl_pid) :
1016 netlink_autobind(sock);
1017 if (err) {
1018 netlink_undo_bind(nlk->ngroups, groups, sk);
1019 return err;
1020 }
1021 }
1022
1023 if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
1024 return 0;
1025
1026 netlink_table_grab();
1027 netlink_update_subscriptions(sk, nlk->subscriptions +
1028 hweight32(groups) -
1029 hweight32(nlk->groups[0]));
1030 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
1031 netlink_update_listeners(sk);
1032 netlink_table_ungrab();
1033
1034 return 0;
1035 }
1036
1037 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
1038 int alen, int flags)
1039 {
1040 int err = 0;
1041 struct sock *sk = sock->sk;
1042 struct netlink_sock *nlk = nlk_sk(sk);
1043 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1044
1045 if (alen < sizeof(addr->sa_family))
1046 return -EINVAL;
1047
1048 if (addr->sa_family == AF_UNSPEC) {
1049 sk->sk_state = NETLINK_UNCONNECTED;
1050 nlk->dst_portid = 0;
1051 nlk->dst_group = 0;
1052 return 0;
1053 }
1054 if (addr->sa_family != AF_NETLINK)
1055 return -EINVAL;
1056
1057 if (alen < sizeof(struct sockaddr_nl))
1058 return -EINVAL;
1059
1060 if ((nladdr->nl_groups || nladdr->nl_pid) &&
1061 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1062 return -EPERM;
1063
1064 /* No need for barriers here as we return to user-space without
1065 * using any of the bound attributes.
1066 */
1067 if (!nlk->bound)
1068 err = netlink_autobind(sock);
1069
1070 if (err == 0) {
1071 sk->sk_state = NETLINK_CONNECTED;
1072 nlk->dst_portid = nladdr->nl_pid;
1073 nlk->dst_group = ffs(nladdr->nl_groups);
1074 }
1075
1076 return err;
1077 }
1078
1079 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1080 int *addr_len, int peer)
1081 {
1082 struct sock *sk = sock->sk;
1083 struct netlink_sock *nlk = nlk_sk(sk);
1084 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
1085
1086 nladdr->nl_family = AF_NETLINK;
1087 nladdr->nl_pad = 0;
1088 *addr_len = sizeof(*nladdr);
1089
1090 if (peer) {
1091 nladdr->nl_pid = nlk->dst_portid;
1092 nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
1093 } else {
1094 nladdr->nl_pid = nlk->portid;
1095 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
1096 }
1097 return 0;
1098 }
1099
1100 static int netlink_ioctl(struct socket *sock, unsigned int cmd,
1101 unsigned long arg)
1102 {
1103 /* try to hand this ioctl down to the NIC drivers.
1104 */
1105 return -ENOIOCTLCMD;
1106 }
1107
1108 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1109 {
1110 struct sock *sock;
1111 struct netlink_sock *nlk;
1112
1113 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
1114 if (!sock)
1115 return ERR_PTR(-ECONNREFUSED);
1116
1117 /* Don't bother queuing skb if kernel socket has no input function */
1118 nlk = nlk_sk(sock);
1119 if (sock->sk_state == NETLINK_CONNECTED &&
1120 nlk->dst_portid != nlk_sk(ssk)->portid) {
1121 sock_put(sock);
1122 return ERR_PTR(-ECONNREFUSED);
1123 }
1124 return sock;
1125 }
1126
1127 struct sock *netlink_getsockbyfilp(struct file *filp)
1128 {
1129 struct inode *inode = file_inode(filp);
1130 struct sock *sock;
1131
1132 if (!S_ISSOCK(inode->i_mode))
1133 return ERR_PTR(-ENOTSOCK);
1134
1135 sock = SOCKET_I(inode)->sk;
1136 if (sock->sk_family != AF_NETLINK)
1137 return ERR_PTR(-EINVAL);
1138
1139 sock_hold(sock);
1140 return sock;
1141 }
1142
1143 static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1144 int broadcast)
1145 {
1146 struct sk_buff *skb;
1147 void *data;
1148
1149 if (size <= NLMSG_GOODSIZE || broadcast)
1150 return alloc_skb(size, GFP_KERNEL);
1151
1152 size = SKB_DATA_ALIGN(size) +
1153 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1154
1155 data = vmalloc(size);
1156 if (data == NULL)
1157 return NULL;
1158
1159 skb = __build_skb(data, size);
1160 if (skb == NULL)
1161 vfree(data);
1162 else
1163 skb->destructor = netlink_skb_destructor;
1164
1165 return skb;
1166 }
1167
1168 /*
1169 * Attach a skb to a netlink socket.
1170 * The caller must hold a reference to the destination socket. On error, the
1171 * reference is dropped. The skb is not sent to the destination; all
1172 * error checks are performed and memory in the queue is reserved.
1173 * Return values:
1174 * < 0: error. skb freed, reference to sock dropped.
1175 * 0: continue
1176 * 1: repeat lookup - reference dropped while waiting for socket memory.
1177 */
1178 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1179 long *timeo, struct sock *ssk)
1180 {
1181 struct netlink_sock *nlk;
1182
1183 nlk = nlk_sk(sk);
1184
1185 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1186 test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
1187 DECLARE_WAITQUEUE(wait, current);
1188 if (!*timeo) {
1189 if (!ssk || netlink_is_kernel(ssk))
1190 netlink_overrun(sk);
1191 sock_put(sk);
1192 kfree_skb(skb);
1193 return -EAGAIN;
1194 }
1195
1196 __set_current_state(TASK_INTERRUPTIBLE);
1197 add_wait_queue(&nlk->wait, &wait);
1198
1199 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1200 test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
1201 !sock_flag(sk, SOCK_DEAD))
1202 *timeo = schedule_timeout(*timeo);
1203
1204 __set_current_state(TASK_RUNNING);
1205 remove_wait_queue(&nlk->wait, &wait);
1206 sock_put(sk);
1207
1208 if (signal_pending(current)) {
1209 kfree_skb(skb);
1210 return sock_intr_errno(*timeo);
1211 }
1212 return 1;
1213 }
1214 netlink_skb_set_owner_r(skb, sk);
1215 return 0;
1216 }
1217
1218 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1219 {
1220 int len = skb->len;
1221
1222 netlink_deliver_tap(skb);
1223
1224 skb_queue_tail(&sk->sk_receive_queue, skb);
1225 sk->sk_data_ready(sk);
1226 return len;
1227 }
1228
1229 int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1230 {
1231 int len = __netlink_sendskb(sk, skb);
1232
1233 sock_put(sk);
1234 return len;
1235 }
1236
1237 void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
1238 {
1239 kfree_skb(skb);
1240 sock_put(sk);
1241 }
1242
1243 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1244 {
1245 int delta;
1246
1247 WARN_ON(skb->sk != NULL);
1248 delta = skb->end - skb->tail;
1249 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1250 return skb;
1251
1252 if (skb_shared(skb)) {
1253 struct sk_buff *nskb = skb_clone(skb, allocation);
1254 if (!nskb)
1255 return skb;
1256 consume_skb(skb);
1257 skb = nskb;
1258 }
1259
1260 if (!pskb_expand_head(skb, 0, -delta, allocation))
1261 skb->truesize -= delta;
1262
1263 return skb;
1264 }
1265
1266 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1267 struct sock *ssk)
1268 {
1269 int ret;
1270 struct netlink_sock *nlk = nlk_sk(sk);
1271
1272 ret = -ECONNREFUSED;
1273 if (nlk->netlink_rcv != NULL) {
1274 ret = skb->len;
1275 netlink_skb_set_owner_r(skb, sk);
1276 NETLINK_CB(skb).sk = ssk;
1277 netlink_deliver_tap_kernel(sk, ssk, skb);
1278 nlk->netlink_rcv(skb);
1279 consume_skb(skb);
1280 } else {
1281 kfree_skb(skb);
1282 }
1283 sock_put(sk);
1284 return ret;
1285 }
1286
1287 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
1288 u32 portid, int nonblock)
1289 {
1290 struct sock *sk;
1291 int err;
1292 long timeo;
1293
1294 skb = netlink_trim(skb, gfp_any());
1295
1296 timeo = sock_sndtimeo(ssk, nonblock);
1297 retry:
1298 sk = netlink_getsockbyportid(ssk, portid);
1299 if (IS_ERR(sk)) {
1300 kfree_skb(skb);
1301 return PTR_ERR(sk);
1302 }
1303 if (netlink_is_kernel(sk))
1304 return netlink_unicast_kernel(sk, skb, ssk);
1305
1306 if (sk_filter(sk, skb)) {
1307 err = skb->len;
1308 kfree_skb(skb);
1309 sock_put(sk);
1310 return err;
1311 }
1312
1313 err = netlink_attachskb(sk, skb, &timeo, ssk);
1314 if (err == 1)
1315 goto retry;
1316 if (err)
1317 return err;
1318
1319 return netlink_sendskb(sk, skb);
1320 }
1321 EXPORT_SYMBOL(netlink_unicast);
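/* Usage sketch (illustrative only, not part of this file): a kernel-side
 * caller usually builds the message with the nlmsg helpers and hands it to
 * netlink_unicast(), which consumes the skb on both success and failure.
 * The payload, message type and destination portid below are assumptions
 * made for the example.
 *
 *	struct sk_buff *skb = nlmsg_new(sizeof(u32), GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, 0, 0, NLMSG_DONE, sizeof(u32), 0);
 *	if (!nlh) {
 *		nlmsg_free(skb);
 *		return -EMSGSIZE;
 *	}
 *	*(u32 *)nlmsg_data(nlh) = 42;
 *	return netlink_unicast(kernel_sk, skb, dst_portid, MSG_DONTWAIT);
 */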
1322
1323 int netlink_has_listeners(struct sock *sk, unsigned int group)
1324 {
1325 int res = 0;
1326 struct listeners *listeners;
1327
1328 BUG_ON(!netlink_is_kernel(sk));
1329
1330 rcu_read_lock();
1331 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
1332
1333 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
1334 res = test_bit(group - 1, listeners->masks);
1335
1336 rcu_read_unlock();
1337
1338 return res;
1339 }
1340 EXPORT_SYMBOL_GPL(netlink_has_listeners);
1341
1342 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1343 {
1344 struct netlink_sock *nlk = nlk_sk(sk);
1345
1346 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1347 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
1348 netlink_skb_set_owner_r(skb, sk);
1349 __netlink_sendskb(sk, skb);
1350 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1351 }
1352 return -1;
1353 }
1354
1355 struct netlink_broadcast_data {
1356 struct sock *exclude_sk;
1357 struct net *net;
1358 u32 portid;
1359 u32 group;
1360 int failure;
1361 int delivery_failure;
1362 int congested;
1363 int delivered;
1364 gfp_t allocation;
1365 struct sk_buff *skb, *skb2;
1366 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1367 void *tx_data;
1368 };
1369
1370 static void do_one_broadcast(struct sock *sk,
1371 struct netlink_broadcast_data *p)
1372 {
1373 struct netlink_sock *nlk = nlk_sk(sk);
1374 int val;
1375
1376 if (p->exclude_sk == sk)
1377 return;
1378
1379 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1380 !test_bit(p->group - 1, nlk->groups))
1381 return;
1382
1383 if (!net_eq(sock_net(sk), p->net)) {
1384 if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
1385 return;
1386
1387 if (!peernet_has_id(sock_net(sk), p->net))
1388 return;
1389
1390 if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
1391 CAP_NET_BROADCAST))
1392 return;
1393 }
1394
1395 if (p->failure) {
1396 netlink_overrun(sk);
1397 return;
1398 }
1399
1400 sock_hold(sk);
1401 if (p->skb2 == NULL) {
1402 if (skb_shared(p->skb)) {
1403 p->skb2 = skb_clone(p->skb, p->allocation);
1404 } else {
1405 p->skb2 = skb_get(p->skb);
1406 /*
1407 * skb ownership may have been set when
1408 * delivered to a previous socket.
1409 */
1410 skb_orphan(p->skb2);
1411 }
1412 }
1413 if (p->skb2 == NULL) {
1414 netlink_overrun(sk);
1415 /* Clone failed. Notify ALL listeners. */
1416 p->failure = 1;
1417 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
1418 p->delivery_failure = 1;
1419 goto out;
1420 }
1421 if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1422 kfree_skb(p->skb2);
1423 p->skb2 = NULL;
1424 goto out;
1425 }
1426 if (sk_filter(sk, p->skb2)) {
1427 kfree_skb(p->skb2);
1428 p->skb2 = NULL;
1429 goto out;
1430 }
1431 NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
1432 NETLINK_CB(p->skb2).nsid_is_set = true;
1433 val = netlink_broadcast_deliver(sk, p->skb2);
1434 if (val < 0) {
1435 netlink_overrun(sk);
1436 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
1437 p->delivery_failure = 1;
1438 } else {
1439 p->congested |= val;
1440 p->delivered = 1;
1441 p->skb2 = NULL;
1442 }
1443 out:
1444 sock_put(sk);
1445 }
1446
1447 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
1448 u32 group, gfp_t allocation,
1449 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1450 void *filter_data)
1451 {
1452 struct net *net = sock_net(ssk);
1453 struct netlink_broadcast_data info;
1454 struct sock *sk;
1455
1456 skb = netlink_trim(skb, allocation);
1457
1458 info.exclude_sk = ssk;
1459 info.net = net;
1460 info.portid = portid;
1461 info.group = group;
1462 info.failure = 0;
1463 info.delivery_failure = 0;
1464 info.congested = 0;
1465 info.delivered = 0;
1466 info.allocation = allocation;
1467 info.skb = skb;
1468 info.skb2 = NULL;
1469 info.tx_filter = filter;
1470 info.tx_data = filter_data;
1471
1472 /* While we sleep in clone, do not allow the socket list to change */
1473
1474 netlink_lock_table();
1475
1476 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1477 do_one_broadcast(sk, &info);
1478
1479 consume_skb(skb);
1480
1481 netlink_unlock_table();
1482
1483 if (info.delivery_failure) {
1484 kfree_skb(info.skb2);
1485 return -ENOBUFS;
1486 }
1487 consume_skb(info.skb2);
1488
1489 if (info.delivered) {
1490 if (info.congested && gfpflags_allow_blocking(allocation))
1491 yield();
1492 return 0;
1493 }
1494 return -ESRCH;
1495 }
1496 EXPORT_SYMBOL(netlink_broadcast_filtered);
1497
1498 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
1499 u32 group, gfp_t allocation)
1500 {
1501 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
1502 NULL, NULL);
1503 }
1504 EXPORT_SYMBOL(netlink_broadcast);
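/* Usage sketch (illustrative only): delivering one skb to every member of a
 * multicast group.  Portid 0 means "exclude nobody"; "group" is a 1-based
 * group number owned by the calling protocol and is an assumption here.
 * netlink_broadcast() consumes the skb, so the caller only frees it when it
 * decides not to send at all.
 *
 *	if (netlink_has_listeners(kernel_sk, group))
 *		err = netlink_broadcast(kernel_sk, skb, 0, group, GFP_KERNEL);
 *	else
 *		kfree_skb(skb);
 */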
1505
1506 struct netlink_set_err_data {
1507 struct sock *exclude_sk;
1508 u32 portid;
1509 u32 group;
1510 int code;
1511 };
1512
1513 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1514 {
1515 struct netlink_sock *nlk = nlk_sk(sk);
1516 int ret = 0;
1517
1518 if (sk == p->exclude_sk)
1519 goto out;
1520
1521 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1522 goto out;
1523
1524 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1525 !test_bit(p->group - 1, nlk->groups))
1526 goto out;
1527
1528 if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
1529 ret = 1;
1530 goto out;
1531 }
1532
1533 sk->sk_err = p->code;
1534 sk->sk_error_report(sk);
1535 out:
1536 return ret;
1537 }
1538
1539 /**
1540 * netlink_set_err - report error to broadcast listeners
1541 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1542 * @portid: the PORTID of a process that we want to skip (if any)
1543 * @group: the broadcast group that will notice the error
1544 * @code: error code, must be negative (as usual in kernelspace)
1545 *
1546 * This function returns the number of broadcast listeners that have set the
1547 * NETLINK_NO_ENOBUFS socket option.
1548 */
1549 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1550 {
1551 struct netlink_set_err_data info;
1552 struct sock *sk;
1553 int ret = 0;
1554
1555 info.exclude_sk = ssk;
1556 info.portid = portid;
1557 info.group = group;
1558 /* sk->sk_err wants a positive error value */
1559 info.code = -code;
1560
1561 read_lock(&nl_table_lock);
1562
1563 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1564 ret += do_one_set_err(sk, &info);
1565
1566 read_unlock(&nl_table_lock);
1567 return ret;
1568 }
1569 EXPORT_SYMBOL(netlink_set_err);
1570
1571 /* must be called with netlink table grabbed */
1572 static void netlink_update_socket_mc(struct netlink_sock *nlk,
1573 unsigned int group,
1574 int is_new)
1575 {
1576 int old, new = !!is_new, subscriptions;
1577
1578 old = test_bit(group - 1, nlk->groups);
1579 subscriptions = nlk->subscriptions - old + new;
1580 if (new)
1581 __set_bit(group - 1, nlk->groups);
1582 else
1583 __clear_bit(group - 1, nlk->groups);
1584 netlink_update_subscriptions(&nlk->sk, subscriptions);
1585 netlink_update_listeners(&nlk->sk);
1586 }
1587
1588 static int netlink_setsockopt(struct socket *sock, int level, int optname,
1589 char __user *optval, unsigned int optlen)
1590 {
1591 struct sock *sk = sock->sk;
1592 struct netlink_sock *nlk = nlk_sk(sk);
1593 unsigned int val = 0;
1594 int err;
1595
1596 if (level != SOL_NETLINK)
1597 return -ENOPROTOOPT;
1598
1599 if (optlen >= sizeof(int) &&
1600 get_user(val, (unsigned int __user *)optval))
1601 return -EFAULT;
1602
1603 switch (optname) {
1604 case NETLINK_PKTINFO:
1605 if (val)
1606 nlk->flags |= NETLINK_F_RECV_PKTINFO;
1607 else
1608 nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
1609 err = 0;
1610 break;
1611 case NETLINK_ADD_MEMBERSHIP:
1612 case NETLINK_DROP_MEMBERSHIP: {
1613 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
1614 return -EPERM;
1615 err = netlink_realloc_groups(sk);
1616 if (err)
1617 return err;
1618 if (!val || val - 1 >= nlk->ngroups)
1619 return -EINVAL;
1620 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
1621 err = nlk->netlink_bind(sock_net(sk), val);
1622 if (err)
1623 return err;
1624 }
1625 netlink_table_grab();
1626 netlink_update_socket_mc(nlk, val,
1627 optname == NETLINK_ADD_MEMBERSHIP);
1628 netlink_table_ungrab();
1629 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
1630 nlk->netlink_unbind(sock_net(sk), val);
1631
1632 err = 0;
1633 break;
1634 }
1635 case NETLINK_BROADCAST_ERROR:
1636 if (val)
1637 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
1638 else
1639 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
1640 err = 0;
1641 break;
1642 case NETLINK_NO_ENOBUFS:
1643 if (val) {
1644 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
1645 clear_bit(NETLINK_S_CONGESTED, &nlk->state);
1646 wake_up_interruptible(&nlk->wait);
1647 } else {
1648 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
1649 }
1650 err = 0;
1651 break;
1652 case NETLINK_LISTEN_ALL_NSID:
1653 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
1654 return -EPERM;
1655
1656 if (val)
1657 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
1658 else
1659 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
1660 err = 0;
1661 break;
1662 case NETLINK_CAP_ACK:
1663 if (val)
1664 nlk->flags |= NETLINK_F_CAP_ACK;
1665 else
1666 nlk->flags &= ~NETLINK_F_CAP_ACK;
1667 err = 0;
1668 break;
1669 default:
1670 err = -ENOPROTOOPT;
1671 }
1672 return err;
1673 }
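/* Usage sketch (illustrative only, userspace side): the NETLINK_ADD_MEMBERSHIP
 * option handled above is how an application joins a multicast group; MY_GRP
 * is a hypothetical 1-based group number.
 *
 *	int grp = MY_GRP;
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 */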
1674
1675 static int netlink_getsockopt(struct socket *sock, int level, int optname,
1676 char __user *optval, int __user *optlen)
1677 {
1678 struct sock *sk = sock->sk;
1679 struct netlink_sock *nlk = nlk_sk(sk);
1680 int len, val, err;
1681
1682 if (level != SOL_NETLINK)
1683 return -ENOPROTOOPT;
1684
1685 if (get_user(len, optlen))
1686 return -EFAULT;
1687 if (len < 0)
1688 return -EINVAL;
1689
1690 switch (optname) {
1691 case NETLINK_PKTINFO:
1692 if (len < sizeof(int))
1693 return -EINVAL;
1694 len = sizeof(int);
1695 val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
1696 if (put_user(len, optlen) ||
1697 put_user(val, optval))
1698 return -EFAULT;
1699 err = 0;
1700 break;
1701 case NETLINK_BROADCAST_ERROR:
1702 if (len < sizeof(int))
1703 return -EINVAL;
1704 len = sizeof(int);
1705 val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
1706 if (put_user(len, optlen) ||
1707 put_user(val, optval))
1708 return -EFAULT;
1709 err = 0;
1710 break;
1711 case NETLINK_NO_ENOBUFS:
1712 if (len < sizeof(int))
1713 return -EINVAL;
1714 len = sizeof(int);
1715 val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
1716 if (put_user(len, optlen) ||
1717 put_user(val, optval))
1718 return -EFAULT;
1719 err = 0;
1720 break;
1721 case NETLINK_LIST_MEMBERSHIPS: {
1722 int pos, idx, shift;
1723
1724 err = 0;
1725 netlink_lock_table();
1726 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
1727 if (len - pos < sizeof(u32))
1728 break;
1729
1730 idx = pos / sizeof(unsigned long);
1731 shift = (pos % sizeof(unsigned long)) * 8;
1732 if (put_user((u32)(nlk->groups[idx] >> shift),
1733 (u32 __user *)(optval + pos))) {
1734 err = -EFAULT;
1735 break;
1736 }
1737 }
1738 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
1739 err = -EFAULT;
1740 netlink_unlock_table();
1741 break;
1742 }
1743 case NETLINK_CAP_ACK:
1744 if (len < sizeof(int))
1745 return -EINVAL;
1746 len = sizeof(int);
1747 val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
1748 if (put_user(len, optlen) ||
1749 put_user(val, optval))
1750 return -EFAULT;
1751 err = 0;
1752 break;
1753 default:
1754 err = -ENOPROTOOPT;
1755 }
1756 return err;
1757 }
1758
1759 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1760 {
1761 struct nl_pktinfo info;
1762
1763 info.group = NETLINK_CB(skb).dst_group;
1764 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1765 }
1766
1767 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
1768 struct sk_buff *skb)
1769 {
1770 if (!NETLINK_CB(skb).nsid_is_set)
1771 return;
1772
1773 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
1774 &NETLINK_CB(skb).nsid);
1775 }
1776
1777 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1778 {
1779 struct sock *sk = sock->sk;
1780 struct netlink_sock *nlk = nlk_sk(sk);
1781 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1782 u32 dst_portid;
1783 u32 dst_group;
1784 struct sk_buff *skb;
1785 int err;
1786 struct scm_cookie scm;
1787 u32 netlink_skb_flags = 0;
1788
1789 if (msg->msg_flags&MSG_OOB)
1790 return -EOPNOTSUPP;
1791
1792 err = scm_send(sock, msg, &scm, true);
1793 if (err < 0)
1794 return err;
1795
1796 if (msg->msg_namelen) {
1797 err = -EINVAL;
1798 if (addr->nl_family != AF_NETLINK)
1799 goto out;
1800 dst_portid = addr->nl_pid;
1801 dst_group = ffs(addr->nl_groups);
1802 err = -EPERM;
1803 if ((dst_group || dst_portid) &&
1804 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1805 goto out;
1806 netlink_skb_flags |= NETLINK_SKB_DST;
1807 } else {
1808 dst_portid = nlk->dst_portid;
1809 dst_group = nlk->dst_group;
1810 }
1811
1812 if (!nlk->bound) {
1813 err = netlink_autobind(sock);
1814 if (err)
1815 goto out;
1816 } else {
1817 /* Ensure nlk is hashed and visible. */
1818 smp_rmb();
1819 }
1820
1821 err = -EMSGSIZE;
1822 if (len > sk->sk_sndbuf - 32)
1823 goto out;
1824 err = -ENOBUFS;
1825 skb = netlink_alloc_large_skb(len, dst_group);
1826 if (skb == NULL)
1827 goto out;
1828
1829 NETLINK_CB(skb).portid = nlk->portid;
1830 NETLINK_CB(skb).dst_group = dst_group;
1831 NETLINK_CB(skb).creds = scm.creds;
1832 NETLINK_CB(skb).flags = netlink_skb_flags;
1833
1834 err = -EFAULT;
1835 if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
1836 kfree_skb(skb);
1837 goto out;
1838 }
1839
1840 err = security_netlink_send(sk, skb);
1841 if (err) {
1842 kfree_skb(skb);
1843 goto out;
1844 }
1845
1846 if (dst_group) {
1847 atomic_inc(&skb->users);
1848 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1849 }
1850 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
1851
1852 out:
1853 scm_destroy(&scm);
1854 return err;
1855 }
1856
1857 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1858 int flags)
1859 {
1860 struct scm_cookie scm;
1861 struct sock *sk = sock->sk;
1862 struct netlink_sock *nlk = nlk_sk(sk);
1863 int noblock = flags&MSG_DONTWAIT;
1864 size_t copied;
1865 struct sk_buff *skb, *data_skb;
1866 int err, ret;
1867
1868 if (flags&MSG_OOB)
1869 return -EOPNOTSUPP;
1870
1871 copied = 0;
1872
1873 skb = skb_recv_datagram(sk, flags, noblock, &err);
1874 if (skb == NULL)
1875 goto out;
1876
1877 data_skb = skb;
1878
1879 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1880 if (unlikely(skb_shinfo(skb)->frag_list)) {
1881 /*
1882 * If this skb has a frag_list, then that means we
1883 * will have to use the frag_list skb's data for compat tasks
1884 * and the regular skb's data for normal (non-compat) tasks.
1885 *
1886 * If we need to send the compat skb, assign it to the
1887 * 'data_skb' variable so that it will be used below for data
1888 * copying. We keep 'skb' for everything else, including
1889 * freeing both later.
1890 */
1891 if (flags & MSG_CMSG_COMPAT)
1892 data_skb = skb_shinfo(skb)->frag_list;
1893 }
1894 #endif
1895
1896 /* Record the max length of recvmsg() calls for future allocations */
1897 nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
1898 nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
1899 SKB_WITH_OVERHEAD(32768));
1900
1901 copied = data_skb->len;
1902 if (len < copied) {
1903 msg->msg_flags |= MSG_TRUNC;
1904 copied = len;
1905 }
1906
1907 skb_reset_transport_header(data_skb);
1908 err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
1909
1910 if (msg->msg_name) {
1911 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1912 addr->nl_family = AF_NETLINK;
1913 addr->nl_pad = 0;
1914 addr->nl_pid = NETLINK_CB(skb).portid;
1915 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1916 msg->msg_namelen = sizeof(*addr);
1917 }
1918
1919 if (nlk->flags & NETLINK_F_RECV_PKTINFO)
1920 netlink_cmsg_recv_pktinfo(msg, skb);
1921 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
1922 netlink_cmsg_listen_all_nsid(sk, msg, skb);
1923
1924 memset(&scm, 0, sizeof(scm));
1925 scm.creds = *NETLINK_CREDS(skb);
1926 if (flags & MSG_TRUNC)
1927 copied = data_skb->len;
1928
1929 skb_free_datagram(sk, skb);
1930
1931 if (nlk->cb_running &&
1932 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1933 ret = netlink_dump(sk);
1934 if (ret) {
1935 sk->sk_err = -ret;
1936 sk->sk_error_report(sk);
1937 }
1938 }
1939
1940 scm_recv(sock, msg, &scm, flags);
1941 out:
1942 netlink_rcv_wake(sk);
1943 return err ? : copied;
1944 }
1945
1946 static void netlink_data_ready(struct sock *sk)
1947 {
1948 BUG();
1949 }
1950
1951 /*
1952 * We export these functions to other modules. They provide a
1953 * complete set of kernel non-blocking support for message
1954 * queueing.
1955 */
1956
1957 struct sock *
1958 __netlink_kernel_create(struct net *net, int unit, struct module *module,
1959 struct netlink_kernel_cfg *cfg)
1960 {
1961 struct socket *sock;
1962 struct sock *sk;
1963 struct netlink_sock *nlk;
1964 struct listeners *listeners = NULL;
1965 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
1966 unsigned int groups;
1967
1968 BUG_ON(!nl_table);
1969
1970 if (unit < 0 || unit >= MAX_LINKS)
1971 return NULL;
1972
1973 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1974 return NULL;
1975
1976 if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
1977 goto out_sock_release_nosk;
1978
1979 sk = sock->sk;
1980
1981 if (!cfg || cfg->groups < 32)
1982 groups = 32;
1983 else
1984 groups = cfg->groups;
1985
1986 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1987 if (!listeners)
1988 goto out_sock_release;
1989
1990 sk->sk_data_ready = netlink_data_ready;
1991 if (cfg && cfg->input)
1992 nlk_sk(sk)->netlink_rcv = cfg->input;
1993
1994 if (netlink_insert(sk, 0))
1995 goto out_sock_release;
1996
1997 nlk = nlk_sk(sk);
1998 nlk->flags |= NETLINK_F_KERNEL_SOCKET;
1999
2000 netlink_table_grab();
2001 if (!nl_table[unit].registered) {
2002 nl_table[unit].groups = groups;
2003 rcu_assign_pointer(nl_table[unit].listeners, listeners);
2004 nl_table[unit].cb_mutex = cb_mutex;
2005 nl_table[unit].module = module;
2006 if (cfg) {
2007 nl_table[unit].bind = cfg->bind;
2008 nl_table[unit].unbind = cfg->unbind;
2009 nl_table[unit].flags = cfg->flags;
2010 if (cfg->compare)
2011 nl_table[unit].compare = cfg->compare;
2012 }
2013 nl_table[unit].registered = 1;
2014 } else {
2015 kfree(listeners);
2016 nl_table[unit].registered++;
2017 }
2018 netlink_table_ungrab();
2019 return sk;
2020
2021 out_sock_release:
2022 kfree(listeners);
2023 netlink_kernel_release(sk);
2024 return NULL;
2025
2026 out_sock_release_nosk:
2027 sock_release(sock);
2028 return NULL;
2029 }
2030 EXPORT_SYMBOL(__netlink_kernel_create);
2031
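/*
 * Illustrative sketch, not part of this file: a subsystem usually obtains
 * its kernel-side netlink socket through netlink_kernel_create(), a static
 * inline from <linux/netlink.h> that passes THIS_MODULE down to
 * __netlink_kernel_create() above.  NETLINK_EXAMPLE, example_nl_sk and
 * example_nl_input() are hypothetical names used only for illustration.
 */
#if 0
#define NETLINK_EXAMPLE		22	/* placeholder unit; must be < MAX_LINKS */

static struct sock *example_nl_sk;

/* Receive callback; a typical body is shown after netlink_rcv_skb() below. */
static void example_nl_input(struct sk_buff *skb);

static int __init example_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= 32,			/* at least 32 multicast groups */
		.input	= example_nl_input,	/* called for every queued skb */
	};

	example_nl_sk = netlink_kernel_create(&init_net, NETLINK_EXAMPLE, &cfg);
	return example_nl_sk ? 0 : -ENOMEM;
}

static void __exit example_nl_exit(void)
{
	netlink_kernel_release(example_nl_sk);
}
#endif
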
2032 void
2033 netlink_kernel_release(struct sock *sk)
2034 {
2035 if (sk == NULL || sk->sk_socket == NULL)
2036 return;
2037
2038 sock_release(sk->sk_socket);
2039 }
2040 EXPORT_SYMBOL(netlink_kernel_release);
2041
2042 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2043 {
2044 struct listeners *new, *old;
2045 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2046
2047 if (groups < 32)
2048 groups = 32;
2049
2050 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2051 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2052 if (!new)
2053 return -ENOMEM;
2054 old = nl_deref_protected(tbl->listeners);
2055 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2056 rcu_assign_pointer(tbl->listeners, new);
2057
2058 kfree_rcu(old, rcu);
2059 }
2060 tbl->groups = groups;
2061
2062 return 0;
2063 }
2064
2065 /**
2066 * netlink_change_ngroups - change number of multicast groups
2067 *
2068 * This changes the number of multicast groups that are available
2069 * on a certain netlink family. Note that it is not possible to
2070 * change the number of groups to below 32. Also note that it does
2071 * not implicitly call netlink_clear_multicast_users() when the
2072 * number of groups is reduced.
2073 *
2074 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2075 * @groups: The new number of groups.
2076 */
2077 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2078 {
2079 int err;
2080
2081 netlink_table_grab();
2082 err = __netlink_change_ngroups(sk, groups);
2083 netlink_table_ungrab();
2084
2085 return err;
2086 }
2087
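/*
 * Illustrative sketch, not part of this file: a family that registers more
 * multicast groups at runtime can grow the per-socket listener bitmaps with
 * netlink_change_ngroups().  example_nl_sk is the hypothetical kernel socket
 * from the earlier sketch.
 */
#if 0
static int example_nl_set_group_count(unsigned int total_groups)
{
	/*
	 * Grows the listener bitmap if needed; the count never drops below
	 * 32 and existing multicast users are not cleared on a reduction.
	 */
	return netlink_change_ngroups(example_nl_sk, total_groups);
}
#endif
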
2088 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2089 {
2090 struct sock *sk;
2091 struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2092
2093 sk_for_each_bound(sk, &tbl->mc_list)
2094 netlink_update_socket_mc(nlk_sk(sk), group, 0);
2095 }
2096
2097 struct nlmsghdr *
2098 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2099 {
2100 struct nlmsghdr *nlh;
2101 int size = nlmsg_msg_size(len);
2102
2103 nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
2104 nlh->nlmsg_type = type;
2105 nlh->nlmsg_len = size;
2106 nlh->nlmsg_flags = flags;
2107 nlh->nlmsg_pid = portid;
2108 nlh->nlmsg_seq = seq;
2109 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2110 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2111 return nlh;
2112 }
2113 EXPORT_SYMBOL(__nlmsg_put);
2114
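/*
 * Illustrative sketch, not part of this file: __nlmsg_put() is normally
 * reached through nlmsg_put() from <net/netlink.h>, which checks tailroom
 * first.  A reply might be built roughly as follows; EXAMPLE_MSG_TYPE (which
 * must be >= NLMSG_MIN_TYPE) and example_build_reply() are hypothetical.
 */
#if 0
static struct sk_buff *example_build_reply(u32 portid, u32 seq)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	u32 *payload;

	skb = nlmsg_new(sizeof(u32), GFP_KERNEL);
	if (!skb)
		return NULL;

	nlh = nlmsg_put(skb, portid, seq, EXAMPLE_MSG_TYPE, sizeof(u32), 0);
	if (!nlh) {
		nlmsg_free(skb);
		return NULL;
	}

	payload = nlmsg_data(nlh);
	*payload = 42;			/* arbitrary example payload */

	nlmsg_end(skb, nlh);		/* close the message, fixing nlmsg_len */
	return skb;
}
#endif
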
2115 /*
2116  * This looks a bit ugly;
2117  * it would be better to create a kernel thread.
2118 */
2119
2120 static int netlink_dump(struct sock *sk)
2121 {
2122 struct netlink_sock *nlk = nlk_sk(sk);
2123 struct netlink_callback *cb;
2124 struct sk_buff *skb = NULL;
2125 struct nlmsghdr *nlh;
2126 struct module *module;
2127 int err = -ENOBUFS;
2128 int alloc_min_size;
2129 int alloc_size;
2130
2131 mutex_lock(nlk->cb_mutex);
2132 if (!nlk->cb_running) {
2133 err = -EINVAL;
2134 goto errout_skb;
2135 }
2136
2137 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2138 goto errout_skb;
2139
2140 	/* NLMSG_GOODSIZE is small to avoid high-order allocations being
2141 	 * required, but it makes sense to _attempt_ a 16K-byte allocation
2142 	 * to reduce the number of system calls on dump operations, if the
2143 	 * user ever provided a big enough buffer.
2144 */
2145 cb = &nlk->cb;
2146 alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2147
2148 if (alloc_min_size < nlk->max_recvmsg_len) {
2149 alloc_size = nlk->max_recvmsg_len;
2150 skb = alloc_skb(alloc_size,
2151 (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
2152 __GFP_NOWARN | __GFP_NORETRY);
2153 }
2154 if (!skb) {
2155 alloc_size = alloc_min_size;
2156 skb = alloc_skb(alloc_size, GFP_KERNEL);
2157 }
2158 if (!skb)
2159 goto errout_skb;
2160
2161 	/* Trim skb to the allocated size. The user is expected to provide a
2162 	 * buffer as large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped
2163 	 * at netlink_recvmsg())). The dump will pack as many smaller messages
2164 	 * as fit within the allocated skb. The skb is typically allocated with
2165 	 * more space than required (possibly close to 2x the requested size,
2166 	 * due to the align-to-next-power-of-2 approach). Allowing the dump to
2167 	 * use the excess space would make it difficult for a user to size a
2168 	 * reasonable static buffer based on the expected largest dump of a
2169 	 * single netdev; the outcome would be an MSG_TRUNC error.
2170 */
2171 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2172 netlink_skb_set_owner_r(skb, sk);
2173
2174 if (nlk->dump_done_errno > 0)
2175 nlk->dump_done_errno = cb->dump(skb, cb);
2176
2177 if (nlk->dump_done_errno > 0 ||
2178 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
2179 mutex_unlock(nlk->cb_mutex);
2180
2181 if (sk_filter(sk, skb))
2182 kfree_skb(skb);
2183 else
2184 __netlink_sendskb(sk, skb);
2185 return 0;
2186 }
2187
2188 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
2189 sizeof(nlk->dump_done_errno), NLM_F_MULTI);
2190 if (WARN_ON(!nlh))
2191 goto errout_skb;
2192
2193 nl_dump_check_consistent(cb, nlh);
2194
2195 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
2196 sizeof(nlk->dump_done_errno));
2197
2198 if (sk_filter(sk, skb))
2199 kfree_skb(skb);
2200 else
2201 __netlink_sendskb(sk, skb);
2202
2203 if (cb->done)
2204 cb->done(cb);
2205
2206 nlk->cb_running = false;
2207 module = cb->module;
2208 skb = cb->skb;
2209 mutex_unlock(nlk->cb_mutex);
2210 module_put(module);
2211 consume_skb(skb);
2212 return 0;
2213
2214 errout_skb:
2215 mutex_unlock(nlk->cb_mutex);
2216 kfree_skb(skb);
2217 return err;
2218 }
2219
2220 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2221 const struct nlmsghdr *nlh,
2222 struct netlink_dump_control *control)
2223 {
2224 struct netlink_callback *cb;
2225 struct sock *sk;
2226 struct netlink_sock *nlk;
2227 int ret;
2228
2229 atomic_inc(&skb->users);
2230
2231 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2232 if (sk == NULL) {
2233 ret = -ECONNREFUSED;
2234 goto error_free;
2235 }
2236
2237 nlk = nlk_sk(sk);
2238 mutex_lock(nlk->cb_mutex);
2239 /* A dump is in progress... */
2240 if (nlk->cb_running) {
2241 ret = -EBUSY;
2242 goto error_unlock;
2243 }
2244 	/* take a reference on the module which cb->dump belongs to */
2245 if (!try_module_get(control->module)) {
2246 ret = -EPROTONOSUPPORT;
2247 goto error_unlock;
2248 }
2249
2250 cb = &nlk->cb;
2251 memset(cb, 0, sizeof(*cb));
2252 cb->start = control->start;
2253 cb->dump = control->dump;
2254 cb->done = control->done;
2255 cb->nlh = nlh;
2256 cb->data = control->data;
2257 cb->module = control->module;
2258 cb->min_dump_alloc = control->min_dump_alloc;
2259 cb->skb = skb;
2260
2261 if (cb->start) {
2262 ret = cb->start(cb);
2263 if (ret)
2264 goto error_put;
2265 }
2266
2267 nlk->cb_running = true;
2268 nlk->dump_done_errno = INT_MAX;
2269
2270 mutex_unlock(nlk->cb_mutex);
2271
2272 ret = netlink_dump(sk);
2273
2274 sock_put(sk);
2275
2276 if (ret)
2277 return ret;
2278
2279 	/* We successfully started a dump; by returning -EINTR we signal
2280 	 * the caller not to send an ACK even if one was requested.
2281 */
2282 return -EINTR;
2283
2284 error_put:
2285 module_put(control->module);
2286 error_unlock:
2287 sock_put(sk);
2288 mutex_unlock(nlk->cb_mutex);
2289 error_free:
2290 kfree_skb(skb);
2291 return ret;
2292 }
2293 EXPORT_SYMBOL(__netlink_dump_start);
2294
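/*
 * Illustrative sketch, not part of this file: __netlink_dump_start() is
 * normally reached through the netlink_dump_start() wrapper when a request
 * carrying NLM_F_DUMP arrives.  example_dump(), example_done(),
 * example_rcv_msg() and example_nl_sk are hypothetical names.
 */
#if 0
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb);
static int example_done(struct netlink_callback *cb);

static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = example_dump,
			.done = example_done,
		};

		/* Success is reported as -EINTR so that no ACK is sent. */
		return netlink_dump_start(example_nl_sk, skb, nlh, &c);
	}

	return -EOPNOTSUPP;
}
#endif
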
2295 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2296 {
2297 struct sk_buff *skb;
2298 struct nlmsghdr *rep;
2299 struct nlmsgerr *errmsg;
2300 size_t payload = sizeof(*errmsg);
2301 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
2302
2303 	/* Error messages get the original request appended, unless the user
2304 * requests to cap the error message.
2305 */
2306 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
2307 payload += nlmsg_len(nlh);
2308
2309 skb = nlmsg_new(payload, GFP_KERNEL);
2310 if (!skb) {
2311 struct sock *sk;
2312
2313 sk = netlink_lookup(sock_net(in_skb->sk),
2314 in_skb->sk->sk_protocol,
2315 NETLINK_CB(in_skb).portid);
2316 if (sk) {
2317 sk->sk_err = ENOBUFS;
2318 sk->sk_error_report(sk);
2319 sock_put(sk);
2320 }
2321 return;
2322 }
2323
2324 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2325 NLMSG_ERROR, payload, 0);
2326 errmsg = nlmsg_data(rep);
2327 errmsg->error = err;
2328 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
2329 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2330 }
2331 EXPORT_SYMBOL(netlink_ack);
2332
2333 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2334 struct nlmsghdr *))
2335 {
2336 struct nlmsghdr *nlh;
2337 int err;
2338
2339 while (skb->len >= nlmsg_total_size(0)) {
2340 int msglen;
2341
2342 nlh = nlmsg_hdr(skb);
2343 err = 0;
2344
2345 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2346 return 0;
2347
2348 /* Only requests are handled by the kernel */
2349 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2350 goto ack;
2351
2352 /* Skip control messages */
2353 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2354 goto ack;
2355
2356 err = cb(skb, nlh);
2357 if (err == -EINTR)
2358 goto skip;
2359
2360 ack:
2361 if (nlh->nlmsg_flags & NLM_F_ACK || err)
2362 netlink_ack(skb, nlh, err);
2363
2364 skip:
2365 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2366 if (msglen > skb->len)
2367 msglen = skb->len;
2368 skb_pull(skb, msglen);
2369 }
2370
2371 return 0;
2372 }
2373 EXPORT_SYMBOL(netlink_rcv_skb);
2374
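/*
 * Illustrative sketch, not part of this file: the input callback installed
 * through netlink_kernel_cfg typically just hands the skb to
 * netlink_rcv_skb(), which walks the batched messages and generates ACKs as
 * needed.  example_nl_input() and example_rcv_msg() are the hypothetical
 * names from the earlier sketches.
 */
#if 0
static void example_nl_input(struct sk_buff *skb)
{
	netlink_rcv_skb(skb, &example_rcv_msg);
}
#endif
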
2375 /**
2376 * nlmsg_notify - send a notification netlink message
2377 * @sk: netlink socket to use
2378 * @skb: notification message
2379 * @portid: destination netlink portid for reports or 0
2380 * @group: destination multicast group or 0
2381 * @report: 1 to report back, 0 to disable
2382 * @flags: allocation flags
2383 */
2384 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2385 unsigned int group, int report, gfp_t flags)
2386 {
2387 int err = 0;
2388
2389 if (group) {
2390 int exclude_portid = 0;
2391
2392 if (report) {
2393 atomic_inc(&skb->users);
2394 exclude_portid = portid;
2395 }
2396
2397 		/* errors are reported via the destination sk->sk_err, but delivery
2398 		 * errors are propagated if the NETLINK_BROADCAST_ERROR flag is set */
2399 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2400 }
2401
2402 if (report) {
2403 int err2;
2404
2405 err2 = nlmsg_unicast(sk, skb, portid);
2406 if (!err || err == -ESRCH)
2407 err = err2;
2408 }
2409
2410 return err;
2411 }
2412 EXPORT_SYMBOL(nlmsg_notify);
2413
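/*
 * Illustrative sketch, not part of this file: callers usually pair
 * nlmsg_notify() with nlmsg_report(), so that a request carrying NLM_F_ECHO
 * also gets a unicast copy of the notification.  example_nl_sk and
 * EXAMPLE_GRP are hypothetical names.
 */
#if 0
static void example_notify(struct sk_buff *skb, const struct nlmsghdr *req,
			   u32 portid)
{
	nlmsg_notify(example_nl_sk, skb, portid, EXAMPLE_GRP,
		     nlmsg_report(req), GFP_KERNEL);
}
#endif
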
2414 #ifdef CONFIG_PROC_FS
2415 struct nl_seq_iter {
2416 struct seq_net_private p;
2417 struct rhashtable_iter hti;
2418 int link;
2419 };
2420
2421 static int netlink_walk_start(struct nl_seq_iter *iter)
2422 {
2423 int err;
2424
2425 err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
2426 GFP_KERNEL);
2427 if (err) {
2428 iter->link = MAX_LINKS;
2429 return err;
2430 }
2431
2432 err = rhashtable_walk_start(&iter->hti);
2433 return err == -EAGAIN ? 0 : err;
2434 }
2435
2436 static void netlink_walk_stop(struct nl_seq_iter *iter)
2437 {
2438 rhashtable_walk_stop(&iter->hti);
2439 rhashtable_walk_exit(&iter->hti);
2440 }
2441
2442 static void *__netlink_seq_next(struct seq_file *seq)
2443 {
2444 struct nl_seq_iter *iter = seq->private;
2445 struct netlink_sock *nlk;
2446
2447 do {
2448 for (;;) {
2449 int err;
2450
2451 nlk = rhashtable_walk_next(&iter->hti);
2452
2453 if (IS_ERR(nlk)) {
2454 if (PTR_ERR(nlk) == -EAGAIN)
2455 continue;
2456
2457 return nlk;
2458 }
2459
2460 if (nlk)
2461 break;
2462
2463 netlink_walk_stop(iter);
2464 if (++iter->link >= MAX_LINKS)
2465 return NULL;
2466
2467 err = netlink_walk_start(iter);
2468 if (err)
2469 return ERR_PTR(err);
2470 }
2471 } while (sock_net(&nlk->sk) != seq_file_net(seq));
2472
2473 return nlk;
2474 }
2475
2476 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
2477 {
2478 struct nl_seq_iter *iter = seq->private;
2479 void *obj = SEQ_START_TOKEN;
2480 loff_t pos;
2481 int err;
2482
2483 iter->link = 0;
2484
2485 err = netlink_walk_start(iter);
2486 if (err)
2487 return ERR_PTR(err);
2488
2489 for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
2490 obj = __netlink_seq_next(seq);
2491
2492 return obj;
2493 }
2494
2495 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2496 {
2497 ++*pos;
2498 return __netlink_seq_next(seq);
2499 }
2500
2501 static void netlink_seq_stop(struct seq_file *seq, void *v)
2502 {
2503 struct nl_seq_iter *iter = seq->private;
2504
2505 if (iter->link >= MAX_LINKS)
2506 return;
2507
2508 netlink_walk_stop(iter);
2509 }
2510
2511
2512 static int netlink_seq_show(struct seq_file *seq, void *v)
2513 {
2514 if (v == SEQ_START_TOKEN) {
2515 seq_puts(seq,
2516 "sk Eth Pid Groups "
2517 "Rmem Wmem Dump Locks Drops Inode\n");
2518 } else {
2519 struct sock *s = v;
2520 struct netlink_sock *nlk = nlk_sk(s);
2521
2522 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
2523 s,
2524 s->sk_protocol,
2525 nlk->portid,
2526 nlk->groups ? (u32)nlk->groups[0] : 0,
2527 sk_rmem_alloc_get(s),
2528 sk_wmem_alloc_get(s),
2529 nlk->cb_running,
2530 atomic_read(&s->sk_refcnt),
2531 atomic_read(&s->sk_drops),
2532 sock_i_ino(s)
2533 );
2534
2535 }
2536 return 0;
2537 }
2538
2539 static const struct seq_operations netlink_seq_ops = {
2540 .start = netlink_seq_start,
2541 .next = netlink_seq_next,
2542 .stop = netlink_seq_stop,
2543 .show = netlink_seq_show,
2544 };
2545
2546
2547 static int netlink_seq_open(struct inode *inode, struct file *file)
2548 {
2549 return seq_open_net(inode, file, &netlink_seq_ops,
2550 sizeof(struct nl_seq_iter));
2551 }
2552
2553 static const struct file_operations netlink_seq_fops = {
2554 .owner = THIS_MODULE,
2555 .open = netlink_seq_open,
2556 .read = seq_read,
2557 .llseek = seq_lseek,
2558 .release = seq_release_net,
2559 };
2560
2561 #endif
2562
2563 int netlink_register_notifier(struct notifier_block *nb)
2564 {
2565 return atomic_notifier_chain_register(&netlink_chain, nb);
2566 }
2567 EXPORT_SYMBOL(netlink_register_notifier);
2568
2569 int netlink_unregister_notifier(struct notifier_block *nb)
2570 {
2571 return atomic_notifier_chain_unregister(&netlink_chain, nb);
2572 }
2573 EXPORT_SYMBOL(netlink_unregister_notifier);
2574
2575 static const struct proto_ops netlink_ops = {
2576 .family = PF_NETLINK,
2577 .owner = THIS_MODULE,
2578 .release = netlink_release,
2579 .bind = netlink_bind,
2580 .connect = netlink_connect,
2581 .socketpair = sock_no_socketpair,
2582 .accept = sock_no_accept,
2583 .getname = netlink_getname,
2584 .poll = datagram_poll,
2585 .ioctl = netlink_ioctl,
2586 .listen = sock_no_listen,
2587 .shutdown = sock_no_shutdown,
2588 .setsockopt = netlink_setsockopt,
2589 .getsockopt = netlink_getsockopt,
2590 .sendmsg = netlink_sendmsg,
2591 .recvmsg = netlink_recvmsg,
2592 .mmap = sock_no_mmap,
2593 .sendpage = sock_no_sendpage,
2594 };
2595
2596 static const struct net_proto_family netlink_family_ops = {
2597 .family = PF_NETLINK,
2598 .create = netlink_create,
2599 .owner = THIS_MODULE, /* for consistency 8) */
2600 };
2601
2602 static int __net_init netlink_net_init(struct net *net)
2603 {
2604 #ifdef CONFIG_PROC_FS
2605 if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
2606 return -ENOMEM;
2607 #endif
2608 return 0;
2609 }
2610
2611 static void __net_exit netlink_net_exit(struct net *net)
2612 {
2613 #ifdef CONFIG_PROC_FS
2614 remove_proc_entry("netlink", net->proc_net);
2615 #endif
2616 }
2617
2618 static void __init netlink_add_usersock_entry(void)
2619 {
2620 struct listeners *listeners;
2621 int groups = 32;
2622
2623 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2624 if (!listeners)
2625 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2626
2627 netlink_table_grab();
2628
2629 nl_table[NETLINK_USERSOCK].groups = groups;
2630 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2631 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2632 nl_table[NETLINK_USERSOCK].registered = 1;
2633 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2634
2635 netlink_table_ungrab();
2636 }
2637
2638 static struct pernet_operations __net_initdata netlink_net_ops = {
2639 .init = netlink_net_init,
2640 .exit = netlink_net_exit,
2641 };
2642
2643 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
2644 {
2645 const struct netlink_sock *nlk = data;
2646 struct netlink_compare_arg arg;
2647
2648 netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
2649 return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
2650 }
2651
2652 static const struct rhashtable_params netlink_rhashtable_params = {
2653 .head_offset = offsetof(struct netlink_sock, node),
2654 .key_len = netlink_compare_arg_len,
2655 .obj_hashfn = netlink_hash,
2656 .obj_cmpfn = netlink_compare,
2657 .automatic_shrinking = true,
2658 };
2659
2660 static int __init netlink_proto_init(void)
2661 {
2662 int i;
2663 int err = proto_register(&netlink_proto, 0);
2664
2665 if (err != 0)
2666 goto out;
2667
2668 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2669
2670 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2671 if (!nl_table)
2672 goto panic;
2673
2674 for (i = 0; i < MAX_LINKS; i++) {
2675 if (rhashtable_init(&nl_table[i].hash,
2676 &netlink_rhashtable_params) < 0) {
2677 while (--i > 0)
2678 rhashtable_destroy(&nl_table[i].hash);
2679 kfree(nl_table);
2680 goto panic;
2681 }
2682 }
2683
2684 INIT_LIST_HEAD(&netlink_tap_all);
2685
2686 netlink_add_usersock_entry();
2687
2688 sock_register(&netlink_family_ops);
2689 register_pernet_subsys(&netlink_net_ops);
2690 /* The netlink device handler may be needed early. */
2691 rtnetlink_init();
2692 out:
2693 return err;
2694 panic:
2695 panic("netlink_init: Cannot allocate nl_table\n");
2696 }
2697
2698 core_initcall(netlink_proto_init);
2699