1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
3 * Patrick Schaaf <bof@bof.de>
4 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
5 */
6
7 /* Kernel module for IP set management */
8
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/moduleparam.h>
12 #include <linux/ip.h>
13 #include <linux/skbuff.h>
14 #include <linux/spinlock.h>
15 #include <linux/rculist.h>
16 #include <net/netlink.h>
17 #include <net/net_namespace.h>
18 #include <net/netns/generic.h>
19
20 #include <linux/netfilter.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/nfnetlink.h>
23 #include <linux/netfilter/ipset/ip_set.h>
24
/* All registered set types */
static LIST_HEAD(ip_set_type_list);
/* Protects ip_set_type_list */
static DEFINE_MUTEX(ip_set_type_mutex);
/* Protects the set references */
static DEFINE_RWLOCK(ip_set_ref_lock);
28
29 struct ip_set_net {
30 struct ip_set * __rcu *ip_set_list; /* all individual sets */
31 ip_set_id_t ip_set_max; /* max number of sets */
32 bool is_deleted; /* deleted by ip_set_net_exit */
33 bool is_destroyed; /* all sets are destroyed */
34 };
35
36 static unsigned int ip_set_net_id __read_mostly;
37
ip_set_pernet(struct net * net)38 static inline struct ip_set_net *ip_set_pernet(struct net *net)
39 {
40 return net_generic(net, ip_set_net_id);
41 }
42
43 #define IP_SET_INC 64
44 #define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
45
46 static unsigned int max_sets;
47
48 module_param(max_sets, int, 0600);
49 MODULE_PARM_DESC(max_sets, "maximal number of sets");
50 MODULE_LICENSE("GPL");
51 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
52 MODULE_DESCRIPTION("core IP set support");
53 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
54
/* Dereference @p when the nfnl mutex or ip_set_ref_lock is held */
#define ip_set_dereference(p)						\
	rcu_dereference_protected(p,					\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) ||		\
		lockdep_is_held(&ip_set_ref_lock))
/* Slot @id of the set array of @inst, same locking rule as above */
#define ip_set(inst, id)						\
	ip_set_dereference((inst)->ip_set_list)[id]
/* Slot @id of the set array of @inst, dereferenced without lockdep check */
#define ip_set_ref_netlink(inst,id)					\
	rcu_dereference_raw((inst)->ip_set_list)[id]
/* Dereference @p; lockdep additionally accepts a held nfnl mutex */
#define ip_set_dereference_nfnl(p)					\
	rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
66
67 /* The set types are implemented in modules and registered set types
68 * can be found in ip_set_type_list. Adding/deleting types is
69 * serialized by ip_set_type_mutex.
70 */
71
72 static inline void
ip_set_type_lock(void)73 ip_set_type_lock(void)
74 {
75 mutex_lock(&ip_set_type_mutex);
76 }
77
78 static inline void
ip_set_type_unlock(void)79 ip_set_type_unlock(void)
80 {
81 mutex_unlock(&ip_set_type_mutex);
82 }
83
84 /* Register and deregister settype */
85
86 static struct ip_set_type *
find_set_type(const char * name,u8 family,u8 revision)87 find_set_type(const char *name, u8 family, u8 revision)
88 {
89 struct ip_set_type *type;
90
91 list_for_each_entry_rcu(type, &ip_set_type_list, list)
92 if (STRNCMP(type->name, name) &&
93 (type->family == family ||
94 type->family == NFPROTO_UNSPEC) &&
95 revision >= type->revision_min &&
96 revision <= type->revision_max)
97 return type;
98 return NULL;
99 }
100
101 /* Unlock, try to load a set type module and lock again */
102 static bool
load_settype(const char * name)103 load_settype(const char *name)
104 {
105 nfnl_unlock(NFNL_SUBSYS_IPSET);
106 pr_debug("try to load ip_set_%s\n", name);
107 if (request_module("ip_set_%s", name) < 0) {
108 pr_warn("Can't find ip_set type %s\n", name);
109 nfnl_lock(NFNL_SUBSYS_IPSET);
110 return false;
111 }
112 nfnl_lock(NFNL_SUBSYS_IPSET);
113 return true;
114 }
115
116 /* Find a set type and reference it */
117 #define find_set_type_get(name, family, revision, found) \
118 __find_set_type_get(name, family, revision, found, false)
119
120 static int
__find_set_type_get(const char * name,u8 family,u8 revision,struct ip_set_type ** found,bool retry)121 __find_set_type_get(const char *name, u8 family, u8 revision,
122 struct ip_set_type **found, bool retry)
123 {
124 struct ip_set_type *type;
125 int err;
126
127 if (retry && !load_settype(name))
128 return -IPSET_ERR_FIND_TYPE;
129
130 rcu_read_lock();
131 *found = find_set_type(name, family, revision);
132 if (*found) {
133 err = !try_module_get((*found)->me) ? -EFAULT : 0;
134 goto unlock;
135 }
136 /* Make sure the type is already loaded
137 * but we don't support the revision
138 */
139 list_for_each_entry_rcu(type, &ip_set_type_list, list)
140 if (STRNCMP(type->name, name)) {
141 err = -IPSET_ERR_FIND_TYPE;
142 goto unlock;
143 }
144 rcu_read_unlock();
145
146 return retry ? -IPSET_ERR_FIND_TYPE :
147 __find_set_type_get(name, family, revision, found, true);
148
149 unlock:
150 rcu_read_unlock();
151 return err;
152 }
153
154 /* Find a given set type by name and family.
155 * If we succeeded, the supported minimal and maximum revisions are
156 * filled out.
157 */
158 #define find_set_type_minmax(name, family, min, max) \
159 __find_set_type_minmax(name, family, min, max, false)
160
161 static int
__find_set_type_minmax(const char * name,u8 family,u8 * min,u8 * max,bool retry)162 __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
163 bool retry)
164 {
165 struct ip_set_type *type;
166 bool found = false;
167
168 if (retry && !load_settype(name))
169 return -IPSET_ERR_FIND_TYPE;
170
171 *min = 255; *max = 0;
172 rcu_read_lock();
173 list_for_each_entry_rcu(type, &ip_set_type_list, list)
174 if (STRNCMP(type->name, name) &&
175 (type->family == family ||
176 type->family == NFPROTO_UNSPEC)) {
177 found = true;
178 if (type->revision_min < *min)
179 *min = type->revision_min;
180 if (type->revision_max > *max)
181 *max = type->revision_max;
182 }
183 rcu_read_unlock();
184 if (found)
185 return 0;
186
187 return retry ? -IPSET_ERR_FIND_TYPE :
188 __find_set_type_minmax(name, family, min, max, true);
189 }
190
/* Name of the protocol family @f as printed in the log messages */
#define family_name(f)							\
	((f) == NFPROTO_IPV4 ? "inet" :					\
	 (f) == NFPROTO_IPV6 ? "inet6" : "any")
193
194 /* Register a set type structure. The type is identified by
195 * the unique triple of name, family and revision.
196 */
197 int
ip_set_type_register(struct ip_set_type * type)198 ip_set_type_register(struct ip_set_type *type)
199 {
200 int ret = 0;
201
202 if (type->protocol != IPSET_PROTOCOL) {
203 pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
204 type->name, family_name(type->family),
205 type->revision_min, type->revision_max,
206 type->protocol, IPSET_PROTOCOL);
207 return -EINVAL;
208 }
209
210 ip_set_type_lock();
211 if (find_set_type(type->name, type->family, type->revision_min)) {
212 /* Duplicate! */
213 pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
214 type->name, family_name(type->family),
215 type->revision_min);
216 ip_set_type_unlock();
217 return -EINVAL;
218 }
219 list_add_rcu(&type->list, &ip_set_type_list);
220 pr_debug("type %s, family %s, revision %u:%u registered.\n",
221 type->name, family_name(type->family),
222 type->revision_min, type->revision_max);
223 ip_set_type_unlock();
224
225 return ret;
226 }
227 EXPORT_SYMBOL_GPL(ip_set_type_register);
228
229 /* Unregister a set type. There's a small race with ip_set_create */
230 void
ip_set_type_unregister(struct ip_set_type * type)231 ip_set_type_unregister(struct ip_set_type *type)
232 {
233 ip_set_type_lock();
234 if (!find_set_type(type->name, type->family, type->revision_min)) {
235 pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
236 type->name, family_name(type->family),
237 type->revision_min);
238 ip_set_type_unlock();
239 return;
240 }
241 list_del_rcu(&type->list);
242 pr_debug("type %s, family %s with revision min %u unregistered.\n",
243 type->name, family_name(type->family), type->revision_min);
244 ip_set_type_unlock();
245
246 synchronize_rcu();
247 }
248 EXPORT_SYMBOL_GPL(ip_set_type_unregister);
249
250 /* Utility functions */
251 void *
ip_set_alloc(size_t size)252 ip_set_alloc(size_t size)
253 {
254 void *members = NULL;
255
256 if (size < KMALLOC_MAX_SIZE)
257 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
258
259 if (members) {
260 pr_debug("%p: allocated with kmalloc\n", members);
261 return members;
262 }
263
264 members = vzalloc(size);
265 if (!members)
266 return NULL;
267 pr_debug("%p: allocated with vmalloc\n", members);
268
269 return members;
270 }
271 EXPORT_SYMBOL_GPL(ip_set_alloc);
272
/* Release memory with kvfree(); the debug message tells which of
 * vfree/kfree the address belongs to.
 */
void ip_set_free(void *members)
{
	pr_debug("%p: free with %s\n",
		 members, is_vmalloc_addr(members) ? "vfree" : "kfree");
	kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
281
282 static inline bool
flag_nested(const struct nlattr * nla)283 flag_nested(const struct nlattr *nla)
284 {
285 return nla->nla_type & NLA_F_NESTED;
286 }
287
288 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
289 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
290 [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
291 };
292
293 int
ip_set_get_ipaddr4(struct nlattr * nla,__be32 * ipaddr)294 ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
295 {
296 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
297
298 if (unlikely(!flag_nested(nla)))
299 return -IPSET_ERR_PROTOCOL;
300 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
301 ipaddr_policy, NULL))
302 return -IPSET_ERR_PROTOCOL;
303 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
304 return -IPSET_ERR_PROTOCOL;
305
306 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
307 return 0;
308 }
309 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
310
311 int
ip_set_get_ipaddr6(struct nlattr * nla,union nf_inet_addr * ipaddr)312 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
313 {
314 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
315
316 if (unlikely(!flag_nested(nla)))
317 return -IPSET_ERR_PROTOCOL;
318
319 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
320 ipaddr_policy, NULL))
321 return -IPSET_ERR_PROTOCOL;
322 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
323 return -IPSET_ERR_PROTOCOL;
324
325 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
326 sizeof(struct in6_addr));
327 return 0;
328 }
329 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
330
331 typedef void (*destroyer)(struct ip_set *, void *);
332 /* ipset data extension types, in size order */
333
334 const struct ip_set_ext_type ip_set_extensions[] = {
335 [IPSET_EXT_ID_COUNTER] = {
336 .type = IPSET_EXT_COUNTER,
337 .flag = IPSET_FLAG_WITH_COUNTERS,
338 .len = sizeof(struct ip_set_counter),
339 .align = __alignof__(struct ip_set_counter),
340 },
341 [IPSET_EXT_ID_TIMEOUT] = {
342 .type = IPSET_EXT_TIMEOUT,
343 .len = sizeof(unsigned long),
344 .align = __alignof__(unsigned long),
345 },
346 [IPSET_EXT_ID_SKBINFO] = {
347 .type = IPSET_EXT_SKBINFO,
348 .flag = IPSET_FLAG_WITH_SKBINFO,
349 .len = sizeof(struct ip_set_skbinfo),
350 .align = __alignof__(struct ip_set_skbinfo),
351 },
352 [IPSET_EXT_ID_COMMENT] = {
353 .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
354 .flag = IPSET_FLAG_WITH_COMMENT,
355 .len = sizeof(struct ip_set_comment),
356 .align = __alignof__(struct ip_set_comment),
357 .destroy = (destroyer) ip_set_comment_free,
358 },
359 };
360 EXPORT_SYMBOL_GPL(ip_set_extensions);
361
362 static inline bool
add_extension(enum ip_set_ext_id id,u32 flags,struct nlattr * tb[])363 add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
364 {
365 return ip_set_extensions[id].flag ?
366 (flags & ip_set_extensions[id].flag) :
367 !!tb[IPSET_ATTR_TIMEOUT];
368 }
369
370 size_t
ip_set_elem_len(struct ip_set * set,struct nlattr * tb[],size_t len,size_t align)371 ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
372 size_t align)
373 {
374 enum ip_set_ext_id id;
375 u32 cadt_flags = 0;
376
377 if (tb[IPSET_ATTR_CADT_FLAGS])
378 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
379 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
380 set->flags |= IPSET_CREATE_FLAG_FORCEADD;
381 if (!align)
382 align = 1;
383 for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
384 if (!add_extension(id, cadt_flags, tb))
385 continue;
386 if (align < ip_set_extensions[id].align)
387 align = ip_set_extensions[id].align;
388 len = ALIGN(len, ip_set_extensions[id].align);
389 set->offset[id] = len;
390 set->extensions |= ip_set_extensions[id].type;
391 len += ip_set_extensions[id].len;
392 }
393 return ALIGN(len, align);
394 }
395 EXPORT_SYMBOL_GPL(ip_set_elem_len);
396
397 int
ip_set_get_extensions(struct ip_set * set,struct nlattr * tb[],struct ip_set_ext * ext)398 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
399 struct ip_set_ext *ext)
400 {
401 u64 fullmark;
402
403 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
404 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
405 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
406 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
407 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
408 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
409 return -IPSET_ERR_PROTOCOL;
410
411 if (tb[IPSET_ATTR_TIMEOUT]) {
412 if (!SET_WITH_TIMEOUT(set))
413 return -IPSET_ERR_TIMEOUT;
414 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
415 }
416 if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
417 if (!SET_WITH_COUNTER(set))
418 return -IPSET_ERR_COUNTER;
419 if (tb[IPSET_ATTR_BYTES])
420 ext->bytes = be64_to_cpu(nla_get_be64(
421 tb[IPSET_ATTR_BYTES]));
422 if (tb[IPSET_ATTR_PACKETS])
423 ext->packets = be64_to_cpu(nla_get_be64(
424 tb[IPSET_ATTR_PACKETS]));
425 }
426 if (tb[IPSET_ATTR_COMMENT]) {
427 if (!SET_WITH_COMMENT(set))
428 return -IPSET_ERR_COMMENT;
429 ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
430 }
431 if (tb[IPSET_ATTR_SKBMARK]) {
432 if (!SET_WITH_SKBINFO(set))
433 return -IPSET_ERR_SKBINFO;
434 fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
435 ext->skbinfo.skbmark = fullmark >> 32;
436 ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
437 }
438 if (tb[IPSET_ATTR_SKBPRIO]) {
439 if (!SET_WITH_SKBINFO(set))
440 return -IPSET_ERR_SKBINFO;
441 ext->skbinfo.skbprio =
442 be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
443 }
444 if (tb[IPSET_ATTR_SKBQUEUE]) {
445 if (!SET_WITH_SKBINFO(set))
446 return -IPSET_ERR_SKBINFO;
447 ext->skbinfo.skbqueue =
448 be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
449 }
450 return 0;
451 }
452 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
453
454 int
ip_set_put_extensions(struct sk_buff * skb,const struct ip_set * set,const void * e,bool active)455 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
456 const void *e, bool active)
457 {
458 if (SET_WITH_TIMEOUT(set)) {
459 unsigned long *timeout = ext_timeout(e, set);
460
461 if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
462 htonl(active ? ip_set_timeout_get(timeout)
463 : *timeout)))
464 return -EMSGSIZE;
465 }
466 if (SET_WITH_COUNTER(set) &&
467 ip_set_put_counter(skb, ext_counter(e, set)))
468 return -EMSGSIZE;
469 if (SET_WITH_COMMENT(set) &&
470 ip_set_put_comment(skb, ext_comment(e, set)))
471 return -EMSGSIZE;
472 if (SET_WITH_SKBINFO(set) &&
473 ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
474 return -EMSGSIZE;
475 return 0;
476 }
477 EXPORT_SYMBOL_GPL(ip_set_put_extensions);
478
479 bool
ip_set_match_extensions(struct ip_set * set,const struct ip_set_ext * ext,struct ip_set_ext * mext,u32 flags,void * data)480 ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
481 struct ip_set_ext *mext, u32 flags, void *data)
482 {
483 if (SET_WITH_TIMEOUT(set) &&
484 ip_set_timeout_expired(ext_timeout(data, set)))
485 return false;
486 if (SET_WITH_COUNTER(set)) {
487 struct ip_set_counter *counter = ext_counter(data, set);
488
489 ip_set_update_counter(counter, ext, flags);
490
491 if (flags & IPSET_FLAG_MATCH_COUNTERS &&
492 !(ip_set_match_counter(ip_set_get_packets(counter),
493 mext->packets, mext->packets_op) &&
494 ip_set_match_counter(ip_set_get_bytes(counter),
495 mext->bytes, mext->bytes_op)))
496 return false;
497 }
498 if (SET_WITH_SKBINFO(set))
499 ip_set_get_skbinfo(ext_skbinfo(data, set),
500 ext, mext, flags);
501 return true;
502 }
503 EXPORT_SYMBOL_GPL(ip_set_match_extensions);
504
505 /* Creating/destroying/renaming/swapping affect the existence and
506 * the properties of a set. All of these can be executed from userspace
507 * only and serialized by the nfnl mutex indirectly from nfnetlink.
508 *
509 * Sets are identified by their index in ip_set_list and the index
510 * is used by the external references (set/SET netfilter modules).
511 *
512 * The set behind an index may change by swapping only, from userspace.
513 */
514
515 static inline void
__ip_set_get(struct ip_set * set)516 __ip_set_get(struct ip_set *set)
517 {
518 write_lock_bh(&ip_set_ref_lock);
519 set->ref++;
520 write_unlock_bh(&ip_set_ref_lock);
521 }
522
523 static inline void
__ip_set_put(struct ip_set * set)524 __ip_set_put(struct ip_set *set)
525 {
526 write_lock_bh(&ip_set_ref_lock);
527 BUG_ON(set->ref == 0);
528 set->ref--;
529 write_unlock_bh(&ip_set_ref_lock);
530 }
531
532 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
533 * a separate reference counter
534 */
535 static void
__ip_set_get_netlink(struct ip_set * set)536 __ip_set_get_netlink(struct ip_set *set)
537 {
538 write_lock_bh(&ip_set_ref_lock);
539 set->ref_netlink++;
540 write_unlock_bh(&ip_set_ref_lock);
541 }
542
543 static inline void
__ip_set_put_netlink(struct ip_set * set)544 __ip_set_put_netlink(struct ip_set *set)
545 {
546 write_lock_bh(&ip_set_ref_lock);
547 BUG_ON(set->ref_netlink == 0);
548 set->ref_netlink--;
549 write_unlock_bh(&ip_set_ref_lock);
550 }
551
/* Add, del and test set entries from kernel.
 *
 * The set behind the index must exist and must be referenced
 * so it can't be destroyed (or changed) under our feet.
 */
557
558 static inline struct ip_set *
ip_set_rcu_get(struct net * net,ip_set_id_t index)559 ip_set_rcu_get(struct net *net, ip_set_id_t index)
560 {
561 struct ip_set_net *inst = ip_set_pernet(net);
562
563 /* ip_set_list and the set pointer need to be protected */
564 return ip_set_dereference_nfnl(inst->ip_set_list)[index];
565 }
566
567 static inline void
ip_set_lock(struct ip_set * set)568 ip_set_lock(struct ip_set *set)
569 {
570 if (!set->variant->region_lock)
571 spin_lock_bh(&set->lock);
572 }
573
574 static inline void
ip_set_unlock(struct ip_set * set)575 ip_set_unlock(struct ip_set *set)
576 {
577 if (!set->variant->region_lock)
578 spin_unlock_bh(&set->lock);
579 }
580
581 int
ip_set_test(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)582 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
583 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
584 {
585 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
586 int ret = 0;
587
588 BUG_ON(!set);
589 pr_debug("set %s, index %u\n", set->name, index);
590
591 if (opt->dim < set->type->dimension ||
592 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
593 return 0;
594
595 rcu_read_lock_bh();
596 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
597 rcu_read_unlock_bh();
598
599 if (ret == -EAGAIN) {
600 /* Type requests element to be completed */
601 pr_debug("element must be completed, ADD is triggered\n");
602 ip_set_lock(set);
603 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
604 ip_set_unlock(set);
605 ret = 1;
606 } else {
607 /* --return-nomatch: invert matched element */
608 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
609 (set->type->features & IPSET_TYPE_NOMATCH) &&
610 (ret > 0 || ret == -ENOTEMPTY))
611 ret = -ret;
612 }
613
614 /* Convert error codes to nomatch */
615 return (ret < 0 ? 0 : ret);
616 }
617 EXPORT_SYMBOL_GPL(ip_set_test);
618
619 int
ip_set_add(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)620 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
621 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
622 {
623 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
624 int ret;
625
626 BUG_ON(!set);
627 pr_debug("set %s, index %u\n", set->name, index);
628
629 if (opt->dim < set->type->dimension ||
630 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
631 return -IPSET_ERR_TYPE_MISMATCH;
632
633 ip_set_lock(set);
634 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
635 ip_set_unlock(set);
636
637 return ret;
638 }
639 EXPORT_SYMBOL_GPL(ip_set_add);
640
641 int
ip_set_del(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)642 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
643 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
644 {
645 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
646 int ret = 0;
647
648 BUG_ON(!set);
649 pr_debug("set %s, index %u\n", set->name, index);
650
651 if (opt->dim < set->type->dimension ||
652 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
653 return -IPSET_ERR_TYPE_MISMATCH;
654
655 ip_set_lock(set);
656 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
657 ip_set_unlock(set);
658
659 return ret;
660 }
661 EXPORT_SYMBOL_GPL(ip_set_del);
662
663 /* Find set by name, reference it once. The reference makes sure the
664 * thing pointed to, does not go away under our feet.
665 *
666 */
667 ip_set_id_t
ip_set_get_byname(struct net * net,const char * name,struct ip_set ** set)668 ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
669 {
670 ip_set_id_t i, index = IPSET_INVALID_ID;
671 struct ip_set *s;
672 struct ip_set_net *inst = ip_set_pernet(net);
673
674 rcu_read_lock();
675 for (i = 0; i < inst->ip_set_max; i++) {
676 s = rcu_dereference(inst->ip_set_list)[i];
677 if (s && STRNCMP(s->name, name)) {
678 __ip_set_get(s);
679 index = i;
680 *set = s;
681 break;
682 }
683 }
684 rcu_read_unlock();
685
686 return index;
687 }
688 EXPORT_SYMBOL_GPL(ip_set_get_byname);
689
690 /* If the given set pointer points to a valid set, decrement
691 * reference count by 1. The caller shall not assume the index
692 * to be valid, after calling this function.
693 *
694 */
695
696 static inline void
__ip_set_put_byindex(struct ip_set_net * inst,ip_set_id_t index)697 __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
698 {
699 struct ip_set *set;
700
701 rcu_read_lock();
702 set = rcu_dereference(inst->ip_set_list)[index];
703 if (set)
704 __ip_set_put(set);
705 rcu_read_unlock();
706 }
707
708 void
ip_set_put_byindex(struct net * net,ip_set_id_t index)709 ip_set_put_byindex(struct net *net, ip_set_id_t index)
710 {
711 struct ip_set_net *inst = ip_set_pernet(net);
712
713 __ip_set_put_byindex(inst, index);
714 }
715 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
716
717 /* Get the name of a set behind a set index.
718 * Set itself is protected by RCU, but its name isn't: to protect against
719 * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
720 * name.
721 */
722 void
ip_set_name_byindex(struct net * net,ip_set_id_t index,char * name)723 ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
724 {
725 struct ip_set *set = ip_set_rcu_get(net, index);
726
727 BUG_ON(!set);
728
729 read_lock_bh(&ip_set_ref_lock);
730 strncpy(name, set->name, IPSET_MAXNAMELEN);
731 read_unlock_bh(&ip_set_ref_lock);
732 }
733 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
734
735 /* Routines to call by external subsystems, which do not
736 * call nfnl_lock for us.
737 */
738
739 /* Find set by index, reference it once. The reference makes sure the
740 * thing pointed to, does not go away under our feet.
741 *
742 * The nfnl mutex is used in the function.
743 */
744 ip_set_id_t
ip_set_nfnl_get_byindex(struct net * net,ip_set_id_t index)745 ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
746 {
747 struct ip_set *set;
748 struct ip_set_net *inst = ip_set_pernet(net);
749
750 if (index >= inst->ip_set_max)
751 return IPSET_INVALID_ID;
752
753 nfnl_lock(NFNL_SUBSYS_IPSET);
754 set = ip_set(inst, index);
755 if (set)
756 __ip_set_get(set);
757 else
758 index = IPSET_INVALID_ID;
759 nfnl_unlock(NFNL_SUBSYS_IPSET);
760
761 return index;
762 }
763 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
764
765 /* If the given set pointer points to a valid set, decrement
766 * reference count by 1. The caller shall not assume the index
767 * to be valid, after calling this function.
768 *
769 * The nfnl mutex is used in the function.
770 */
771 void
ip_set_nfnl_put(struct net * net,ip_set_id_t index)772 ip_set_nfnl_put(struct net *net, ip_set_id_t index)
773 {
774 struct ip_set *set;
775 struct ip_set_net *inst = ip_set_pernet(net);
776
777 nfnl_lock(NFNL_SUBSYS_IPSET);
778 if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
779 set = ip_set(inst, index);
780 if (set)
781 __ip_set_put(set);
782 }
783 nfnl_unlock(NFNL_SUBSYS_IPSET);
784 }
785 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
786
787 /* Communication protocol with userspace over netlink.
788 *
789 * The commands are serialized by the nfnl mutex.
790 */
791
protocol(const struct nlattr * const tb[])792 static inline u8 protocol(const struct nlattr * const tb[])
793 {
794 return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
795 }
796
797 static inline bool
protocol_failed(const struct nlattr * const tb[])798 protocol_failed(const struct nlattr * const tb[])
799 {
800 return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
801 }
802
803 static inline bool
protocol_min_failed(const struct nlattr * const tb[])804 protocol_min_failed(const struct nlattr * const tb[])
805 {
806 return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
807 }
808
809 static inline u32
flag_exist(const struct nlmsghdr * nlh)810 flag_exist(const struct nlmsghdr *nlh)
811 {
812 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
813 }
814
815 static struct nlmsghdr *
start_msg(struct sk_buff * skb,u32 portid,u32 seq,unsigned int flags,enum ipset_cmd cmd)816 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
817 enum ipset_cmd cmd)
818 {
819 return nfnl_msg_put(skb, portid, seq,
820 nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
821 NFPROTO_IPV4, NFNETLINK_V0, 0);
822 }
823
824 /* Create a set */
825
826 static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
827 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
828 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
829 .len = IPSET_MAXNAMELEN - 1 },
830 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
831 .len = IPSET_MAXNAMELEN - 1},
832 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
833 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
834 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
835 };
836
837 static struct ip_set *
find_set_and_id(struct ip_set_net * inst,const char * name,ip_set_id_t * id)838 find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
839 {
840 struct ip_set *set = NULL;
841 ip_set_id_t i;
842
843 *id = IPSET_INVALID_ID;
844 for (i = 0; i < inst->ip_set_max; i++) {
845 set = ip_set(inst, i);
846 if (set && STRNCMP(set->name, name)) {
847 *id = i;
848 break;
849 }
850 }
851 return (*id == IPSET_INVALID_ID ? NULL : set);
852 }
853
854 static inline struct ip_set *
find_set(struct ip_set_net * inst,const char * name)855 find_set(struct ip_set_net *inst, const char *name)
856 {
857 ip_set_id_t id;
858
859 return find_set_and_id(inst, name, &id);
860 }
861
862 static int
find_free_id(struct ip_set_net * inst,const char * name,ip_set_id_t * index,struct ip_set ** set)863 find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
864 struct ip_set **set)
865 {
866 struct ip_set *s;
867 ip_set_id_t i;
868
869 *index = IPSET_INVALID_ID;
870 for (i = 0; i < inst->ip_set_max; i++) {
871 s = ip_set(inst, i);
872 if (!s) {
873 if (*index == IPSET_INVALID_ID)
874 *index = i;
875 } else if (STRNCMP(name, s->name)) {
876 /* Name clash */
877 *set = s;
878 return -EEXIST;
879 }
880 }
881 if (*index == IPSET_INVALID_ID)
882 /* No free slot remained */
883 return -IPSET_ERR_MAX_SETS;
884 return 0;
885 }
886
ip_set_none(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)887 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
888 const struct nlmsghdr *nlh,
889 const struct nlattr * const attr[],
890 struct netlink_ext_ack *extack)
891 {
892 return -EOPNOTSUPP;
893 }
894
ip_set_create(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)895 static int ip_set_create(struct net *net, struct sock *ctnl,
896 struct sk_buff *skb, const struct nlmsghdr *nlh,
897 const struct nlattr * const attr[],
898 struct netlink_ext_ack *extack)
899 {
900 struct ip_set_net *inst = ip_set_pernet(net);
901 struct ip_set *set, *clash = NULL;
902 ip_set_id_t index = IPSET_INVALID_ID;
903 struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
904 const char *name, *typename;
905 u8 family, revision;
906 u32 flags = flag_exist(nlh);
907 int ret = 0;
908
909 if (unlikely(protocol_min_failed(attr) ||
910 !attr[IPSET_ATTR_SETNAME] ||
911 !attr[IPSET_ATTR_TYPENAME] ||
912 !attr[IPSET_ATTR_REVISION] ||
913 !attr[IPSET_ATTR_FAMILY] ||
914 (attr[IPSET_ATTR_DATA] &&
915 !flag_nested(attr[IPSET_ATTR_DATA]))))
916 return -IPSET_ERR_PROTOCOL;
917
918 name = nla_data(attr[IPSET_ATTR_SETNAME]);
919 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
920 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
921 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
922 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
923 name, typename, family_name(family), revision);
924
925 /* First, and without any locks, allocate and initialize
926 * a normal base set structure.
927 */
928 set = kzalloc(sizeof(*set), GFP_KERNEL);
929 if (!set)
930 return -ENOMEM;
931 spin_lock_init(&set->lock);
932 strlcpy(set->name, name, IPSET_MAXNAMELEN);
933 set->family = family;
934 set->revision = revision;
935
936 /* Next, check that we know the type, and take
937 * a reference on the type, to make sure it stays available
938 * while constructing our new set.
939 *
940 * After referencing the type, we try to create the type
941 * specific part of the set without holding any locks.
942 */
943 ret = find_set_type_get(typename, family, revision, &set->type);
944 if (ret)
945 goto out;
946
947 /* Without holding any locks, create private part. */
948 if (attr[IPSET_ATTR_DATA] &&
949 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
950 set->type->create_policy, NULL)) {
951 ret = -IPSET_ERR_PROTOCOL;
952 goto put_out;
953 }
954
955 ret = set->type->create(net, set, tb, flags);
956 if (ret != 0)
957 goto put_out;
958
959 /* BTW, ret==0 here. */
960
961 /* Here, we have a valid, constructed set and we are protected
962 * by the nfnl mutex. Find the first free index in ip_set_list
963 * and check clashing.
964 */
965 ret = find_free_id(inst, set->name, &index, &clash);
966 if (ret == -EEXIST) {
967 /* If this is the same set and requested, ignore error */
968 if ((flags & IPSET_FLAG_EXIST) &&
969 STRNCMP(set->type->name, clash->type->name) &&
970 set->type->family == clash->type->family &&
971 set->type->revision_min == clash->type->revision_min &&
972 set->type->revision_max == clash->type->revision_max &&
973 set->variant->same_set(set, clash))
974 ret = 0;
975 goto cleanup;
976 } else if (ret == -IPSET_ERR_MAX_SETS) {
977 struct ip_set **list, **tmp;
978 ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
979
980 if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
981 /* Wraparound */
982 goto cleanup;
983
984 list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
985 if (!list)
986 goto cleanup;
987 /* nfnl mutex is held, both lists are valid */
988 tmp = ip_set_dereference(inst->ip_set_list);
989 memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
990 rcu_assign_pointer(inst->ip_set_list, list);
991 /* Make sure all current packets have passed through */
992 synchronize_net();
993 /* Use new list */
994 index = inst->ip_set_max;
995 inst->ip_set_max = i;
996 kvfree(tmp);
997 ret = 0;
998 } else if (ret) {
999 goto cleanup;
1000 }
1001
1002 /* Finally! Add our shiny new set to the list, and be done. */
1003 pr_debug("create: '%s' created with index %u!\n", set->name, index);
1004 ip_set(inst, index) = set;
1005
1006 return ret;
1007
1008 cleanup:
1009 set->variant->cancel_gc(set);
1010 set->variant->destroy(set);
1011 put_out:
1012 module_put(set->type->me);
1013 out:
1014 kfree(set);
1015 return ret;
1016 }
1017
1018 /* Destroy sets */
1019
/* Attribute policy of the commands which take at most a set name.
 * The callback table of this file wires it to the DESTROY, FLUSH,
 * SAVE and HEADER commands.
 */
static const struct nla_policy
ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	/* NUL terminated string, length limit is IPSET_MAXNAMELEN - 1 */
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
1026
1027 static void
ip_set_destroy_set(struct ip_set * set)1028 ip_set_destroy_set(struct ip_set *set)
1029 {
1030 pr_debug("set: %s\n", set->name);
1031
1032 /* Must call it without holding any lock */
1033 set->variant->destroy(set);
1034 module_put(set->type->me);
1035 kfree(set);
1036 }
1037
1038 static void
ip_set_destroy_set_rcu(struct rcu_head * head)1039 ip_set_destroy_set_rcu(struct rcu_head *head)
1040 {
1041 struct ip_set *set = container_of(head, struct ip_set, rcu);
1042
1043 ip_set_destroy_set(set);
1044 }
1045
/* IPSET_CMD_DESTROY handler: destroy the set named by IPSET_ATTR_SETNAME,
 * or all sets of the namespace when no name is given.
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL when the protocol check fails,
 * -ENOENT when the named set does not exist and -IPSET_ERR_BUSY when
 * a set to be destroyed is still referenced. When all sets are to be
 * destroyed, a single referenced set makes the whole command fail
 * before anything is destroyed.
 */
static int ip_set_destroy(struct net *net, struct sock *ctnl,
			  struct sk_buff *skb, const struct nlmsghdr *nlh,
			  const struct nlattr * const attr[],
			  struct netlink_ext_ack *extack)
{
	struct ip_set_net *inst = ip_set_pernet(net);
	struct ip_set *s;
	ip_set_id_t i;
	int ret = 0;

	if (unlikely(protocol_min_failed(attr)))
		return -IPSET_ERR_PROTOCOL;

	/* Commands are serialized and references are
	 * protected by the ip_set_ref_lock.
	 * External systems (i.e. xt_set) must call
	 * ip_set_put|get_nfnl_* functions, that way we
	 * can safely check references here.
	 *
	 * list:set timer can only decrement the reference
	 * counter, so if it's already zero, we can proceed
	 * without holding the lock.
	 */
	if (!attr[IPSET_ATTR_SETNAME]) {
		/* Must wait for flush to be really finished in list:set */
		rcu_barrier();
		/* First pass: refuse the command if any set is in use */
		read_lock_bh(&ip_set_ref_lock);
		for (i = 0; i < inst->ip_set_max; i++) {
			s = ip_set(inst, i);
			if (s && (s->ref || s->ref_netlink)) {
				ret = -IPSET_ERR_BUSY;
				goto out;
			}
		}
		/* ip_set_dump_do() checks this flag under ip_set_ref_lock
		 * and stops dumping while it is set.
		 */
		inst->is_destroyed = true;
		read_unlock_bh(&ip_set_ref_lock);
		/* Second pass: unhook and release the sets one by one */
		for (i = 0; i < inst->ip_set_max; i++) {
			s = ip_set(inst, i);
			if (s) {
				ip_set(inst, i) = NULL;
				/* Must cancel garbage collectors */
				s->variant->cancel_gc(s);
				ip_set_destroy_set(s);
			}
		}
		/* Modified by ip_set_destroy() only, which is serialized */
		inst->is_destroyed = false;
	} else {
		u16 features = 0;

		read_lock_bh(&ip_set_ref_lock);
		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
				    &i);
		if (!s) {
			ret = -ENOENT;
			goto out;
		} else if (s->ref || s->ref_netlink) {
			ret = -IPSET_ERR_BUSY;
			goto out;
		}
		/* Saved because it decides about rcu_barrier() below */
		features = s->type->features;
		/* Unhook the set from the list of the namespace */
		ip_set(inst, i) = NULL;
		read_unlock_bh(&ip_set_ref_lock);
		if (features & IPSET_TYPE_NAME) {
			/* Must wait for flush to be really finished  */
			rcu_barrier();
		}
		/* Must cancel garbage collectors */
		s->variant->cancel_gc(s);
		/* The set itself is released from an RCU callback */
		call_rcu(&s->rcu, ip_set_destroy_set_rcu);
	}
	return 0;
out:
	/* Error paths jump here with ip_set_ref_lock still read-locked */
	read_unlock_bh(&ip_set_ref_lock);
	return ret;
}
1123
1124 /* Flush sets */
1125
/* Call the type-specific flush operation of a single set; the call is
 * bracketed by ip_set_lock()/ip_set_unlock() on that set.
 */
static void
ip_set_flush_set(struct ip_set *set)
{
	pr_debug("set: %s\n",  set->name);

	ip_set_lock(set);
	set->variant->flush(set);
	ip_set_unlock(set);
}
1135
ip_set_flush(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1136 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1137 const struct nlmsghdr *nlh,
1138 const struct nlattr * const attr[],
1139 struct netlink_ext_ack *extack)
1140 {
1141 struct ip_set_net *inst = ip_set_pernet(net);
1142 struct ip_set *s;
1143 ip_set_id_t i;
1144
1145 if (unlikely(protocol_min_failed(attr)))
1146 return -IPSET_ERR_PROTOCOL;
1147
1148 if (!attr[IPSET_ATTR_SETNAME]) {
1149 for (i = 0; i < inst->ip_set_max; i++) {
1150 s = ip_set(inst, i);
1151 if (s)
1152 ip_set_flush_set(s);
1153 }
1154 } else {
1155 s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1156 if (!s)
1157 return -ENOENT;
1158
1159 ip_set_flush_set(s);
1160 }
1161
1162 return 0;
1163 }
1164
1165 /* Rename a set */
1166
/* Attribute policy of the commands which work on two set names;
 * the callback table of this file wires it to RENAME and SWAP.
 */
static const struct nla_policy
ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	/* Both names: NUL terminated, limit is IPSET_MAXNAMELEN - 1 */
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
1175
ip_set_rename(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1176 static int ip_set_rename(struct net *net, struct sock *ctnl,
1177 struct sk_buff *skb, const struct nlmsghdr *nlh,
1178 const struct nlattr * const attr[],
1179 struct netlink_ext_ack *extack)
1180 {
1181 struct ip_set_net *inst = ip_set_pernet(net);
1182 struct ip_set *set, *s;
1183 const char *name2;
1184 ip_set_id_t i;
1185 int ret = 0;
1186
1187 if (unlikely(protocol_min_failed(attr) ||
1188 !attr[IPSET_ATTR_SETNAME] ||
1189 !attr[IPSET_ATTR_SETNAME2]))
1190 return -IPSET_ERR_PROTOCOL;
1191
1192 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1193 if (!set)
1194 return -ENOENT;
1195
1196 write_lock_bh(&ip_set_ref_lock);
1197 if (set->ref != 0 || set->ref_netlink != 0) {
1198 ret = -IPSET_ERR_REFERENCED;
1199 goto out;
1200 }
1201
1202 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1203 for (i = 0; i < inst->ip_set_max; i++) {
1204 s = ip_set(inst, i);
1205 if (s && STRNCMP(s->name, name2)) {
1206 ret = -IPSET_ERR_EXIST_SETNAME2;
1207 goto out;
1208 }
1209 }
1210 strncpy(set->name, name2, IPSET_MAXNAMELEN);
1211
1212 out:
1213 write_unlock_bh(&ip_set_ref_lock);
1214 return ret;
1215 }
1216
1217 /* Swap two sets so that name/index points to the other.
1218 * References and set names are also swapped.
1219 *
1220 * The commands are serialized by the nfnl mutex and references are
1221 * protected by the ip_set_ref_lock. The kernel interfaces
1222 * do not hold the mutex but the pointer settings are atomic
1223 * so the ip_set_list always contains valid pointers to the sets.
1224 */
1225
/* Returns 0 on success, -IPSET_ERR_PROTOCOL when the protocol check fails
 * or a name attribute is missing, -ENOENT when the first set does not
 * exist, -IPSET_ERR_EXIST_SETNAME2 when the second one does not exist,
 * -IPSET_ERR_TYPE_MISMATCH when the type features or the families differ
 * and -EBUSY when one of the sets has a netlink reference.
 */
static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
		       const struct nlmsghdr *nlh,
		       const struct nlattr * const attr[],
		       struct netlink_ext_ack *extack)
{
	struct ip_set_net *inst = ip_set_pernet(net);
	struct ip_set *from, *to;
	ip_set_id_t from_id, to_id;
	char from_name[IPSET_MAXNAMELEN];

	if (unlikely(protocol_min_failed(attr) ||
		     !attr[IPSET_ATTR_SETNAME] ||
		     !attr[IPSET_ATTR_SETNAME2]))
		return -IPSET_ERR_PROTOCOL;

	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
			       &from_id);
	if (!from)
		return -ENOENT;

	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
			     &to_id);
	if (!to)
		return -IPSET_ERR_EXIST_SETNAME2;

	/* Features must not change.
	 * Not an artificial restriction anymore, as we must prevent
	 * possible loops created by swapping in setlist type of sets.
	 */
	if (!(from->type->features == to->type->features &&
	      from->family == to->family))
		return -IPSET_ERR_TYPE_MISMATCH;

	write_lock_bh(&ip_set_ref_lock);

	/* Only the netlink references are checked here; the ref counters
	 * are exchanged below together with the names and list slots.
	 */
	if (from->ref_netlink || to->ref_netlink) {
		write_unlock_bh(&ip_set_ref_lock);
		return -EBUSY;
	}

	/* Exchange the names through a temporary buffer */
	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
	strncpy(to->name, from_name, IPSET_MAXNAMELEN);

	swap(from->ref, to->ref);
	/* Each index now points to the other set */
	ip_set(inst, from_id) = to;
	ip_set(inst, to_id) = from;
	write_unlock_bh(&ip_set_ref_lock);

	return 0;
}
1277
1278 /* List/save set data */
1279
/* Dump types, kept in the low 16 bits of cb->args[IPSET_CB_DUMP] */
#define DUMP_INIT		0	/* zero: ip_set_dump_do() returns -EINVAL */
#define DUMP_ALL		1	/* all sets, except the IPSET_DUMP_LAST ones */
#define DUMP_ONE		2	/* the single set named in the request */
#define DUMP_LAST		3	/* second pass: the IPSET_DUMP_LAST sets */

/* Split the stored value: type in the low, flags in the high 16 bits */
#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
1287
1288 static int
ip_set_dump_done(struct netlink_callback * cb)1289 ip_set_dump_done(struct netlink_callback *cb)
1290 {
1291 if (cb->args[IPSET_CB_ARG0]) {
1292 struct ip_set_net *inst =
1293 (struct ip_set_net *)cb->args[IPSET_CB_NET];
1294 ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1295 struct ip_set *set = ip_set_ref_netlink(inst, index);
1296
1297 if (set->variant->uref)
1298 set->variant->uref(set, cb, false);
1299 pr_debug("release set %s\n", set->name);
1300 __ip_set_put_netlink(set);
1301 }
1302 return 0;
1303 }
1304
1305 static inline void
dump_attrs(struct nlmsghdr * nlh)1306 dump_attrs(struct nlmsghdr *nlh)
1307 {
1308 const struct nlattr *attr;
1309 int rem;
1310
1311 pr_debug("dump nlmsg\n");
1312 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1313 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1314 }
1315 }
1316
/* Attribute policy of the LIST command (see the callback table of this
 * file); ip_set_dump_start() parses the request with it again.
 */
static const struct nla_policy
ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	/* Optional: without a name all sets are dumped */
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	/* Optional listing flags, stored in the upper half of the dump type */
	[IPSET_ATTR_FLAGS]	= { .type = NLA_U32 },
};
1324
1325 static int
ip_set_dump_start(struct netlink_callback * cb)1326 ip_set_dump_start(struct netlink_callback *cb)
1327 {
1328 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1329 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1330 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1331 struct nlattr *attr = (void *)nlh + min_len;
1332 struct sk_buff *skb = cb->skb;
1333 struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1334 u32 dump_type;
1335 int ret;
1336
1337 ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
1338 nlh->nlmsg_len - min_len,
1339 ip_set_dump_policy, NULL);
1340 if (ret)
1341 goto error;
1342
1343 cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
1344 if (cda[IPSET_ATTR_SETNAME]) {
1345 ip_set_id_t index;
1346 struct ip_set *set;
1347
1348 set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
1349 &index);
1350 if (!set) {
1351 ret = -ENOENT;
1352 goto error;
1353 }
1354 dump_type = DUMP_ONE;
1355 cb->args[IPSET_CB_INDEX] = index;
1356 } else {
1357 dump_type = DUMP_ALL;
1358 }
1359
1360 if (cda[IPSET_ATTR_FLAGS]) {
1361 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1362
1363 dump_type |= (f << 16);
1364 }
1365 cb->args[IPSET_CB_NET] = (unsigned long)inst;
1366 cb->args[IPSET_CB_DUMP] = dump_type;
1367
1368 return 0;
1369
1370 error:
1371 /* We have to create and send the error message manually :-( */
1372 if (nlh->nlmsg_flags & NLM_F_ACK) {
1373 netlink_ack(cb->skb, nlh, ret, NULL);
1374 }
1375 return ret;
1376 }
1377
/* .dump callback, see the netlink_dump_control in ip_set_dump().
 *
 * The state of the dump is kept in cb->args[]:
 *   IPSET_CB_DUMP  - dump type and listing flags (DUMP_TYPE, DUMP_FLAGS)
 *   IPSET_CB_INDEX - index of the set to continue with
 *   IPSET_CB_ARG0  - non-zero while the listing of the current set is
 *		      unfinished; the netlink reference of the set is
 *		      kept meanwhile
 *   IPSET_CB_PROTO - protocol version put into the reply
 *
 * At most one message is added to skb per call: every path which
 * starts a message leaves the loop through a goto.
 *
 * Returns a negative error code, otherwise skb->len.
 */
static int
ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb)
{
	ip_set_id_t index = IPSET_INVALID_ID, max;
	struct ip_set *set = NULL;
	struct nlmsghdr *nlh = NULL;
	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
	u32 dump_type, dump_flags;
	bool is_destroyed;
	int ret = 0;

	/* A zero dump state means it was never set up */
	if (!cb->args[IPSET_CB_DUMP])
		return -EINVAL;

	/* Index beyond the list: nothing (more) to dump.
	 * NOTE(review): presumably the IPSET_INVALID_ID stored at next_set
	 * ends a DUMP_ONE here - confirm it is always >= ip_set_max.
	 */
	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
		goto out;

	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
	/* DUMP_ONE restricts the loop below to the stored index */
	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
				    : inst->ip_set_max;
dump_last:
	pr_debug("dump type, flag: %u %u index: %ld\n",
		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
		index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
		/* Locked for the slot lookup and the ref_netlink update */
		write_lock_bh(&ip_set_ref_lock);
		set = ip_set(inst, index);
		is_destroyed = inst->is_destroyed;
		if (!set || is_destroyed) {
			write_unlock_bh(&ip_set_ref_lock);
			if (dump_type == DUMP_ONE) {
				ret = -ENOENT;
				goto out;
			}
			if (is_destroyed) {
				/* All sets are just being destroyed */
				ret = 0;
				goto out;
			}
			/* Empty slot: try the next index */
			continue;
		}
		/* When dumping all sets, we must dump "sorted"
		 * so that lists (unions of sets) are dumped last.
		 * The DUMP_ALL pass skips the sets with IPSET_DUMP_LAST,
		 * the DUMP_LAST pass skips the ones without it.
		 */
		if (dump_type != DUMP_ONE &&
		    ((dump_type == DUMP_ALL) ==
		     !!(set->type->features & IPSET_DUMP_LAST))) {
			write_unlock_bh(&ip_set_ref_lock);
			continue;
		}
		pr_debug("List set: %s\n", set->name);
		if (!cb->args[IPSET_CB_ARG0]) {
			/* Start listing: make sure set won't be destroyed */
			pr_debug("reference set\n");
			set->ref_netlink++;
		}
		write_unlock_bh(&ip_set_ref_lock);
		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, flags,
				IPSET_CMD_LIST);
		if (!nlh) {
			ret = -EMSGSIZE;
			goto release_refcount;
		}
		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
			       cb->args[IPSET_CB_PROTO]) ||
		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
			goto nla_put_failure;
		/* Only the set names were requested */
		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
			goto next_set;
		switch (cb->args[IPSET_CB_ARG0]) {
		case 0:
			/* Core header data */
			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
					   set->type->name) ||
			    nla_put_u8(skb, IPSET_ATTR_FAMILY,
				       set->family) ||
			    nla_put_u8(skb, IPSET_ATTR_REVISION,
				       set->revision))
				goto nla_put_failure;
			/* The index is sent above the minimal protocol only */
			if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
			    nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
				goto nla_put_failure;
			ret = set->variant->head(set, skb);
			if (ret < 0)
				goto release_refcount;
			/* Only the headers were requested */
			if (dump_flags & IPSET_FLAG_LIST_HEADER)
				goto next_set;
			if (set->variant->uref)
				set->variant->uref(set, cb, true);
			/* fall through */
		default:
			/* NOTE(review): IPSET_CB_ARG0 is presumably updated
			 * by the list() callback - confirm in the set types.
			 */
			ret = set->variant->list(set, skb, cb);
			if (!cb->args[IPSET_CB_ARG0])
				/* Set is done, proceed with next one */
				goto next_set;
			goto release_refcount;
		}
	}
	/* If we dump all sets, continue with dumping last ones */
	if (dump_type == DUMP_ALL) {
		dump_type = DUMP_LAST;
		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
		cb->args[IPSET_CB_INDEX] = 0;
		if (set && set->variant->uref)
			set->variant->uref(set, cb, false);
		goto dump_last;
	}
	goto out;

nla_put_failure:
	ret = -EFAULT;
next_set:
	/* The loop was left by goto, so step the index manually */
	if (dump_type == DUMP_ONE)
		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
	else
		cb->args[IPSET_CB_INDEX]++;
release_refcount:
	/* If there was an error or set is done, release set */
	if (ret || !cb->args[IPSET_CB_ARG0]) {
		set = ip_set_ref_netlink(inst, index);
		if (set->variant->uref)
			set->variant->uref(set, cb, false);
		pr_debug("release set %s\n", set->name);
		__ip_set_put_netlink(set);
		cb->args[IPSET_CB_ARG0] = 0;
	}
out:
	/* nlh is non-NULL only when a message was started above */
	if (nlh) {
		nlmsg_end(skb, nlh);
		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
		dump_attrs(nlh);
	}

	return ret < 0 ? ret : skb->len;
}
1516
ip_set_dump(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1517 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1518 const struct nlmsghdr *nlh,
1519 const struct nlattr * const attr[],
1520 struct netlink_ext_ack *extack)
1521 {
1522 if (unlikely(protocol_min_failed(attr)))
1523 return -IPSET_ERR_PROTOCOL;
1524
1525 {
1526 struct netlink_dump_control c = {
1527 .start = ip_set_dump_start,
1528 .dump = ip_set_dump_do,
1529 .done = ip_set_dump_done,
1530 };
1531 return netlink_dump_start(ctnl, skb, nlh, &c);
1532 }
1533 }
1534
1535 /* Add, del and test */
1536
/* Attribute policy of the ADD, DEL and TEST commands (see the callback
 * table of this file). call_ad() also uses it to parse the copy of the
 * request embedded in its error reply.
 */
static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	/* Required by ip_set_ad() whenever IPSET_ATTR_ADT is present */
	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
	/* A single element ... */
	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
	/* ... or a container of nested IPSET_ATTR_DATA elements */
	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
};
1545
/* Run one add/del operation (the adt argument) on a set via the uadt()
 * callback of the set variant, retrying after a successful resize.
 *
 * Returns 0 on success, and also when the operation failed with
 * -IPSET_ERR_EXIST but IPSET_FLAG_EXIST is set in flags. When a line
 * number was reported and use_lineno is true, an error message carrying
 * the line number is sent to the requester and -EINTR is returned
 * (or -ENOMEM / the nla_parse() error if building the message failed).
 * Otherwise the error code of the operation is returned.
 */
static int
call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
	struct nlattr *tb[], enum ipset_adt adt,
	u32 flags, bool use_lineno)
{
	int ret;
	u32 lineno = 0;
	bool eexist = flags & IPSET_FLAG_EXIST, retried = false;

	do {
		if (retried) {
			/* Let others run before trying again: the nfnl
			 * mutex is dropped around cond_resched(), while
			 * a netlink reference is held on the set.
			 */
			__ip_set_get_netlink(set);
			nfnl_unlock(NFNL_SUBSYS_IPSET);
			cond_resched();
			nfnl_lock(NFNL_SUBSYS_IPSET);
			__ip_set_put_netlink(set);
		}

		ip_set_lock(set);
		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
		ip_set_unlock(set);
		retried = true;
		/* Loop again only if uadt() asked for it with -EAGAIN, the
		 * variant can resize and the resize succeeded; a failed
		 * resize leaves its error code in ret.
		 */
	} while (ret == -EAGAIN &&
		 set->variant->resize &&
		 (ret = set->variant->resize(set, retried)) == 0);

	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
		return 0;
	if (lineno && use_lineno) {
		/* Error in restore/batch mode: send back lineno */
		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
		struct sk_buff *skb2;
		struct nlmsgerr *errmsg;
		size_t payload = min(SIZE_MAX,
				     sizeof(*errmsg) + nlmsg_len(nlh));
		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
		struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
		struct nlattr *cmdattr;
		u32 *errline;

		skb2 = nlmsg_new(payload, GFP_KERNEL);
		if (!skb2)
			return -ENOMEM;
		rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
		errmsg = nlmsg_data(rep);
		errmsg->error = ret;
		/* The reply embeds a copy of the original request */
		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
		cmdattr = (void *)&errmsg->msg + min_len;

		/* Parse the attributes of the copy, not of the request */
		ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
				nlh->nlmsg_len - min_len, ip_set_adt_policy,
				NULL);

		if (ret) {
			nlmsg_free(skb2);
			return ret;
		}
		/* Overwrite the line number attribute inside the copy */
		errline = nla_data(cda[IPSET_ATTR_LINENO]);

		*errline = lineno;

		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
				MSG_DONTWAIT);
		/* Signal netlink not to send its ACK/errmsg.  */
		return -EINTR;
	}

	return ret;
}
1616
ip_set_ad(struct net * net,struct sock * ctnl,struct sk_buff * skb,enum ipset_adt adt,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1617 static int ip_set_ad(struct net *net, struct sock *ctnl,
1618 struct sk_buff *skb,
1619 enum ipset_adt adt,
1620 const struct nlmsghdr *nlh,
1621 const struct nlattr * const attr[],
1622 struct netlink_ext_ack *extack)
1623 {
1624 struct ip_set_net *inst = ip_set_pernet(net);
1625 struct ip_set *set;
1626 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1627 const struct nlattr *nla;
1628 u32 flags = flag_exist(nlh);
1629 bool use_lineno;
1630 int ret = 0;
1631
1632 if (unlikely(protocol_min_failed(attr) ||
1633 !attr[IPSET_ATTR_SETNAME] ||
1634 !((attr[IPSET_ATTR_DATA] != NULL) ^
1635 (attr[IPSET_ATTR_ADT] != NULL)) ||
1636 (attr[IPSET_ATTR_DATA] &&
1637 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1638 (attr[IPSET_ATTR_ADT] &&
1639 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1640 !attr[IPSET_ATTR_LINENO]))))
1641 return -IPSET_ERR_PROTOCOL;
1642
1643 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1644 if (!set)
1645 return -ENOENT;
1646
1647 use_lineno = !!attr[IPSET_ATTR_LINENO];
1648 if (attr[IPSET_ATTR_DATA]) {
1649 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1650 attr[IPSET_ATTR_DATA],
1651 set->type->adt_policy, NULL))
1652 return -IPSET_ERR_PROTOCOL;
1653 ret = call_ad(ctnl, skb, set, tb, adt, flags,
1654 use_lineno);
1655 } else {
1656 int nla_rem;
1657
1658 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1659 if (nla_type(nla) != IPSET_ATTR_DATA ||
1660 !flag_nested(nla) ||
1661 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1662 set->type->adt_policy, NULL))
1663 return -IPSET_ERR_PROTOCOL;
1664 ret = call_ad(ctnl, skb, set, tb, adt,
1665 flags, use_lineno);
1666 if (ret < 0)
1667 return ret;
1668 }
1669 }
1670 return ret;
1671 }
1672
/* IPSET_CMD_ADD handler: ip_set_ad() with the IPSET_ADD operation;
 * the return value of ip_set_ad() is passed through.
 */
static int ip_set_uadd(struct net *net, struct sock *ctnl,
		       struct sk_buff *skb, const struct nlmsghdr *nlh,
		       const struct nlattr * const attr[],
		       struct netlink_ext_ack *extack)
{
	return ip_set_ad(net, ctnl, skb,
			 IPSET_ADD, nlh, attr, extack);
}
1681
/* IPSET_CMD_DEL handler: ip_set_ad() with the IPSET_DEL operation;
 * the return value of ip_set_ad() is passed through.
 */
static int ip_set_udel(struct net *net, struct sock *ctnl,
		       struct sk_buff *skb, const struct nlmsghdr *nlh,
		       const struct nlattr * const attr[],
		       struct netlink_ext_ack *extack)
{
	return ip_set_ad(net, ctnl, skb,
			 IPSET_DEL, nlh, attr, extack);
}
1690
ip_set_utest(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1691 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1692 const struct nlmsghdr *nlh,
1693 const struct nlattr * const attr[],
1694 struct netlink_ext_ack *extack)
1695 {
1696 struct ip_set_net *inst = ip_set_pernet(net);
1697 struct ip_set *set;
1698 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1699 int ret = 0;
1700 u32 lineno;
1701
1702 if (unlikely(protocol_min_failed(attr) ||
1703 !attr[IPSET_ATTR_SETNAME] ||
1704 !attr[IPSET_ATTR_DATA] ||
1705 !flag_nested(attr[IPSET_ATTR_DATA])))
1706 return -IPSET_ERR_PROTOCOL;
1707
1708 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1709 if (!set)
1710 return -ENOENT;
1711
1712 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1713 set->type->adt_policy, NULL))
1714 return -IPSET_ERR_PROTOCOL;
1715
1716 rcu_read_lock_bh();
1717 ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
1718 rcu_read_unlock_bh();
1719 /* Userspace can't trigger element to be re-added */
1720 if (ret == -EAGAIN)
1721 ret = 1;
1722
1723 return ret > 0 ? 0 : -IPSET_ERR_EXIST;
1724 }
1725
/* Get header data of a set */
1727
ip_set_header(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1728 static int ip_set_header(struct net *net, struct sock *ctnl,
1729 struct sk_buff *skb, const struct nlmsghdr *nlh,
1730 const struct nlattr * const attr[],
1731 struct netlink_ext_ack *extack)
1732 {
1733 struct ip_set_net *inst = ip_set_pernet(net);
1734 const struct ip_set *set;
1735 struct sk_buff *skb2;
1736 struct nlmsghdr *nlh2;
1737 int ret = 0;
1738
1739 if (unlikely(protocol_min_failed(attr) ||
1740 !attr[IPSET_ATTR_SETNAME]))
1741 return -IPSET_ERR_PROTOCOL;
1742
1743 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1744 if (!set)
1745 return -ENOENT;
1746
1747 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1748 if (!skb2)
1749 return -ENOMEM;
1750
1751 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1752 IPSET_CMD_HEADER);
1753 if (!nlh2)
1754 goto nlmsg_failure;
1755 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1756 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1757 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1758 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1759 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1760 goto nla_put_failure;
1761 nlmsg_end(skb2, nlh2);
1762
1763 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1764 if (ret < 0)
1765 return ret;
1766
1767 return 0;
1768
1769 nla_put_failure:
1770 nlmsg_cancel(skb2, nlh2);
1771 nlmsg_failure:
1772 kfree_skb(skb2);
1773 return -EMSGSIZE;
1774 }
1775
1776 /* Get type data */
1777
/* Attribute policy of the TYPE command (see the callback table of this
 * file): ip_set_type() requires both the type name and the family.
 */
static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	/* NUL terminated string, length limit is IPSET_MAXNAMELEN - 1 */
	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
};
1784
ip_set_type(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1785 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1786 const struct nlmsghdr *nlh,
1787 const struct nlattr * const attr[],
1788 struct netlink_ext_ack *extack)
1789 {
1790 struct sk_buff *skb2;
1791 struct nlmsghdr *nlh2;
1792 u8 family, min, max;
1793 const char *typename;
1794 int ret = 0;
1795
1796 if (unlikely(protocol_min_failed(attr) ||
1797 !attr[IPSET_ATTR_TYPENAME] ||
1798 !attr[IPSET_ATTR_FAMILY]))
1799 return -IPSET_ERR_PROTOCOL;
1800
1801 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1802 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1803 ret = find_set_type_minmax(typename, family, &min, &max);
1804 if (ret)
1805 return ret;
1806
1807 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1808 if (!skb2)
1809 return -ENOMEM;
1810
1811 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1812 IPSET_CMD_TYPE);
1813 if (!nlh2)
1814 goto nlmsg_failure;
1815 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1816 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1817 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1818 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1819 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1820 goto nla_put_failure;
1821 nlmsg_end(skb2, nlh2);
1822
1823 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1824 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1825 if (ret < 0)
1826 return ret;
1827
1828 return 0;
1829
1830 nla_put_failure:
1831 nlmsg_cancel(skb2, nlh2);
1832 nlmsg_failure:
1833 kfree_skb(skb2);
1834 return -EMSGSIZE;
1835 }
1836
1837 /* Get protocol version */
1838
/* Attribute policy with the protocol version as the only attribute;
 * ip_set_protocol() requires IPSET_ATTR_PROTOCOL to be present.
 */
static const struct nla_policy
ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
};
1843
ip_set_protocol(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1844 static int ip_set_protocol(struct net *net, struct sock *ctnl,
1845 struct sk_buff *skb, const struct nlmsghdr *nlh,
1846 const struct nlattr * const attr[],
1847 struct netlink_ext_ack *extack)
1848 {
1849 struct sk_buff *skb2;
1850 struct nlmsghdr *nlh2;
1851 int ret = 0;
1852
1853 if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
1854 return -IPSET_ERR_PROTOCOL;
1855
1856 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1857 if (!skb2)
1858 return -ENOMEM;
1859
1860 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1861 IPSET_CMD_PROTOCOL);
1862 if (!nlh2)
1863 goto nlmsg_failure;
1864 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1865 goto nla_put_failure;
1866 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
1867 goto nla_put_failure;
1868 nlmsg_end(skb2, nlh2);
1869
1870 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1871 if (ret < 0)
1872 return ret;
1873
1874 return 0;
1875
1876 nla_put_failure:
1877 nlmsg_cancel(skb2, nlh2);
1878 nlmsg_failure:
1879 kfree_skb(skb2);
1880 return -EMSGSIZE;
1881 }
1882
1883 /* Get set by name or index, from userspace */
1884
ip_set_byname(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1885 static int ip_set_byname(struct net *net, struct sock *ctnl,
1886 struct sk_buff *skb, const struct nlmsghdr *nlh,
1887 const struct nlattr * const attr[],
1888 struct netlink_ext_ack *extack)
1889 {
1890 struct ip_set_net *inst = ip_set_pernet(net);
1891 struct sk_buff *skb2;
1892 struct nlmsghdr *nlh2;
1893 ip_set_id_t id = IPSET_INVALID_ID;
1894 const struct ip_set *set;
1895 int ret = 0;
1896
1897 if (unlikely(protocol_failed(attr) ||
1898 !attr[IPSET_ATTR_SETNAME]))
1899 return -IPSET_ERR_PROTOCOL;
1900
1901 set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
1902 if (id == IPSET_INVALID_ID)
1903 return -ENOENT;
1904
1905 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1906 if (!skb2)
1907 return -ENOMEM;
1908
1909 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1910 IPSET_CMD_GET_BYNAME);
1911 if (!nlh2)
1912 goto nlmsg_failure;
1913 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1914 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1915 nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
1916 goto nla_put_failure;
1917 nlmsg_end(skb2, nlh2);
1918
1919 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1920 if (ret < 0)
1921 return ret;
1922
1923 return 0;
1924
1925 nla_put_failure:
1926 nlmsg_cancel(skb2, nlh2);
1927 nlmsg_failure:
1928 kfree_skb(skb2);
1929 return -EMSGSIZE;
1930 }
1931
/* Attribute policy for looking up a set by index: ip_set_byindex()
 * requires IPSET_ATTR_INDEX and reads it with ip_set_get_h16().
 */
static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_INDEX]	= { .type = NLA_U16 },
};
1936
ip_set_byindex(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1937 static int ip_set_byindex(struct net *net, struct sock *ctnl,
1938 struct sk_buff *skb, const struct nlmsghdr *nlh,
1939 const struct nlattr * const attr[],
1940 struct netlink_ext_ack *extack)
1941 {
1942 struct ip_set_net *inst = ip_set_pernet(net);
1943 struct sk_buff *skb2;
1944 struct nlmsghdr *nlh2;
1945 ip_set_id_t id = IPSET_INVALID_ID;
1946 const struct ip_set *set;
1947 int ret = 0;
1948
1949 if (unlikely(protocol_failed(attr) ||
1950 !attr[IPSET_ATTR_INDEX]))
1951 return -IPSET_ERR_PROTOCOL;
1952
1953 id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
1954 if (id >= inst->ip_set_max)
1955 return -ENOENT;
1956 set = ip_set(inst, id);
1957 if (set == NULL)
1958 return -ENOENT;
1959
1960 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1961 if (!skb2)
1962 return -ENOMEM;
1963
1964 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1965 IPSET_CMD_GET_BYINDEX);
1966 if (!nlh2)
1967 goto nlmsg_failure;
1968 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1969 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
1970 goto nla_put_failure;
1971 nlmsg_end(skb2, nlh2);
1972
1973 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1974 if (ret < 0)
1975 return ret;
1976
1977 return 0;
1978
1979 nla_put_failure:
1980 nlmsg_cancel(skb2, nlh2);
1981 nlmsg_failure:
1982 kfree_skb(skb2);
1983 return -EMSGSIZE;
1984 }
1985
1986 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1987 [IPSET_CMD_NONE] = {
1988 .call = ip_set_none,
1989 .attr_count = IPSET_ATTR_CMD_MAX,
1990 },
1991 [IPSET_CMD_CREATE] = {
1992 .call = ip_set_create,
1993 .attr_count = IPSET_ATTR_CMD_MAX,
1994 .policy = ip_set_create_policy,
1995 },
1996 [IPSET_CMD_DESTROY] = {
1997 .call = ip_set_destroy,
1998 .attr_count = IPSET_ATTR_CMD_MAX,
1999 .policy = ip_set_setname_policy,
2000 },
2001 [IPSET_CMD_FLUSH] = {
2002 .call = ip_set_flush,
2003 .attr_count = IPSET_ATTR_CMD_MAX,
2004 .policy = ip_set_setname_policy,
2005 },
2006 [IPSET_CMD_RENAME] = {
2007 .call = ip_set_rename,
2008 .attr_count = IPSET_ATTR_CMD_MAX,
2009 .policy = ip_set_setname2_policy,
2010 },
2011 [IPSET_CMD_SWAP] = {
2012 .call = ip_set_swap,
2013 .attr_count = IPSET_ATTR_CMD_MAX,
2014 .policy = ip_set_setname2_policy,
2015 },
2016 [IPSET_CMD_LIST] = {
2017 .call = ip_set_dump,
2018 .attr_count = IPSET_ATTR_CMD_MAX,
2019 .policy = ip_set_dump_policy,
2020 },
2021 [IPSET_CMD_SAVE] = {
2022 .call = ip_set_dump,
2023 .attr_count = IPSET_ATTR_CMD_MAX,
2024 .policy = ip_set_setname_policy,
2025 },
2026 [IPSET_CMD_ADD] = {
2027 .call = ip_set_uadd,
2028 .attr_count = IPSET_ATTR_CMD_MAX,
2029 .policy = ip_set_adt_policy,
2030 },
2031 [IPSET_CMD_DEL] = {
2032 .call = ip_set_udel,
2033 .attr_count = IPSET_ATTR_CMD_MAX,
2034 .policy = ip_set_adt_policy,
2035 },
2036 [IPSET_CMD_TEST] = {
2037 .call = ip_set_utest,
2038 .attr_count = IPSET_ATTR_CMD_MAX,
2039 .policy = ip_set_adt_policy,
2040 },
2041 [IPSET_CMD_HEADER] = {
2042 .call = ip_set_header,
2043 .attr_count = IPSET_ATTR_CMD_MAX,
2044 .policy = ip_set_setname_policy,
2045 },
2046 [IPSET_CMD_TYPE] = {
2047 .call = ip_set_type,
2048 .attr_count = IPSET_ATTR_CMD_MAX,
2049 .policy = ip_set_type_policy,
2050 },
2051 [IPSET_CMD_PROTOCOL] = {
2052 .call = ip_set_protocol,
2053 .attr_count = IPSET_ATTR_CMD_MAX,
2054 .policy = ip_set_protocol_policy,
2055 },
2056 [IPSET_CMD_GET_BYNAME] = {
2057 .call = ip_set_byname,
2058 .attr_count = IPSET_ATTR_CMD_MAX,
2059 .policy = ip_set_setname_policy,
2060 },
2061 [IPSET_CMD_GET_BYINDEX] = {
2062 .call = ip_set_byindex,
2063 .attr_count = IPSET_ATTR_CMD_MAX,
2064 .policy = ip_set_index_policy,
2065 },
2066 };
2067
2068 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
2069 .name = "ip_set",
2070 .subsys_id = NFNL_SUBSYS_IPSET,
2071 .cb_count = IPSET_MSG_MAX,
2072 .cb = ip_set_netlink_subsys_cb,
2073 };
2074
2075 /* Interface to iptables/ip6tables */
2076
2077 static int
ip_set_sockfn_get(struct sock * sk,int optval,void __user * user,int * len)2078 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
2079 {
2080 unsigned int *op;
2081 void *data;
2082 int copylen = *len, ret = 0;
2083 struct net *net = sock_net(sk);
2084 struct ip_set_net *inst = ip_set_pernet(net);
2085
2086 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2087 return -EPERM;
2088 if (optval != SO_IP_SET)
2089 return -EBADF;
2090 if (*len < sizeof(unsigned int))
2091 return -EINVAL;
2092
2093 data = vmalloc(*len);
2094 if (!data)
2095 return -ENOMEM;
2096 if (copy_from_user(data, user, *len) != 0) {
2097 ret = -EFAULT;
2098 goto done;
2099 }
2100 op = data;
2101
2102 if (*op < IP_SET_OP_VERSION) {
2103 /* Check the version at the beginning of operations */
2104 struct ip_set_req_version *req_version = data;
2105
2106 if (*len < sizeof(struct ip_set_req_version)) {
2107 ret = -EINVAL;
2108 goto done;
2109 }
2110
2111 if (req_version->version < IPSET_PROTOCOL_MIN) {
2112 ret = -EPROTO;
2113 goto done;
2114 }
2115 }
2116
2117 switch (*op) {
2118 case IP_SET_OP_VERSION: {
2119 struct ip_set_req_version *req_version = data;
2120
2121 if (*len != sizeof(struct ip_set_req_version)) {
2122 ret = -EINVAL;
2123 goto done;
2124 }
2125
2126 req_version->version = IPSET_PROTOCOL;
2127 if (copy_to_user(user, req_version,
2128 sizeof(struct ip_set_req_version)))
2129 ret = -EFAULT;
2130 goto done;
2131 }
2132 case IP_SET_OP_GET_BYNAME: {
2133 struct ip_set_req_get_set *req_get = data;
2134 ip_set_id_t id;
2135
2136 if (*len != sizeof(struct ip_set_req_get_set)) {
2137 ret = -EINVAL;
2138 goto done;
2139 }
2140 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2141 nfnl_lock(NFNL_SUBSYS_IPSET);
2142 find_set_and_id(inst, req_get->set.name, &id);
2143 req_get->set.index = id;
2144 nfnl_unlock(NFNL_SUBSYS_IPSET);
2145 goto copy;
2146 }
2147 case IP_SET_OP_GET_FNAME: {
2148 struct ip_set_req_get_set_family *req_get = data;
2149 ip_set_id_t id;
2150
2151 if (*len != sizeof(struct ip_set_req_get_set_family)) {
2152 ret = -EINVAL;
2153 goto done;
2154 }
2155 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2156 nfnl_lock(NFNL_SUBSYS_IPSET);
2157 find_set_and_id(inst, req_get->set.name, &id);
2158 req_get->set.index = id;
2159 if (id != IPSET_INVALID_ID)
2160 req_get->family = ip_set(inst, id)->family;
2161 nfnl_unlock(NFNL_SUBSYS_IPSET);
2162 goto copy;
2163 }
2164 case IP_SET_OP_GET_BYINDEX: {
2165 struct ip_set_req_get_set *req_get = data;
2166 struct ip_set *set;
2167
2168 if (*len != sizeof(struct ip_set_req_get_set) ||
2169 req_get->set.index >= inst->ip_set_max) {
2170 ret = -EINVAL;
2171 goto done;
2172 }
2173 nfnl_lock(NFNL_SUBSYS_IPSET);
2174 set = ip_set(inst, req_get->set.index);
2175 ret = strscpy(req_get->set.name, set ? set->name : "",
2176 IPSET_MAXNAMELEN);
2177 nfnl_unlock(NFNL_SUBSYS_IPSET);
2178 if (ret < 0)
2179 goto done;
2180 goto copy;
2181 }
2182 default:
2183 ret = -EBADMSG;
2184 goto done;
2185 } /* end of switch(op) */
2186
2187 copy:
2188 if (copy_to_user(user, data, copylen))
2189 ret = -EFAULT;
2190
2191 done:
2192 vfree(data);
2193 if (ret > 0)
2194 ret = 0;
2195 return ret;
2196 }
2197
2198 static struct nf_sockopt_ops so_set __read_mostly = {
2199 .pf = PF_INET,
2200 .get_optmin = SO_IP_SET,
2201 .get_optmax = SO_IP_SET + 1,
2202 .get = ip_set_sockfn_get,
2203 .owner = THIS_MODULE,
2204 };
2205
2206 static int __net_init
ip_set_net_init(struct net * net)2207 ip_set_net_init(struct net *net)
2208 {
2209 struct ip_set_net *inst = ip_set_pernet(net);
2210 struct ip_set **list;
2211
2212 inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
2213 if (inst->ip_set_max >= IPSET_INVALID_ID)
2214 inst->ip_set_max = IPSET_INVALID_ID - 1;
2215
2216 list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
2217 if (!list)
2218 return -ENOMEM;
2219 inst->is_deleted = false;
2220 inst->is_destroyed = false;
2221 rcu_assign_pointer(inst->ip_set_list, list);
2222 return 0;
2223 }
2224
2225 static void __net_exit
ip_set_net_exit(struct net * net)2226 ip_set_net_exit(struct net *net)
2227 {
2228 struct ip_set_net *inst = ip_set_pernet(net);
2229
2230 struct ip_set *set = NULL;
2231 ip_set_id_t i;
2232
2233 inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2234
2235 nfnl_lock(NFNL_SUBSYS_IPSET);
2236 for (i = 0; i < inst->ip_set_max; i++) {
2237 set = ip_set(inst, i);
2238 if (set) {
2239 ip_set(inst, i) = NULL;
2240 set->variant->cancel_gc(set);
2241 ip_set_destroy_set(set);
2242 }
2243 }
2244 nfnl_unlock(NFNL_SUBSYS_IPSET);
2245 kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
2246 }
2247
2248 static struct pernet_operations ip_set_net_ops = {
2249 .init = ip_set_net_init,
2250 .exit = ip_set_net_exit,
2251 .id = &ip_set_net_id,
2252 .size = sizeof(struct ip_set_net),
2253 };
2254
2255 static int __init
ip_set_init(void)2256 ip_set_init(void)
2257 {
2258 int ret = register_pernet_subsys(&ip_set_net_ops);
2259
2260 if (ret) {
2261 pr_err("ip_set: cannot register pernet_subsys.\n");
2262 return ret;
2263 }
2264
2265 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2266 if (ret != 0) {
2267 pr_err("ip_set: cannot register with nfnetlink.\n");
2268 unregister_pernet_subsys(&ip_set_net_ops);
2269 return ret;
2270 }
2271
2272 ret = nf_register_sockopt(&so_set);
2273 if (ret != 0) {
2274 pr_err("SO_SET registry failed: %d\n", ret);
2275 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2276 unregister_pernet_subsys(&ip_set_net_ops);
2277 return ret;
2278 }
2279
2280 return 0;
2281 }
2282
/* Module exit point: unregister everything ip_set_init() registered, in
 * the reverse order of registration, then wait for outstanding RCU
 * callbacks before returning.
 */
static void __exit
ip_set_fini(void)
{
	nf_unregister_sockopt(&so_set);
	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
	unregister_pernet_subsys(&ip_set_net_ops);

	/* Wait for call_rcu() in destroy */
	rcu_barrier();

	pr_debug("these are the famous last words\n");
}
2295
/* Module load/unload entry points */
module_init(ip_set_init);
module_exit(ip_set_fini);

/* Description string that embeds the supported IPSET_PROTOCOL number */
MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
2300