• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
3  *                         Patrick Schaaf <bof@bof.de>
4  * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
5  */
6 
7 /* Kernel module for IP set management */
8 
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/moduleparam.h>
12 #include <linux/ip.h>
13 #include <linux/skbuff.h>
14 #include <linux/spinlock.h>
15 #include <linux/rculist.h>
16 #include <net/netlink.h>
17 #include <net/net_namespace.h>
18 #include <net/netns/generic.h>
19 
20 #include <linux/netfilter.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/nfnetlink.h>
23 #include <linux/netfilter/ipset/ip_set.h>
24 
/* Every set type registered by a type module is linked here */
static LIST_HEAD(ip_set_type_list);
/* Serializes modifications of ip_set_type_list */
static DEFINE_MUTEX(ip_set_type_mutex);
/* Guards the reference counters of the sets */
static DEFINE_RWLOCK(ip_set_ref_lock);
28 
29 struct ip_set_net {
30 	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
31 	ip_set_id_t	ip_set_max;	/* max number of sets */
32 	bool		is_deleted;	/* deleted by ip_set_net_exit */
33 	bool		is_destroyed;	/* all sets are destroyed */
34 };
35 
36 static unsigned int ip_set_net_id __read_mostly;
37 
ip_set_pernet(struct net * net)38 static inline struct ip_set_net *ip_set_pernet(struct net *net)
39 {
40 	return net_generic(net, ip_set_net_id);
41 }
42 
/* Number of slots added when the array of sets has to grow */
#define IP_SET_INC	64
/* True when the two names agree in their first IPSET_MAXNAMELEN bytes */
#define STRNCMP(a, b)	(!strncmp(a, b, IPSET_MAXNAMELEN))
45 
46 static unsigned int max_sets;
47 
48 module_param(max_sets, int, 0600);
49 MODULE_PARM_DESC(max_sets, "maximal number of sets");
50 MODULE_LICENSE("GPL");
51 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
52 MODULE_DESCRIPTION("core IP set support");
53 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
54 
/* Dereference an RCU pointer while the nfnl mutex or ip_set_ref_lock
 * is held.
 */
#define ip_set_dereference(p)					\
	rcu_dereference_protected(p,				\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) ||	\
		lockdep_is_held(&ip_set_ref_lock))

/* Slot @id of the set array of @inst, under the rules of
 * ip_set_dereference().
 */
#define ip_set(inst, id)					\
	ip_set_dereference((inst)->ip_set_list)[id]

/* Slot @id of the set array of @inst, without any lockdep checking */
#define ip_set_ref_netlink(inst,id)				\
	rcu_dereference_raw((inst)->ip_set_list)[id]

/* Dereference an RCU pointer from an RCU read-side section or with
 * the nfnl mutex held.
 */
#define ip_set_dereference_nfnl(p)				\
	rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
66 
67 /* The set types are implemented in modules and registered set types
68  * can be found in ip_set_type_list. Adding/deleting types is
69  * serialized by ip_set_type_mutex.
70  */
71 
72 static inline void
ip_set_type_lock(void)73 ip_set_type_lock(void)
74 {
75 	mutex_lock(&ip_set_type_mutex);
76 }
77 
78 static inline void
ip_set_type_unlock(void)79 ip_set_type_unlock(void)
80 {
81 	mutex_unlock(&ip_set_type_mutex);
82 }
83 
84 /* Register and deregister settype */
85 
86 static struct ip_set_type *
find_set_type(const char * name,u8 family,u8 revision)87 find_set_type(const char *name, u8 family, u8 revision)
88 {
89 	struct ip_set_type *type;
90 
91 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
92 		if (STRNCMP(type->name, name) &&
93 		    (type->family == family ||
94 		     type->family == NFPROTO_UNSPEC) &&
95 		    revision >= type->revision_min &&
96 		    revision <= type->revision_max)
97 			return type;
98 	return NULL;
99 }
100 
101 /* Unlock, try to load a set type module and lock again */
102 static bool
load_settype(const char * name)103 load_settype(const char *name)
104 {
105 	nfnl_unlock(NFNL_SUBSYS_IPSET);
106 	pr_debug("try to load ip_set_%s\n", name);
107 	if (request_module("ip_set_%s", name) < 0) {
108 		pr_warn("Can't find ip_set type %s\n", name);
109 		nfnl_lock(NFNL_SUBSYS_IPSET);
110 		return false;
111 	}
112 	nfnl_lock(NFNL_SUBSYS_IPSET);
113 	return true;
114 }
115 
116 /* Find a set type and reference it */
117 #define find_set_type_get(name, family, revision, found)	\
118 	__find_set_type_get(name, family, revision, found, false)
119 
120 static int
__find_set_type_get(const char * name,u8 family,u8 revision,struct ip_set_type ** found,bool retry)121 __find_set_type_get(const char *name, u8 family, u8 revision,
122 		    struct ip_set_type **found, bool retry)
123 {
124 	struct ip_set_type *type;
125 	int err;
126 
127 	if (retry && !load_settype(name))
128 		return -IPSET_ERR_FIND_TYPE;
129 
130 	rcu_read_lock();
131 	*found = find_set_type(name, family, revision);
132 	if (*found) {
133 		err = !try_module_get((*found)->me) ? -EFAULT : 0;
134 		goto unlock;
135 	}
136 	/* Make sure the type is already loaded
137 	 * but we don't support the revision
138 	 */
139 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
140 		if (STRNCMP(type->name, name)) {
141 			err = -IPSET_ERR_FIND_TYPE;
142 			goto unlock;
143 		}
144 	rcu_read_unlock();
145 
146 	return retry ? -IPSET_ERR_FIND_TYPE :
147 		__find_set_type_get(name, family, revision, found, true);
148 
149 unlock:
150 	rcu_read_unlock();
151 	return err;
152 }
153 
154 /* Find a given set type by name and family.
155  * If we succeeded, the supported minimal and maximum revisions are
156  * filled out.
157  */
158 #define find_set_type_minmax(name, family, min, max) \
159 	__find_set_type_minmax(name, family, min, max, false)
160 
161 static int
__find_set_type_minmax(const char * name,u8 family,u8 * min,u8 * max,bool retry)162 __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
163 		       bool retry)
164 {
165 	struct ip_set_type *type;
166 	bool found = false;
167 
168 	if (retry && !load_settype(name))
169 		return -IPSET_ERR_FIND_TYPE;
170 
171 	*min = 255; *max = 0;
172 	rcu_read_lock();
173 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
174 		if (STRNCMP(type->name, name) &&
175 		    (type->family == family ||
176 		     type->family == NFPROTO_UNSPEC)) {
177 			found = true;
178 			if (type->revision_min < *min)
179 				*min = type->revision_min;
180 			if (type->revision_max > *max)
181 				*max = type->revision_max;
182 		}
183 	rcu_read_unlock();
184 	if (found)
185 		return 0;
186 
187 	return retry ? -IPSET_ERR_FIND_TYPE :
188 		__find_set_type_minmax(name, family, min, max, true);
189 }
190 
191 #define family_name(f)	((f) == NFPROTO_IPV4 ? "inet" : \
192 			 (f) == NFPROTO_IPV6 ? "inet6" : "any")
193 
194 /* Register a set type structure. The type is identified by
195  * the unique triple of name, family and revision.
196  */
197 int
ip_set_type_register(struct ip_set_type * type)198 ip_set_type_register(struct ip_set_type *type)
199 {
200 	int ret = 0;
201 
202 	if (type->protocol != IPSET_PROTOCOL) {
203 		pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
204 			type->name, family_name(type->family),
205 			type->revision_min, type->revision_max,
206 			type->protocol, IPSET_PROTOCOL);
207 		return -EINVAL;
208 	}
209 
210 	ip_set_type_lock();
211 	if (find_set_type(type->name, type->family, type->revision_min)) {
212 		/* Duplicate! */
213 		pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
214 			type->name, family_name(type->family),
215 			type->revision_min);
216 		ip_set_type_unlock();
217 		return -EINVAL;
218 	}
219 	list_add_rcu(&type->list, &ip_set_type_list);
220 	pr_debug("type %s, family %s, revision %u:%u registered.\n",
221 		 type->name, family_name(type->family),
222 		 type->revision_min, type->revision_max);
223 	ip_set_type_unlock();
224 
225 	return ret;
226 }
227 EXPORT_SYMBOL_GPL(ip_set_type_register);
228 
229 /* Unregister a set type. There's a small race with ip_set_create */
230 void
ip_set_type_unregister(struct ip_set_type * type)231 ip_set_type_unregister(struct ip_set_type *type)
232 {
233 	ip_set_type_lock();
234 	if (!find_set_type(type->name, type->family, type->revision_min)) {
235 		pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
236 			type->name, family_name(type->family),
237 			type->revision_min);
238 		ip_set_type_unlock();
239 		return;
240 	}
241 	list_del_rcu(&type->list);
242 	pr_debug("type %s, family %s with revision min %u unregistered.\n",
243 		 type->name, family_name(type->family), type->revision_min);
244 	ip_set_type_unlock();
245 
246 	synchronize_rcu();
247 }
248 EXPORT_SYMBOL_GPL(ip_set_type_unregister);
249 
250 /* Utility functions */
251 void *
ip_set_alloc(size_t size)252 ip_set_alloc(size_t size)
253 {
254 	void *members = NULL;
255 
256 	if (size < KMALLOC_MAX_SIZE)
257 		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
258 
259 	if (members) {
260 		pr_debug("%p: allocated with kmalloc\n", members);
261 		return members;
262 	}
263 
264 	members = vzalloc(size);
265 	if (!members)
266 		return NULL;
267 	pr_debug("%p: allocated with vmalloc\n", members);
268 
269 	return members;
270 }
271 EXPORT_SYMBOL_GPL(ip_set_alloc);
272 
/* Release memory obtained from ip_set_alloc() */
void
ip_set_free(void *members)
{
	const char *how = is_vmalloc_addr(members) ? "vfree" : "kfree";

	pr_debug("%p: free with %s\n", members, how);
	kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
281 
282 static inline bool
flag_nested(const struct nlattr * nla)283 flag_nested(const struct nlattr *nla)
284 {
285 	return nla->nla_type & NLA_F_NESTED;
286 }
287 
288 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
289 	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
290 	[IPSET_ATTR_IPADDR_IPV6]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
291 };
292 
293 int
ip_set_get_ipaddr4(struct nlattr * nla,__be32 * ipaddr)294 ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
295 {
296 	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
297 
298 	if (unlikely(!flag_nested(nla)))
299 		return -IPSET_ERR_PROTOCOL;
300 	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
301 			     ipaddr_policy, NULL))
302 		return -IPSET_ERR_PROTOCOL;
303 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
304 		return -IPSET_ERR_PROTOCOL;
305 
306 	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
307 	return 0;
308 }
309 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
310 
311 int
ip_set_get_ipaddr6(struct nlattr * nla,union nf_inet_addr * ipaddr)312 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
313 {
314 	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
315 
316 	if (unlikely(!flag_nested(nla)))
317 		return -IPSET_ERR_PROTOCOL;
318 
319 	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
320 			     ipaddr_policy, NULL))
321 		return -IPSET_ERR_PROTOCOL;
322 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
323 		return -IPSET_ERR_PROTOCOL;
324 
325 	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
326 	       sizeof(struct in6_addr));
327 	return 0;
328 }
329 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
330 
331 typedef void (*destroyer)(struct ip_set *, void *);
332 /* ipset data extension types, in size order */
333 
334 const struct ip_set_ext_type ip_set_extensions[] = {
335 	[IPSET_EXT_ID_COUNTER] = {
336 		.type	= IPSET_EXT_COUNTER,
337 		.flag	= IPSET_FLAG_WITH_COUNTERS,
338 		.len	= sizeof(struct ip_set_counter),
339 		.align	= __alignof__(struct ip_set_counter),
340 	},
341 	[IPSET_EXT_ID_TIMEOUT] = {
342 		.type	= IPSET_EXT_TIMEOUT,
343 		.len	= sizeof(unsigned long),
344 		.align	= __alignof__(unsigned long),
345 	},
346 	[IPSET_EXT_ID_SKBINFO] = {
347 		.type	= IPSET_EXT_SKBINFO,
348 		.flag	= IPSET_FLAG_WITH_SKBINFO,
349 		.len	= sizeof(struct ip_set_skbinfo),
350 		.align	= __alignof__(struct ip_set_skbinfo),
351 	},
352 	[IPSET_EXT_ID_COMMENT] = {
353 		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
354 		.flag	 = IPSET_FLAG_WITH_COMMENT,
355 		.len	 = sizeof(struct ip_set_comment),
356 		.align	 = __alignof__(struct ip_set_comment),
357 		.destroy = (destroyer) ip_set_comment_free,
358 	},
359 };
360 EXPORT_SYMBOL_GPL(ip_set_extensions);
361 
362 static inline bool
add_extension(enum ip_set_ext_id id,u32 flags,struct nlattr * tb[])363 add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
364 {
365 	return ip_set_extensions[id].flag ?
366 		(flags & ip_set_extensions[id].flag) :
367 		!!tb[IPSET_ATTR_TIMEOUT];
368 }
369 
370 size_t
ip_set_elem_len(struct ip_set * set,struct nlattr * tb[],size_t len,size_t align)371 ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
372 		size_t align)
373 {
374 	enum ip_set_ext_id id;
375 	u32 cadt_flags = 0;
376 
377 	if (tb[IPSET_ATTR_CADT_FLAGS])
378 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
379 	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
380 		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
381 	if (!align)
382 		align = 1;
383 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
384 		if (!add_extension(id, cadt_flags, tb))
385 			continue;
386 		if (align < ip_set_extensions[id].align)
387 			align = ip_set_extensions[id].align;
388 		len = ALIGN(len, ip_set_extensions[id].align);
389 		set->offset[id] = len;
390 		set->extensions |= ip_set_extensions[id].type;
391 		len += ip_set_extensions[id].len;
392 	}
393 	return ALIGN(len, align);
394 }
395 EXPORT_SYMBOL_GPL(ip_set_elem_len);
396 
397 int
ip_set_get_extensions(struct ip_set * set,struct nlattr * tb[],struct ip_set_ext * ext)398 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
399 		      struct ip_set_ext *ext)
400 {
401 	u64 fullmark;
402 
403 	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
404 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
405 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
406 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
407 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
408 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
409 		return -IPSET_ERR_PROTOCOL;
410 
411 	if (tb[IPSET_ATTR_TIMEOUT]) {
412 		if (!SET_WITH_TIMEOUT(set))
413 			return -IPSET_ERR_TIMEOUT;
414 		ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
415 	}
416 	if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
417 		if (!SET_WITH_COUNTER(set))
418 			return -IPSET_ERR_COUNTER;
419 		if (tb[IPSET_ATTR_BYTES])
420 			ext->bytes = be64_to_cpu(nla_get_be64(
421 						 tb[IPSET_ATTR_BYTES]));
422 		if (tb[IPSET_ATTR_PACKETS])
423 			ext->packets = be64_to_cpu(nla_get_be64(
424 						   tb[IPSET_ATTR_PACKETS]));
425 	}
426 	if (tb[IPSET_ATTR_COMMENT]) {
427 		if (!SET_WITH_COMMENT(set))
428 			return -IPSET_ERR_COMMENT;
429 		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
430 	}
431 	if (tb[IPSET_ATTR_SKBMARK]) {
432 		if (!SET_WITH_SKBINFO(set))
433 			return -IPSET_ERR_SKBINFO;
434 		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
435 		ext->skbinfo.skbmark = fullmark >> 32;
436 		ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
437 	}
438 	if (tb[IPSET_ATTR_SKBPRIO]) {
439 		if (!SET_WITH_SKBINFO(set))
440 			return -IPSET_ERR_SKBINFO;
441 		ext->skbinfo.skbprio =
442 			be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
443 	}
444 	if (tb[IPSET_ATTR_SKBQUEUE]) {
445 		if (!SET_WITH_SKBINFO(set))
446 			return -IPSET_ERR_SKBINFO;
447 		ext->skbinfo.skbqueue =
448 			be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
449 	}
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
453 
454 int
ip_set_put_extensions(struct sk_buff * skb,const struct ip_set * set,const void * e,bool active)455 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
456 		      const void *e, bool active)
457 {
458 	if (SET_WITH_TIMEOUT(set)) {
459 		unsigned long *timeout = ext_timeout(e, set);
460 
461 		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
462 			htonl(active ? ip_set_timeout_get(timeout)
463 				: *timeout)))
464 			return -EMSGSIZE;
465 	}
466 	if (SET_WITH_COUNTER(set) &&
467 	    ip_set_put_counter(skb, ext_counter(e, set)))
468 		return -EMSGSIZE;
469 	if (SET_WITH_COMMENT(set) &&
470 	    ip_set_put_comment(skb, ext_comment(e, set)))
471 		return -EMSGSIZE;
472 	if (SET_WITH_SKBINFO(set) &&
473 	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
474 		return -EMSGSIZE;
475 	return 0;
476 }
477 EXPORT_SYMBOL_GPL(ip_set_put_extensions);
478 
479 bool
ip_set_match_extensions(struct ip_set * set,const struct ip_set_ext * ext,struct ip_set_ext * mext,u32 flags,void * data)480 ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
481 			struct ip_set_ext *mext, u32 flags, void *data)
482 {
483 	if (SET_WITH_TIMEOUT(set) &&
484 	    ip_set_timeout_expired(ext_timeout(data, set)))
485 		return false;
486 	if (SET_WITH_COUNTER(set)) {
487 		struct ip_set_counter *counter = ext_counter(data, set);
488 
489 		ip_set_update_counter(counter, ext, flags);
490 
491 		if (flags & IPSET_FLAG_MATCH_COUNTERS &&
492 		    !(ip_set_match_counter(ip_set_get_packets(counter),
493 				mext->packets, mext->packets_op) &&
494 		      ip_set_match_counter(ip_set_get_bytes(counter),
495 				mext->bytes, mext->bytes_op)))
496 			return false;
497 	}
498 	if (SET_WITH_SKBINFO(set))
499 		ip_set_get_skbinfo(ext_skbinfo(data, set),
500 				   ext, mext, flags);
501 	return true;
502 }
503 EXPORT_SYMBOL_GPL(ip_set_match_extensions);
504 
505 /* Creating/destroying/renaming/swapping affect the existence and
506  * the properties of a set. All of these can be executed from userspace
507  * only and serialized by the nfnl mutex indirectly from nfnetlink.
508  *
509  * Sets are identified by their index in ip_set_list and the index
510  * is used by the external references (set/SET netfilter modules).
511  *
512  * The set behind an index may change by swapping only, from userspace.
513  */
514 
515 static inline void
__ip_set_get(struct ip_set * set)516 __ip_set_get(struct ip_set *set)
517 {
518 	write_lock_bh(&ip_set_ref_lock);
519 	set->ref++;
520 	write_unlock_bh(&ip_set_ref_lock);
521 }
522 
523 static inline void
__ip_set_put(struct ip_set * set)524 __ip_set_put(struct ip_set *set)
525 {
526 	write_lock_bh(&ip_set_ref_lock);
527 	BUG_ON(set->ref == 0);
528 	set->ref--;
529 	write_unlock_bh(&ip_set_ref_lock);
530 }
531 
532 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
533  * a separate reference counter
534  */
535 static void
__ip_set_get_netlink(struct ip_set * set)536 __ip_set_get_netlink(struct ip_set *set)
537 {
538 	write_lock_bh(&ip_set_ref_lock);
539 	set->ref_netlink++;
540 	write_unlock_bh(&ip_set_ref_lock);
541 }
542 
543 static inline void
__ip_set_put_netlink(struct ip_set * set)544 __ip_set_put_netlink(struct ip_set *set)
545 {
546 	write_lock_bh(&ip_set_ref_lock);
547 	BUG_ON(set->ref_netlink == 0);
548 	set->ref_netlink--;
549 	write_unlock_bh(&ip_set_ref_lock);
550 }
551 
552 /* Add, del and test set entries from kernel.
553  *
554  * The set behind the index must exist and must be referenced
555  * so it can't be destroyed (or changed) under our foot.
556  */
557 
558 static inline struct ip_set *
ip_set_rcu_get(struct net * net,ip_set_id_t index)559 ip_set_rcu_get(struct net *net, ip_set_id_t index)
560 {
561 	struct ip_set_net *inst = ip_set_pernet(net);
562 
563 	/* ip_set_list and the set pointer need to be protected */
564 	return ip_set_dereference_nfnl(inst->ip_set_list)[index];
565 }
566 
567 static inline void
ip_set_lock(struct ip_set * set)568 ip_set_lock(struct ip_set *set)
569 {
570 	if (!set->variant->region_lock)
571 		spin_lock_bh(&set->lock);
572 }
573 
574 static inline void
ip_set_unlock(struct ip_set * set)575 ip_set_unlock(struct ip_set *set)
576 {
577 	if (!set->variant->region_lock)
578 		spin_unlock_bh(&set->lock);
579 }
580 
581 int
ip_set_test(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)582 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
583 	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
584 {
585 	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
586 	int ret = 0;
587 
588 	BUG_ON(!set);
589 	pr_debug("set %s, index %u\n", set->name, index);
590 
591 	if (opt->dim < set->type->dimension ||
592 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
593 		return 0;
594 
595 	rcu_read_lock_bh();
596 	ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
597 	rcu_read_unlock_bh();
598 
599 	if (ret == -EAGAIN) {
600 		/* Type requests element to be completed */
601 		pr_debug("element must be completed, ADD is triggered\n");
602 		ip_set_lock(set);
603 		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
604 		ip_set_unlock(set);
605 		ret = 1;
606 	} else {
607 		/* --return-nomatch: invert matched element */
608 		if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
609 		    (set->type->features & IPSET_TYPE_NOMATCH) &&
610 		    (ret > 0 || ret == -ENOTEMPTY))
611 			ret = -ret;
612 	}
613 
614 	/* Convert error codes to nomatch */
615 	return (ret < 0 ? 0 : ret);
616 }
617 EXPORT_SYMBOL_GPL(ip_set_test);
618 
619 int
ip_set_add(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)620 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
621 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
622 {
623 	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
624 	int ret;
625 
626 	BUG_ON(!set);
627 	pr_debug("set %s, index %u\n", set->name, index);
628 
629 	if (opt->dim < set->type->dimension ||
630 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
631 		return -IPSET_ERR_TYPE_MISMATCH;
632 
633 	ip_set_lock(set);
634 	ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
635 	ip_set_unlock(set);
636 
637 	return ret;
638 }
639 EXPORT_SYMBOL_GPL(ip_set_add);
640 
641 int
ip_set_del(ip_set_id_t index,const struct sk_buff * skb,const struct xt_action_param * par,struct ip_set_adt_opt * opt)642 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
643 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
644 {
645 	struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
646 	int ret = 0;
647 
648 	BUG_ON(!set);
649 	pr_debug("set %s, index %u\n", set->name, index);
650 
651 	if (opt->dim < set->type->dimension ||
652 	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
653 		return -IPSET_ERR_TYPE_MISMATCH;
654 
655 	ip_set_lock(set);
656 	ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
657 	ip_set_unlock(set);
658 
659 	return ret;
660 }
661 EXPORT_SYMBOL_GPL(ip_set_del);
662 
663 /* Find set by name, reference it once. The reference makes sure the
664  * thing pointed to, does not go away under our feet.
665  *
666  */
667 ip_set_id_t
ip_set_get_byname(struct net * net,const char * name,struct ip_set ** set)668 ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
669 {
670 	ip_set_id_t i, index = IPSET_INVALID_ID;
671 	struct ip_set *s;
672 	struct ip_set_net *inst = ip_set_pernet(net);
673 
674 	rcu_read_lock();
675 	for (i = 0; i < inst->ip_set_max; i++) {
676 		s = rcu_dereference(inst->ip_set_list)[i];
677 		if (s && STRNCMP(s->name, name)) {
678 			__ip_set_get(s);
679 			index = i;
680 			*set = s;
681 			break;
682 		}
683 	}
684 	rcu_read_unlock();
685 
686 	return index;
687 }
688 EXPORT_SYMBOL_GPL(ip_set_get_byname);
689 
690 /* If the given set pointer points to a valid set, decrement
691  * reference count by 1. The caller shall not assume the index
692  * to be valid, after calling this function.
693  *
694  */
695 
696 static inline void
__ip_set_put_byindex(struct ip_set_net * inst,ip_set_id_t index)697 __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
698 {
699 	struct ip_set *set;
700 
701 	rcu_read_lock();
702 	set = rcu_dereference(inst->ip_set_list)[index];
703 	if (set)
704 		__ip_set_put(set);
705 	rcu_read_unlock();
706 }
707 
708 void
ip_set_put_byindex(struct net * net,ip_set_id_t index)709 ip_set_put_byindex(struct net *net, ip_set_id_t index)
710 {
711 	struct ip_set_net *inst = ip_set_pernet(net);
712 
713 	__ip_set_put_byindex(inst, index);
714 }
715 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
716 
717 /* Get the name of a set behind a set index.
718  * Set itself is protected by RCU, but its name isn't: to protect against
719  * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
720  * name.
721  */
722 void
ip_set_name_byindex(struct net * net,ip_set_id_t index,char * name)723 ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
724 {
725 	struct ip_set *set = ip_set_rcu_get(net, index);
726 
727 	BUG_ON(!set);
728 
729 	read_lock_bh(&ip_set_ref_lock);
730 	strncpy(name, set->name, IPSET_MAXNAMELEN);
731 	read_unlock_bh(&ip_set_ref_lock);
732 }
733 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
734 
735 /* Routines to call by external subsystems, which do not
736  * call nfnl_lock for us.
737  */
738 
739 /* Find set by index, reference it once. The reference makes sure the
740  * thing pointed to, does not go away under our feet.
741  *
742  * The nfnl mutex is used in the function.
743  */
744 ip_set_id_t
ip_set_nfnl_get_byindex(struct net * net,ip_set_id_t index)745 ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
746 {
747 	struct ip_set *set;
748 	struct ip_set_net *inst = ip_set_pernet(net);
749 
750 	if (index >= inst->ip_set_max)
751 		return IPSET_INVALID_ID;
752 
753 	nfnl_lock(NFNL_SUBSYS_IPSET);
754 	set = ip_set(inst, index);
755 	if (set)
756 		__ip_set_get(set);
757 	else
758 		index = IPSET_INVALID_ID;
759 	nfnl_unlock(NFNL_SUBSYS_IPSET);
760 
761 	return index;
762 }
763 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
764 
765 /* If the given set pointer points to a valid set, decrement
766  * reference count by 1. The caller shall not assume the index
767  * to be valid, after calling this function.
768  *
769  * The nfnl mutex is used in the function.
770  */
771 void
ip_set_nfnl_put(struct net * net,ip_set_id_t index)772 ip_set_nfnl_put(struct net *net, ip_set_id_t index)
773 {
774 	struct ip_set *set;
775 	struct ip_set_net *inst = ip_set_pernet(net);
776 
777 	nfnl_lock(NFNL_SUBSYS_IPSET);
778 	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
779 		set = ip_set(inst, index);
780 		if (set)
781 			__ip_set_put(set);
782 	}
783 	nfnl_unlock(NFNL_SUBSYS_IPSET);
784 }
785 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
786 
787 /* Communication protocol with userspace over netlink.
788  *
789  * The commands are serialized by the nfnl mutex.
790  */
791 
protocol(const struct nlattr * const tb[])792 static inline u8 protocol(const struct nlattr * const tb[])
793 {
794 	return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
795 }
796 
797 static inline bool
protocol_failed(const struct nlattr * const tb[])798 protocol_failed(const struct nlattr * const tb[])
799 {
800 	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
801 }
802 
803 static inline bool
protocol_min_failed(const struct nlattr * const tb[])804 protocol_min_failed(const struct nlattr * const tb[])
805 {
806 	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
807 }
808 
809 static inline u32
flag_exist(const struct nlmsghdr * nlh)810 flag_exist(const struct nlmsghdr *nlh)
811 {
812 	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
813 }
814 
815 static struct nlmsghdr *
start_msg(struct sk_buff * skb,u32 portid,u32 seq,unsigned int flags,enum ipset_cmd cmd)816 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
817 	  enum ipset_cmd cmd)
818 {
819 	return nfnl_msg_put(skb, portid, seq,
820 			    nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
821 			    NFPROTO_IPV4, NFNETLINK_V0, 0);
822 }
823 
824 /* Create a set */
825 
826 static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
827 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
828 	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
829 				    .len = IPSET_MAXNAMELEN - 1 },
830 	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
831 				    .len = IPSET_MAXNAMELEN - 1},
832 	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
833 	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
834 	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
835 };
836 
837 static struct ip_set *
find_set_and_id(struct ip_set_net * inst,const char * name,ip_set_id_t * id)838 find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
839 {
840 	struct ip_set *set = NULL;
841 	ip_set_id_t i;
842 
843 	*id = IPSET_INVALID_ID;
844 	for (i = 0; i < inst->ip_set_max; i++) {
845 		set = ip_set(inst, i);
846 		if (set && STRNCMP(set->name, name)) {
847 			*id = i;
848 			break;
849 		}
850 	}
851 	return (*id == IPSET_INVALID_ID ? NULL : set);
852 }
853 
854 static inline struct ip_set *
find_set(struct ip_set_net * inst,const char * name)855 find_set(struct ip_set_net *inst, const char *name)
856 {
857 	ip_set_id_t id;
858 
859 	return find_set_and_id(inst, name, &id);
860 }
861 
862 static int
find_free_id(struct ip_set_net * inst,const char * name,ip_set_id_t * index,struct ip_set ** set)863 find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
864 	     struct ip_set **set)
865 {
866 	struct ip_set *s;
867 	ip_set_id_t i;
868 
869 	*index = IPSET_INVALID_ID;
870 	for (i = 0;  i < inst->ip_set_max; i++) {
871 		s = ip_set(inst, i);
872 		if (!s) {
873 			if (*index == IPSET_INVALID_ID)
874 				*index = i;
875 		} else if (STRNCMP(name, s->name)) {
876 			/* Name clash */
877 			*set = s;
878 			return -EEXIST;
879 		}
880 	}
881 	if (*index == IPSET_INVALID_ID)
882 		/* No free slot remained */
883 		return -IPSET_ERR_MAX_SETS;
884 	return 0;
885 }
886 
ip_set_none(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)887 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
888 		       const struct nlmsghdr *nlh,
889 		       const struct nlattr * const attr[],
890 		       struct netlink_ext_ack *extack)
891 {
892 	return -EOPNOTSUPP;
893 }
894 
ip_set_create(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)895 static int ip_set_create(struct net *net, struct sock *ctnl,
896 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
897 			 const struct nlattr * const attr[],
898 			 struct netlink_ext_ack *extack)
899 {
900 	struct ip_set_net *inst = ip_set_pernet(net);
901 	struct ip_set *set, *clash = NULL;
902 	ip_set_id_t index = IPSET_INVALID_ID;
903 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
904 	const char *name, *typename;
905 	u8 family, revision;
906 	u32 flags = flag_exist(nlh);
907 	int ret = 0;
908 
909 	if (unlikely(protocol_min_failed(attr) ||
910 		     !attr[IPSET_ATTR_SETNAME] ||
911 		     !attr[IPSET_ATTR_TYPENAME] ||
912 		     !attr[IPSET_ATTR_REVISION] ||
913 		     !attr[IPSET_ATTR_FAMILY] ||
914 		     (attr[IPSET_ATTR_DATA] &&
915 		      !flag_nested(attr[IPSET_ATTR_DATA]))))
916 		return -IPSET_ERR_PROTOCOL;
917 
918 	name = nla_data(attr[IPSET_ATTR_SETNAME]);
919 	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
920 	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
921 	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
922 	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
923 		 name, typename, family_name(family), revision);
924 
925 	/* First, and without any locks, allocate and initialize
926 	 * a normal base set structure.
927 	 */
928 	set = kzalloc(sizeof(*set), GFP_KERNEL);
929 	if (!set)
930 		return -ENOMEM;
931 	spin_lock_init(&set->lock);
932 	strlcpy(set->name, name, IPSET_MAXNAMELEN);
933 	set->family = family;
934 	set->revision = revision;
935 
936 	/* Next, check that we know the type, and take
937 	 * a reference on the type, to make sure it stays available
938 	 * while constructing our new set.
939 	 *
940 	 * After referencing the type, we try to create the type
941 	 * specific part of the set without holding any locks.
942 	 */
943 	ret = find_set_type_get(typename, family, revision, &set->type);
944 	if (ret)
945 		goto out;
946 
947 	/* Without holding any locks, create private part. */
948 	if (attr[IPSET_ATTR_DATA] &&
949 	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
950 			     set->type->create_policy, NULL)) {
951 		ret = -IPSET_ERR_PROTOCOL;
952 		goto put_out;
953 	}
954 
955 	ret = set->type->create(net, set, tb, flags);
956 	if (ret != 0)
957 		goto put_out;
958 
959 	/* BTW, ret==0 here. */
960 
961 	/* Here, we have a valid, constructed set and we are protected
962 	 * by the nfnl mutex. Find the first free index in ip_set_list
963 	 * and check clashing.
964 	 */
965 	ret = find_free_id(inst, set->name, &index, &clash);
966 	if (ret == -EEXIST) {
967 		/* If this is the same set and requested, ignore error */
968 		if ((flags & IPSET_FLAG_EXIST) &&
969 		    STRNCMP(set->type->name, clash->type->name) &&
970 		    set->type->family == clash->type->family &&
971 		    set->type->revision_min == clash->type->revision_min &&
972 		    set->type->revision_max == clash->type->revision_max &&
973 		    set->variant->same_set(set, clash))
974 			ret = 0;
975 		goto cleanup;
976 	} else if (ret == -IPSET_ERR_MAX_SETS) {
977 		struct ip_set **list, **tmp;
978 		ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
979 
980 		if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
981 			/* Wraparound */
982 			goto cleanup;
983 
984 		list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
985 		if (!list)
986 			goto cleanup;
987 		/* nfnl mutex is held, both lists are valid */
988 		tmp = ip_set_dereference(inst->ip_set_list);
989 		memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
990 		rcu_assign_pointer(inst->ip_set_list, list);
991 		/* Make sure all current packets have passed through */
992 		synchronize_net();
993 		/* Use new list */
994 		index = inst->ip_set_max;
995 		inst->ip_set_max = i;
996 		kvfree(tmp);
997 		ret = 0;
998 	} else if (ret) {
999 		goto cleanup;
1000 	}
1001 
1002 	/* Finally! Add our shiny new set to the list, and be done. */
1003 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
1004 	ip_set(inst, index) = set;
1005 
1006 	return ret;
1007 
1008 cleanup:
1009 	set->variant->cancel_gc(set);
1010 	set->variant->destroy(set);
1011 put_out:
1012 	module_put(set->type->me);
1013 out:
1014 	kfree(set);
1015 	return ret;
1016 }
1017 
1018 /* Destroy sets */
1019 
1020 static const struct nla_policy
1021 ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
1022 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1023 	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1024 				    .len = IPSET_MAXNAMELEN - 1 },
1025 };
1026 
1027 static void
ip_set_destroy_set(struct ip_set * set)1028 ip_set_destroy_set(struct ip_set *set)
1029 {
1030 	pr_debug("set: %s\n",  set->name);
1031 
1032 	/* Must call it without holding any lock */
1033 	set->variant->destroy(set);
1034 	module_put(set->type->me);
1035 	kfree(set);
1036 }
1037 
1038 static void
ip_set_destroy_set_rcu(struct rcu_head * head)1039 ip_set_destroy_set_rcu(struct rcu_head *head)
1040 {
1041 	struct ip_set *set = container_of(head, struct ip_set, rcu);
1042 
1043 	ip_set_destroy_set(set);
1044 }
1045 
ip_set_destroy(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1046 static int ip_set_destroy(struct net *net, struct sock *ctnl,
1047 			  struct sk_buff *skb, const struct nlmsghdr *nlh,
1048 			  const struct nlattr * const attr[],
1049 			  struct netlink_ext_ack *extack)
1050 {
1051 	struct ip_set_net *inst = ip_set_pernet(net);
1052 	struct ip_set *s;
1053 	ip_set_id_t i;
1054 	int ret = 0;
1055 
1056 	if (unlikely(protocol_min_failed(attr)))
1057 		return -IPSET_ERR_PROTOCOL;
1058 
1059 
1060 	/* Commands are serialized and references are
1061 	 * protected by the ip_set_ref_lock.
1062 	 * External systems (i.e. xt_set) must call
1063 	 * ip_set_put|get_nfnl_* functions, that way we
1064 	 * can safely check references here.
1065 	 *
1066 	 * list:set timer can only decrement the reference
1067 	 * counter, so if it's already zero, we can proceed
1068 	 * without holding the lock.
1069 	 */
1070 	if (!attr[IPSET_ATTR_SETNAME]) {
1071 		/* Must wait for flush to be really finished in list:set */
1072 		rcu_barrier();
1073 		read_lock_bh(&ip_set_ref_lock);
1074 		for (i = 0; i < inst->ip_set_max; i++) {
1075 			s = ip_set(inst, i);
1076 			if (s && (s->ref || s->ref_netlink)) {
1077 				ret = -IPSET_ERR_BUSY;
1078 				goto out;
1079 			}
1080 		}
1081 		inst->is_destroyed = true;
1082 		read_unlock_bh(&ip_set_ref_lock);
1083 		for (i = 0; i < inst->ip_set_max; i++) {
1084 			s = ip_set(inst, i);
1085 			if (s) {
1086 				ip_set(inst, i) = NULL;
1087 				/* Must cancel garbage collectors */
1088 				s->variant->cancel_gc(s);
1089 				ip_set_destroy_set(s);
1090 			}
1091 		}
1092 		/* Modified by ip_set_destroy() only, which is serialized */
1093 		inst->is_destroyed = false;
1094 	} else {
1095 		u16 features = 0;
1096 
1097 		read_lock_bh(&ip_set_ref_lock);
1098 		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1099 				    &i);
1100 		if (!s) {
1101 			ret = -ENOENT;
1102 			goto out;
1103 		} else if (s->ref || s->ref_netlink) {
1104 			ret = -IPSET_ERR_BUSY;
1105 			goto out;
1106 		}
1107 		features = s->type->features;
1108 		ip_set(inst, i) = NULL;
1109 		read_unlock_bh(&ip_set_ref_lock);
1110 		if (features & IPSET_TYPE_NAME) {
1111 			/* Must wait for flush to be really finished  */
1112 			rcu_barrier();
1113 		}
1114 		/* Must cancel garbage collectors */
1115 		s->variant->cancel_gc(s);
1116 		call_rcu(&s->rcu, ip_set_destroy_set_rcu);
1117 	}
1118 	return 0;
1119 out:
1120 	read_unlock_bh(&ip_set_ref_lock);
1121 	return ret;
1122 }
1123 
1124 /* Flush sets */
1125 
1126 static void
ip_set_flush_set(struct ip_set * set)1127 ip_set_flush_set(struct ip_set *set)
1128 {
1129 	pr_debug("set: %s\n",  set->name);
1130 
1131 	ip_set_lock(set);
1132 	set->variant->flush(set);
1133 	ip_set_unlock(set);
1134 }
1135 
ip_set_flush(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1136 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1137 			const struct nlmsghdr *nlh,
1138 			const struct nlattr * const attr[],
1139 			struct netlink_ext_ack *extack)
1140 {
1141 	struct ip_set_net *inst = ip_set_pernet(net);
1142 	struct ip_set *s;
1143 	ip_set_id_t i;
1144 
1145 	if (unlikely(protocol_min_failed(attr)))
1146 		return -IPSET_ERR_PROTOCOL;
1147 
1148 	if (!attr[IPSET_ATTR_SETNAME]) {
1149 		for (i = 0; i < inst->ip_set_max; i++) {
1150 			s = ip_set(inst, i);
1151 			if (s)
1152 				ip_set_flush_set(s);
1153 		}
1154 	} else {
1155 		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1156 		if (!s)
1157 			return -ENOENT;
1158 
1159 		ip_set_flush_set(s);
1160 	}
1161 
1162 	return 0;
1163 }
1164 
1165 /* Rename a set */
1166 
1167 static const struct nla_policy
1168 ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
1169 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1170 	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1171 				    .len = IPSET_MAXNAMELEN - 1 },
1172 	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
1173 				    .len = IPSET_MAXNAMELEN - 1 },
1174 };
1175 
ip_set_rename(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1176 static int ip_set_rename(struct net *net, struct sock *ctnl,
1177 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
1178 			 const struct nlattr * const attr[],
1179 			 struct netlink_ext_ack *extack)
1180 {
1181 	struct ip_set_net *inst = ip_set_pernet(net);
1182 	struct ip_set *set, *s;
1183 	const char *name2;
1184 	ip_set_id_t i;
1185 	int ret = 0;
1186 
1187 	if (unlikely(protocol_min_failed(attr) ||
1188 		     !attr[IPSET_ATTR_SETNAME] ||
1189 		     !attr[IPSET_ATTR_SETNAME2]))
1190 		return -IPSET_ERR_PROTOCOL;
1191 
1192 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1193 	if (!set)
1194 		return -ENOENT;
1195 
1196 	write_lock_bh(&ip_set_ref_lock);
1197 	if (set->ref != 0 || set->ref_netlink != 0) {
1198 		ret = -IPSET_ERR_REFERENCED;
1199 		goto out;
1200 	}
1201 
1202 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1203 	for (i = 0; i < inst->ip_set_max; i++) {
1204 		s = ip_set(inst, i);
1205 		if (s && STRNCMP(s->name, name2)) {
1206 			ret = -IPSET_ERR_EXIST_SETNAME2;
1207 			goto out;
1208 		}
1209 	}
1210 	strncpy(set->name, name2, IPSET_MAXNAMELEN);
1211 
1212 out:
1213 	write_unlock_bh(&ip_set_ref_lock);
1214 	return ret;
1215 }
1216 
1217 /* Swap two sets so that name/index points to the other.
1218  * References and set names are also swapped.
1219  *
1220  * The commands are serialized by the nfnl mutex and references are
1221  * protected by the ip_set_ref_lock. The kernel interfaces
1222  * do not hold the mutex but the pointer settings are atomic
1223  * so the ip_set_list always contains valid pointers to the sets.
1224  */
1225 
ip_set_swap(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1226 static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1227 		       const struct nlmsghdr *nlh,
1228 		       const struct nlattr * const attr[],
1229 		       struct netlink_ext_ack *extack)
1230 {
1231 	struct ip_set_net *inst = ip_set_pernet(net);
1232 	struct ip_set *from, *to;
1233 	ip_set_id_t from_id, to_id;
1234 	char from_name[IPSET_MAXNAMELEN];
1235 
1236 	if (unlikely(protocol_min_failed(attr) ||
1237 		     !attr[IPSET_ATTR_SETNAME] ||
1238 		     !attr[IPSET_ATTR_SETNAME2]))
1239 		return -IPSET_ERR_PROTOCOL;
1240 
1241 	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1242 			       &from_id);
1243 	if (!from)
1244 		return -ENOENT;
1245 
1246 	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
1247 			     &to_id);
1248 	if (!to)
1249 		return -IPSET_ERR_EXIST_SETNAME2;
1250 
1251 	/* Features must not change.
1252 	 * Not an artifical restriction anymore, as we must prevent
1253 	 * possible loops created by swapping in setlist type of sets.
1254 	 */
1255 	if (!(from->type->features == to->type->features &&
1256 	      from->family == to->family))
1257 		return -IPSET_ERR_TYPE_MISMATCH;
1258 
1259 	write_lock_bh(&ip_set_ref_lock);
1260 
1261 	if (from->ref_netlink || to->ref_netlink) {
1262 		write_unlock_bh(&ip_set_ref_lock);
1263 		return -EBUSY;
1264 	}
1265 
1266 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1267 	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1268 	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
1269 
1270 	swap(from->ref, to->ref);
1271 	ip_set(inst, from_id) = to;
1272 	ip_set(inst, to_id) = from;
1273 	write_unlock_bh(&ip_set_ref_lock);
1274 
1275 	return 0;
1276 }
1277 
1278 /* List/save set data */
1279 
/* Dump state as kept in cb->args[IPSET_CB_DUMP]: the lower 16 bits hold
 * one of the DUMP_* values below, the upper 16 bits hold the flags.
 */
#define DUMP_INIT		0
#define DUMP_ALL		1
#define DUMP_ONE		2
#define DUMP_LAST		3

/* Extract the dump type, i.e. the lower 16 bits */
#define DUMP_TYPE(arg)		((u32)(arg) & 0x0000FFFF)
/* Extract the flags, i.e. the upper 16 bits */
#define DUMP_FLAGS(arg)		((u32)(arg) >> 16)
1287 
1288 static int
ip_set_dump_done(struct netlink_callback * cb)1289 ip_set_dump_done(struct netlink_callback *cb)
1290 {
1291 	if (cb->args[IPSET_CB_ARG0]) {
1292 		struct ip_set_net *inst =
1293 			(struct ip_set_net *)cb->args[IPSET_CB_NET];
1294 		ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1295 		struct ip_set *set = ip_set_ref_netlink(inst, index);
1296 
1297 		if (set->variant->uref)
1298 			set->variant->uref(set, cb, false);
1299 		pr_debug("release set %s\n", set->name);
1300 		__ip_set_put_netlink(set);
1301 	}
1302 	return 0;
1303 }
1304 
1305 static inline void
dump_attrs(struct nlmsghdr * nlh)1306 dump_attrs(struct nlmsghdr *nlh)
1307 {
1308 	const struct nlattr *attr;
1309 	int rem;
1310 
1311 	pr_debug("dump nlmsg\n");
1312 	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1313 		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1314 	}
1315 }
1316 
1317 static const struct nla_policy
1318 ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
1319 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1320 	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1321 				    .len = IPSET_MAXNAMELEN - 1 },
1322 	[IPSET_ATTR_FLAGS]	= { .type = NLA_U32 },
1323 };
1324 
1325 static int
ip_set_dump_start(struct netlink_callback * cb)1326 ip_set_dump_start(struct netlink_callback *cb)
1327 {
1328 	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1329 	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1330 	struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1331 	struct nlattr *attr = (void *)nlh + min_len;
1332 	struct sk_buff *skb = cb->skb;
1333 	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1334 	u32 dump_type;
1335 	int ret;
1336 
1337 	ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
1338 			nlh->nlmsg_len - min_len,
1339 			ip_set_dump_policy, NULL);
1340 	if (ret)
1341 		goto error;
1342 
1343 	cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
1344 	if (cda[IPSET_ATTR_SETNAME]) {
1345 		ip_set_id_t index;
1346 		struct ip_set *set;
1347 
1348 		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
1349 				      &index);
1350 		if (!set) {
1351 			ret = -ENOENT;
1352 			goto error;
1353 		}
1354 		dump_type = DUMP_ONE;
1355 		cb->args[IPSET_CB_INDEX] = index;
1356 	} else {
1357 		dump_type = DUMP_ALL;
1358 	}
1359 
1360 	if (cda[IPSET_ATTR_FLAGS]) {
1361 		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1362 
1363 		dump_type |= (f << 16);
1364 	}
1365 	cb->args[IPSET_CB_NET] = (unsigned long)inst;
1366 	cb->args[IPSET_CB_DUMP] = dump_type;
1367 
1368 	return 0;
1369 
1370 error:
1371 	/* We have to create and send the error message manually :-( */
1372 	if (nlh->nlmsg_flags & NLM_F_ACK) {
1373 		netlink_ack(cb->skb, nlh, ret, NULL);
1374 	}
1375 	return ret;
1376 }
1377 
1378 static int
ip_set_dump_do(struct sk_buff * skb,struct netlink_callback * cb)1379 ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb)
1380 {
1381 	ip_set_id_t index = IPSET_INVALID_ID, max;
1382 	struct ip_set *set = NULL;
1383 	struct nlmsghdr *nlh = NULL;
1384 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
1385 	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1386 	u32 dump_type, dump_flags;
1387 	bool is_destroyed;
1388 	int ret = 0;
1389 
1390 	if (!cb->args[IPSET_CB_DUMP])
1391 		return -EINVAL;
1392 
1393 	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
1394 		goto out;
1395 
1396 	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
1397 	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
1398 	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
1399 				    : inst->ip_set_max;
1400 dump_last:
1401 	pr_debug("dump type, flag: %u %u index: %ld\n",
1402 		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
1403 	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
1404 		index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1405 		write_lock_bh(&ip_set_ref_lock);
1406 		set = ip_set(inst, index);
1407 		is_destroyed = inst->is_destroyed;
1408 		if (!set || is_destroyed) {
1409 			write_unlock_bh(&ip_set_ref_lock);
1410 			if (dump_type == DUMP_ONE) {
1411 				ret = -ENOENT;
1412 				goto out;
1413 			}
1414 			if (is_destroyed) {
1415 				/* All sets are just being destroyed */
1416 				ret = 0;
1417 				goto out;
1418 			}
1419 			continue;
1420 		}
1421 		/* When dumping all sets, we must dump "sorted"
1422 		 * so that lists (unions of sets) are dumped last.
1423 		 */
1424 		if (dump_type != DUMP_ONE &&
1425 		    ((dump_type == DUMP_ALL) ==
1426 		     !!(set->type->features & IPSET_DUMP_LAST))) {
1427 			write_unlock_bh(&ip_set_ref_lock);
1428 			continue;
1429 		}
1430 		pr_debug("List set: %s\n", set->name);
1431 		if (!cb->args[IPSET_CB_ARG0]) {
1432 			/* Start listing: make sure set won't be destroyed */
1433 			pr_debug("reference set\n");
1434 			set->ref_netlink++;
1435 		}
1436 		write_unlock_bh(&ip_set_ref_lock);
1437 		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1438 				cb->nlh->nlmsg_seq, flags,
1439 				IPSET_CMD_LIST);
1440 		if (!nlh) {
1441 			ret = -EMSGSIZE;
1442 			goto release_refcount;
1443 		}
1444 		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
1445 			       cb->args[IPSET_CB_PROTO]) ||
1446 		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1447 			goto nla_put_failure;
1448 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1449 			goto next_set;
1450 		switch (cb->args[IPSET_CB_ARG0]) {
1451 		case 0:
1452 			/* Core header data */
1453 			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1454 					   set->type->name) ||
1455 			    nla_put_u8(skb, IPSET_ATTR_FAMILY,
1456 				       set->family) ||
1457 			    nla_put_u8(skb, IPSET_ATTR_REVISION,
1458 				       set->revision))
1459 				goto nla_put_failure;
1460 			if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
1461 			    nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
1462 				goto nla_put_failure;
1463 			ret = set->variant->head(set, skb);
1464 			if (ret < 0)
1465 				goto release_refcount;
1466 			if (dump_flags & IPSET_FLAG_LIST_HEADER)
1467 				goto next_set;
1468 			if (set->variant->uref)
1469 				set->variant->uref(set, cb, true);
1470 			/* fall through */
1471 		default:
1472 			ret = set->variant->list(set, skb, cb);
1473 			if (!cb->args[IPSET_CB_ARG0])
1474 				/* Set is done, proceed with next one */
1475 				goto next_set;
1476 			goto release_refcount;
1477 		}
1478 	}
1479 	/* If we dump all sets, continue with dumping last ones */
1480 	if (dump_type == DUMP_ALL) {
1481 		dump_type = DUMP_LAST;
1482 		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
1483 		cb->args[IPSET_CB_INDEX] = 0;
1484 		if (set && set->variant->uref)
1485 			set->variant->uref(set, cb, false);
1486 		goto dump_last;
1487 	}
1488 	goto out;
1489 
1490 nla_put_failure:
1491 	ret = -EFAULT;
1492 next_set:
1493 	if (dump_type == DUMP_ONE)
1494 		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
1495 	else
1496 		cb->args[IPSET_CB_INDEX]++;
1497 release_refcount:
1498 	/* If there was an error or set is done, release set */
1499 	if (ret || !cb->args[IPSET_CB_ARG0]) {
1500 		set = ip_set_ref_netlink(inst, index);
1501 		if (set->variant->uref)
1502 			set->variant->uref(set, cb, false);
1503 		pr_debug("release set %s\n", set->name);
1504 		__ip_set_put_netlink(set);
1505 		cb->args[IPSET_CB_ARG0] = 0;
1506 	}
1507 out:
1508 	if (nlh) {
1509 		nlmsg_end(skb, nlh);
1510 		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1511 		dump_attrs(nlh);
1512 	}
1513 
1514 	return ret < 0 ? ret : skb->len;
1515 }
1516 
ip_set_dump(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1517 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1518 		       const struct nlmsghdr *nlh,
1519 		       const struct nlattr * const attr[],
1520 		       struct netlink_ext_ack *extack)
1521 {
1522 	if (unlikely(protocol_min_failed(attr)))
1523 		return -IPSET_ERR_PROTOCOL;
1524 
1525 	{
1526 		struct netlink_dump_control c = {
1527 			.start = ip_set_dump_start,
1528 			.dump = ip_set_dump_do,
1529 			.done = ip_set_dump_done,
1530 		};
1531 		return netlink_dump_start(ctnl, skb, nlh, &c);
1532 	}
1533 }
1534 
1535 /* Add, del and test */
1536 
1537 static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1538 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1539 	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1540 				    .len = IPSET_MAXNAMELEN - 1 },
1541 	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
1542 	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
1543 	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
1544 };
1545 
1546 static int
call_ad(struct sock * ctnl,struct sk_buff * skb,struct ip_set * set,struct nlattr * tb[],enum ipset_adt adt,u32 flags,bool use_lineno)1547 call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1548 	struct nlattr *tb[], enum ipset_adt adt,
1549 	u32 flags, bool use_lineno)
1550 {
1551 	int ret;
1552 	u32 lineno = 0;
1553 	bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1554 
1555 	do {
1556 		if (retried) {
1557 			__ip_set_get_netlink(set);
1558 			nfnl_unlock(NFNL_SUBSYS_IPSET);
1559 			cond_resched();
1560 			nfnl_lock(NFNL_SUBSYS_IPSET);
1561 			__ip_set_put_netlink(set);
1562 		}
1563 
1564 		ip_set_lock(set);
1565 		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1566 		ip_set_unlock(set);
1567 		retried = true;
1568 	} while (ret == -EAGAIN &&
1569 		 set->variant->resize &&
1570 		 (ret = set->variant->resize(set, retried)) == 0);
1571 
1572 	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1573 		return 0;
1574 	if (lineno && use_lineno) {
1575 		/* Error in restore/batch mode: send back lineno */
1576 		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1577 		struct sk_buff *skb2;
1578 		struct nlmsgerr *errmsg;
1579 		size_t payload = min(SIZE_MAX,
1580 				     sizeof(*errmsg) + nlmsg_len(nlh));
1581 		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1582 		struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1583 		struct nlattr *cmdattr;
1584 		u32 *errline;
1585 
1586 		skb2 = nlmsg_new(payload, GFP_KERNEL);
1587 		if (!skb2)
1588 			return -ENOMEM;
1589 		rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1590 				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1591 		errmsg = nlmsg_data(rep);
1592 		errmsg->error = ret;
1593 		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1594 		cmdattr = (void *)&errmsg->msg + min_len;
1595 
1596 		ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
1597 				nlh->nlmsg_len - min_len, ip_set_adt_policy,
1598 				NULL);
1599 
1600 		if (ret) {
1601 			nlmsg_free(skb2);
1602 			return ret;
1603 		}
1604 		errline = nla_data(cda[IPSET_ATTR_LINENO]);
1605 
1606 		*errline = lineno;
1607 
1608 		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
1609 				MSG_DONTWAIT);
1610 		/* Signal netlink not to send its ACK/errmsg.  */
1611 		return -EINTR;
1612 	}
1613 
1614 	return ret;
1615 }
1616 
ip_set_ad(struct net * net,struct sock * ctnl,struct sk_buff * skb,enum ipset_adt adt,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1617 static int ip_set_ad(struct net *net, struct sock *ctnl,
1618 		     struct sk_buff *skb,
1619 		     enum ipset_adt adt,
1620 		     const struct nlmsghdr *nlh,
1621 		     const struct nlattr * const attr[],
1622 		     struct netlink_ext_ack *extack)
1623 {
1624 	struct ip_set_net *inst = ip_set_pernet(net);
1625 	struct ip_set *set;
1626 	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1627 	const struct nlattr *nla;
1628 	u32 flags = flag_exist(nlh);
1629 	bool use_lineno;
1630 	int ret = 0;
1631 
1632 	if (unlikely(protocol_min_failed(attr) ||
1633 		     !attr[IPSET_ATTR_SETNAME] ||
1634 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1635 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1636 		     (attr[IPSET_ATTR_DATA] &&
1637 		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1638 		     (attr[IPSET_ATTR_ADT] &&
1639 		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1640 		       !attr[IPSET_ATTR_LINENO]))))
1641 		return -IPSET_ERR_PROTOCOL;
1642 
1643 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1644 	if (!set)
1645 		return -ENOENT;
1646 
1647 	use_lineno = !!attr[IPSET_ATTR_LINENO];
1648 	if (attr[IPSET_ATTR_DATA]) {
1649 		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1650 				     attr[IPSET_ATTR_DATA],
1651 				     set->type->adt_policy, NULL))
1652 			return -IPSET_ERR_PROTOCOL;
1653 		ret = call_ad(ctnl, skb, set, tb, adt, flags,
1654 			      use_lineno);
1655 	} else {
1656 		int nla_rem;
1657 
1658 		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1659 			if (nla_type(nla) != IPSET_ATTR_DATA ||
1660 			    !flag_nested(nla) ||
1661 			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1662 					     set->type->adt_policy, NULL))
1663 				return -IPSET_ERR_PROTOCOL;
1664 			ret = call_ad(ctnl, skb, set, tb, adt,
1665 				      flags, use_lineno);
1666 			if (ret < 0)
1667 				return ret;
1668 		}
1669 	}
1670 	return ret;
1671 }
1672 
ip_set_uadd(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1673 static int ip_set_uadd(struct net *net, struct sock *ctnl,
1674 		       struct sk_buff *skb, const struct nlmsghdr *nlh,
1675 		       const struct nlattr * const attr[],
1676 		       struct netlink_ext_ack *extack)
1677 {
1678 	return ip_set_ad(net, ctnl, skb,
1679 			 IPSET_ADD, nlh, attr, extack);
1680 }
1681 
ip_set_udel(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1682 static int ip_set_udel(struct net *net, struct sock *ctnl,
1683 		       struct sk_buff *skb, const struct nlmsghdr *nlh,
1684 		       const struct nlattr * const attr[],
1685 		       struct netlink_ext_ack *extack)
1686 {
1687 	return ip_set_ad(net, ctnl, skb,
1688 			 IPSET_DEL, nlh, attr, extack);
1689 }
1690 
ip_set_utest(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1691 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1692 			const struct nlmsghdr *nlh,
1693 			const struct nlattr * const attr[],
1694 			struct netlink_ext_ack *extack)
1695 {
1696 	struct ip_set_net *inst = ip_set_pernet(net);
1697 	struct ip_set *set;
1698 	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1699 	int ret = 0;
1700 	u32 lineno;
1701 
1702 	if (unlikely(protocol_min_failed(attr) ||
1703 		     !attr[IPSET_ATTR_SETNAME] ||
1704 		     !attr[IPSET_ATTR_DATA] ||
1705 		     !flag_nested(attr[IPSET_ATTR_DATA])))
1706 		return -IPSET_ERR_PROTOCOL;
1707 
1708 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1709 	if (!set)
1710 		return -ENOENT;
1711 
1712 	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1713 			     set->type->adt_policy, NULL))
1714 		return -IPSET_ERR_PROTOCOL;
1715 
1716 	rcu_read_lock_bh();
1717 	ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
1718 	rcu_read_unlock_bh();
1719 	/* Userspace can't trigger element to be re-added */
1720 	if (ret == -EAGAIN)
1721 		ret = 1;
1722 
1723 	return ret > 0 ? 0 : -IPSET_ERR_EXIST;
1724 }
1725 
1726 /* Get header data of a set */
1727 
ip_set_header(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1728 static int ip_set_header(struct net *net, struct sock *ctnl,
1729 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
1730 			 const struct nlattr * const attr[],
1731 			 struct netlink_ext_ack *extack)
1732 {
1733 	struct ip_set_net *inst = ip_set_pernet(net);
1734 	const struct ip_set *set;
1735 	struct sk_buff *skb2;
1736 	struct nlmsghdr *nlh2;
1737 	int ret = 0;
1738 
1739 	if (unlikely(protocol_min_failed(attr) ||
1740 		     !attr[IPSET_ATTR_SETNAME]))
1741 		return -IPSET_ERR_PROTOCOL;
1742 
1743 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1744 	if (!set)
1745 		return -ENOENT;
1746 
1747 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1748 	if (!skb2)
1749 		return -ENOMEM;
1750 
1751 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1752 			 IPSET_CMD_HEADER);
1753 	if (!nlh2)
1754 		goto nlmsg_failure;
1755 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1756 	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1757 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1758 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1759 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1760 		goto nla_put_failure;
1761 	nlmsg_end(skb2, nlh2);
1762 
1763 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1764 	if (ret < 0)
1765 		return ret;
1766 
1767 	return 0;
1768 
1769 nla_put_failure:
1770 	nlmsg_cancel(skb2, nlh2);
1771 nlmsg_failure:
1772 	kfree_skb(skb2);
1773 	return -EMSGSIZE;
1774 }
1775 
1776 /* Get type data */
1777 
1778 static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1779 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1780 	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
1781 				    .len = IPSET_MAXNAMELEN - 1 },
1782 	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
1783 };
1784 
ip_set_type(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1785 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1786 		       const struct nlmsghdr *nlh,
1787 		       const struct nlattr * const attr[],
1788 		       struct netlink_ext_ack *extack)
1789 {
1790 	struct sk_buff *skb2;
1791 	struct nlmsghdr *nlh2;
1792 	u8 family, min, max;
1793 	const char *typename;
1794 	int ret = 0;
1795 
1796 	if (unlikely(protocol_min_failed(attr) ||
1797 		     !attr[IPSET_ATTR_TYPENAME] ||
1798 		     !attr[IPSET_ATTR_FAMILY]))
1799 		return -IPSET_ERR_PROTOCOL;
1800 
1801 	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1802 	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1803 	ret = find_set_type_minmax(typename, family, &min, &max);
1804 	if (ret)
1805 		return ret;
1806 
1807 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1808 	if (!skb2)
1809 		return -ENOMEM;
1810 
1811 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1812 			 IPSET_CMD_TYPE);
1813 	if (!nlh2)
1814 		goto nlmsg_failure;
1815 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1816 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1817 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1818 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1819 	    nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1820 		goto nla_put_failure;
1821 	nlmsg_end(skb2, nlh2);
1822 
1823 	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1824 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1825 	if (ret < 0)
1826 		return ret;
1827 
1828 	return 0;
1829 
1830 nla_put_failure:
1831 	nlmsg_cancel(skb2, nlh2);
1832 nlmsg_failure:
1833 	kfree_skb(skb2);
1834 	return -EMSGSIZE;
1835 }
1836 
1837 /* Get protocol version */
1838 
1839 static const struct nla_policy
1840 ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1841 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1842 };
1843 
ip_set_protocol(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1844 static int ip_set_protocol(struct net *net, struct sock *ctnl,
1845 			   struct sk_buff *skb, const struct nlmsghdr *nlh,
1846 			   const struct nlattr * const attr[],
1847 			   struct netlink_ext_ack *extack)
1848 {
1849 	struct sk_buff *skb2;
1850 	struct nlmsghdr *nlh2;
1851 	int ret = 0;
1852 
1853 	if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
1854 		return -IPSET_ERR_PROTOCOL;
1855 
1856 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1857 	if (!skb2)
1858 		return -ENOMEM;
1859 
1860 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1861 			 IPSET_CMD_PROTOCOL);
1862 	if (!nlh2)
1863 		goto nlmsg_failure;
1864 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1865 		goto nla_put_failure;
1866 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
1867 		goto nla_put_failure;
1868 	nlmsg_end(skb2, nlh2);
1869 
1870 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1871 	if (ret < 0)
1872 		return ret;
1873 
1874 	return 0;
1875 
1876 nla_put_failure:
1877 	nlmsg_cancel(skb2, nlh2);
1878 nlmsg_failure:
1879 	kfree_skb(skb2);
1880 	return -EMSGSIZE;
1881 }
1882 
1883 /* Get set by name or index, from userspace */
1884 
ip_set_byname(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1885 static int ip_set_byname(struct net *net, struct sock *ctnl,
1886 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
1887 			 const struct nlattr * const attr[],
1888 			 struct netlink_ext_ack *extack)
1889 {
1890 	struct ip_set_net *inst = ip_set_pernet(net);
1891 	struct sk_buff *skb2;
1892 	struct nlmsghdr *nlh2;
1893 	ip_set_id_t id = IPSET_INVALID_ID;
1894 	const struct ip_set *set;
1895 	int ret = 0;
1896 
1897 	if (unlikely(protocol_failed(attr) ||
1898 		     !attr[IPSET_ATTR_SETNAME]))
1899 		return -IPSET_ERR_PROTOCOL;
1900 
1901 	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
1902 	if (id == IPSET_INVALID_ID)
1903 		return -ENOENT;
1904 
1905 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1906 	if (!skb2)
1907 		return -ENOMEM;
1908 
1909 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1910 			 IPSET_CMD_GET_BYNAME);
1911 	if (!nlh2)
1912 		goto nlmsg_failure;
1913 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1914 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1915 	    nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
1916 		goto nla_put_failure;
1917 	nlmsg_end(skb2, nlh2);
1918 
1919 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1920 	if (ret < 0)
1921 		return ret;
1922 
1923 	return 0;
1924 
1925 nla_put_failure:
1926 	nlmsg_cancel(skb2, nlh2);
1927 nlmsg_failure:
1928 	kfree_skb(skb2);
1929 	return -EMSGSIZE;
1930 }
1931 
/* Netlink attribute policy for IPSET_CMD_GET_BYINDEX requests (see the
 * ip_set_netlink_subsys_cb table): the protocol version as a u8 and the
 * set index as a u16.
 */
1932 static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
1933 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1934 	[IPSET_ATTR_INDEX]	= { .type = NLA_U16 },
1935 };
1936 
ip_set_byindex(struct net * net,struct sock * ctnl,struct sk_buff * skb,const struct nlmsghdr * nlh,const struct nlattr * const attr[],struct netlink_ext_ack * extack)1937 static int ip_set_byindex(struct net *net, struct sock *ctnl,
1938 			  struct sk_buff *skb, const struct nlmsghdr *nlh,
1939 			  const struct nlattr * const attr[],
1940 			  struct netlink_ext_ack *extack)
1941 {
1942 	struct ip_set_net *inst = ip_set_pernet(net);
1943 	struct sk_buff *skb2;
1944 	struct nlmsghdr *nlh2;
1945 	ip_set_id_t id = IPSET_INVALID_ID;
1946 	const struct ip_set *set;
1947 	int ret = 0;
1948 
1949 	if (unlikely(protocol_failed(attr) ||
1950 		     !attr[IPSET_ATTR_INDEX]))
1951 		return -IPSET_ERR_PROTOCOL;
1952 
1953 	id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
1954 	if (id >= inst->ip_set_max)
1955 		return -ENOENT;
1956 	set = ip_set(inst, id);
1957 	if (set == NULL)
1958 		return -ENOENT;
1959 
1960 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1961 	if (!skb2)
1962 		return -ENOMEM;
1963 
1964 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1965 			 IPSET_CMD_GET_BYINDEX);
1966 	if (!nlh2)
1967 		goto nlmsg_failure;
1968 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1969 	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
1970 		goto nla_put_failure;
1971 	nlmsg_end(skb2, nlh2);
1972 
1973 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1974 	if (ret < 0)
1975 		return ret;
1976 
1977 	return 0;
1978 
1979 nla_put_failure:
1980 	nlmsg_cancel(skb2, nlh2);
1981 nlmsg_failure:
1982 	kfree_skb(skb2);
1983 	return -EMSGSIZE;
1984 }
1985 
/* Dispatch table of the ipset nfnetlink subsystem, indexed by IPSET_CMD_*.
 * Each entry names the handler (.call), the highest attribute number
 * (.attr_count, IPSET_ATTR_CMD_MAX everywhere) and the attribute policy
 * for the request. IPSET_CMD_NONE has no policy.
 */
1986 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1987 	[IPSET_CMD_NONE]	= {
1988 		.call		= ip_set_none,
1989 		.attr_count	= IPSET_ATTR_CMD_MAX,
1990 	},
1991 	[IPSET_CMD_CREATE]	= {
1992 		.call		= ip_set_create,
1993 		.attr_count	= IPSET_ATTR_CMD_MAX,
1994 		.policy		= ip_set_create_policy,
1995 	},
1996 	[IPSET_CMD_DESTROY]	= {
1997 		.call		= ip_set_destroy,
1998 		.attr_count	= IPSET_ATTR_CMD_MAX,
1999 		.policy		= ip_set_setname_policy,
2000 	},
2001 	[IPSET_CMD_FLUSH]	= {
2002 		.call		= ip_set_flush,
2003 		.attr_count	= IPSET_ATTR_CMD_MAX,
2004 		.policy		= ip_set_setname_policy,
2005 	},
2006 	[IPSET_CMD_RENAME]	= {
2007 		.call		= ip_set_rename,
2008 		.attr_count	= IPSET_ATTR_CMD_MAX,
2009 		.policy		= ip_set_setname2_policy,
2010 	},
2011 	[IPSET_CMD_SWAP]	= {
2012 		.call		= ip_set_swap,
2013 		.attr_count	= IPSET_ATTR_CMD_MAX,
2014 		.policy		= ip_set_setname2_policy,
2015 	},
	/* LIST and SAVE share the ip_set_dump handler but use different
	 * policies.
	 */
2016 	[IPSET_CMD_LIST]	= {
2017 		.call		= ip_set_dump,
2018 		.attr_count	= IPSET_ATTR_CMD_MAX,
2019 		.policy		= ip_set_dump_policy,
2020 	},
2021 	[IPSET_CMD_SAVE]	= {
2022 		.call		= ip_set_dump,
2023 		.attr_count	= IPSET_ATTR_CMD_MAX,
2024 		.policy		= ip_set_setname_policy,
2025 	},
	/* ADD, DEL and TEST all validate against ip_set_adt_policy */
2026 	[IPSET_CMD_ADD]	= {
2027 		.call		= ip_set_uadd,
2028 		.attr_count	= IPSET_ATTR_CMD_MAX,
2029 		.policy		= ip_set_adt_policy,
2030 	},
2031 	[IPSET_CMD_DEL]	= {
2032 		.call		= ip_set_udel,
2033 		.attr_count	= IPSET_ATTR_CMD_MAX,
2034 		.policy		= ip_set_adt_policy,
2035 	},
2036 	[IPSET_CMD_TEST]	= {
2037 		.call		= ip_set_utest,
2038 		.attr_count	= IPSET_ATTR_CMD_MAX,
2039 		.policy		= ip_set_adt_policy,
2040 	},
2041 	[IPSET_CMD_HEADER]	= {
2042 		.call		= ip_set_header,
2043 		.attr_count	= IPSET_ATTR_CMD_MAX,
2044 		.policy		= ip_set_setname_policy,
2045 	},
2046 	[IPSET_CMD_TYPE]	= {
2047 		.call		= ip_set_type,
2048 		.attr_count	= IPSET_ATTR_CMD_MAX,
2049 		.policy		= ip_set_type_policy,
2050 	},
2051 	[IPSET_CMD_PROTOCOL]	= {
2052 		.call		= ip_set_protocol,
2053 		.attr_count	= IPSET_ATTR_CMD_MAX,
2054 		.policy		= ip_set_protocol_policy,
2055 	},
2056 	[IPSET_CMD_GET_BYNAME]	= {
2057 		.call		= ip_set_byname,
2058 		.attr_count	= IPSET_ATTR_CMD_MAX,
2059 		.policy		= ip_set_setname_policy,
2060 	},
2061 	[IPSET_CMD_GET_BYINDEX]	= {
2062 		.call		= ip_set_byindex,
2063 		.attr_count	= IPSET_ATTR_CMD_MAX,
2064 		.policy		= ip_set_index_policy,
2065 	},
2066 };
2067 
/* nfnetlink subsystem descriptor for ipset: ties the NFNL_SUBSYS_IPSET id
 * to the ip_set_netlink_subsys_cb dispatch table and its IPSET_MSG_MAX
 * entries. Registered in ip_set_init() and unregistered in ip_set_fini().
 */
2068 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
2069 	.name		= "ip_set",
2070 	.subsys_id	= NFNL_SUBSYS_IPSET,
2071 	.cb_count	= IPSET_MSG_MAX,
2072 	.cb		= ip_set_netlink_subsys_cb,
2073 };
2074 
2075 /* Interface to iptables/ip6tables */
2076 
2077 static int
ip_set_sockfn_get(struct sock * sk,int optval,void __user * user,int * len)2078 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
2079 {
2080 	unsigned int *op;
2081 	void *data;
2082 	int copylen = *len, ret = 0;
2083 	struct net *net = sock_net(sk);
2084 	struct ip_set_net *inst = ip_set_pernet(net);
2085 
2086 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2087 		return -EPERM;
2088 	if (optval != SO_IP_SET)
2089 		return -EBADF;
2090 	if (*len < sizeof(unsigned int))
2091 		return -EINVAL;
2092 
2093 	data = vmalloc(*len);
2094 	if (!data)
2095 		return -ENOMEM;
2096 	if (copy_from_user(data, user, *len) != 0) {
2097 		ret = -EFAULT;
2098 		goto done;
2099 	}
2100 	op = data;
2101 
2102 	if (*op < IP_SET_OP_VERSION) {
2103 		/* Check the version at the beginning of operations */
2104 		struct ip_set_req_version *req_version = data;
2105 
2106 		if (*len < sizeof(struct ip_set_req_version)) {
2107 			ret = -EINVAL;
2108 			goto done;
2109 		}
2110 
2111 		if (req_version->version < IPSET_PROTOCOL_MIN) {
2112 			ret = -EPROTO;
2113 			goto done;
2114 		}
2115 	}
2116 
2117 	switch (*op) {
2118 	case IP_SET_OP_VERSION: {
2119 		struct ip_set_req_version *req_version = data;
2120 
2121 		if (*len != sizeof(struct ip_set_req_version)) {
2122 			ret = -EINVAL;
2123 			goto done;
2124 		}
2125 
2126 		req_version->version = IPSET_PROTOCOL;
2127 		if (copy_to_user(user, req_version,
2128 				 sizeof(struct ip_set_req_version)))
2129 			ret = -EFAULT;
2130 		goto done;
2131 	}
2132 	case IP_SET_OP_GET_BYNAME: {
2133 		struct ip_set_req_get_set *req_get = data;
2134 		ip_set_id_t id;
2135 
2136 		if (*len != sizeof(struct ip_set_req_get_set)) {
2137 			ret = -EINVAL;
2138 			goto done;
2139 		}
2140 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2141 		nfnl_lock(NFNL_SUBSYS_IPSET);
2142 		find_set_and_id(inst, req_get->set.name, &id);
2143 		req_get->set.index = id;
2144 		nfnl_unlock(NFNL_SUBSYS_IPSET);
2145 		goto copy;
2146 	}
2147 	case IP_SET_OP_GET_FNAME: {
2148 		struct ip_set_req_get_set_family *req_get = data;
2149 		ip_set_id_t id;
2150 
2151 		if (*len != sizeof(struct ip_set_req_get_set_family)) {
2152 			ret = -EINVAL;
2153 			goto done;
2154 		}
2155 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2156 		nfnl_lock(NFNL_SUBSYS_IPSET);
2157 		find_set_and_id(inst, req_get->set.name, &id);
2158 		req_get->set.index = id;
2159 		if (id != IPSET_INVALID_ID)
2160 			req_get->family = ip_set(inst, id)->family;
2161 		nfnl_unlock(NFNL_SUBSYS_IPSET);
2162 		goto copy;
2163 	}
2164 	case IP_SET_OP_GET_BYINDEX: {
2165 		struct ip_set_req_get_set *req_get = data;
2166 		struct ip_set *set;
2167 
2168 		if (*len != sizeof(struct ip_set_req_get_set) ||
2169 		    req_get->set.index >= inst->ip_set_max) {
2170 			ret = -EINVAL;
2171 			goto done;
2172 		}
2173 		nfnl_lock(NFNL_SUBSYS_IPSET);
2174 		set = ip_set(inst, req_get->set.index);
2175 		ret = strscpy(req_get->set.name, set ? set->name : "",
2176 			      IPSET_MAXNAMELEN);
2177 		nfnl_unlock(NFNL_SUBSYS_IPSET);
2178 		if (ret < 0)
2179 			goto done;
2180 		goto copy;
2181 	}
2182 	default:
2183 		ret = -EBADMSG;
2184 		goto done;
2185 	}	/* end of switch(op) */
2186 
2187 copy:
2188 	if (copy_to_user(user, data, copylen))
2189 		ret = -EFAULT;
2190 
2191 done:
2192 	vfree(data);
2193 	if (ret > 0)
2194 		ret = 0;
2195 	return ret;
2196 }
2197 
/* Socket option registration for PF_INET sockets: getsockopt() requests in
 * the range starting at SO_IP_SET are routed to ip_set_sockfn_get(). No set
 * handler is provided.
 * NOTE(review): get_optmax is presumably exclusive, so only SO_IP_SET itself
 * is covered - confirm against nf_sockopt_ops.
 */
2198 static struct nf_sockopt_ops so_set __read_mostly = {
2199 	.pf		= PF_INET,
2200 	.get_optmin	= SO_IP_SET,
2201 	.get_optmax	= SO_IP_SET + 1,
2202 	.get		= ip_set_sockfn_get,
2203 	.owner		= THIS_MODULE,
2204 };
2205 
2206 static int __net_init
ip_set_net_init(struct net * net)2207 ip_set_net_init(struct net *net)
2208 {
2209 	struct ip_set_net *inst = ip_set_pernet(net);
2210 	struct ip_set **list;
2211 
2212 	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
2213 	if (inst->ip_set_max >= IPSET_INVALID_ID)
2214 		inst->ip_set_max = IPSET_INVALID_ID - 1;
2215 
2216 	list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
2217 	if (!list)
2218 		return -ENOMEM;
2219 	inst->is_deleted = false;
2220 	inst->is_destroyed = false;
2221 	rcu_assign_pointer(inst->ip_set_list, list);
2222 	return 0;
2223 }
2224 
2225 static void __net_exit
ip_set_net_exit(struct net * net)2226 ip_set_net_exit(struct net *net)
2227 {
2228 	struct ip_set_net *inst = ip_set_pernet(net);
2229 
2230 	struct ip_set *set = NULL;
2231 	ip_set_id_t i;
2232 
2233 	inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2234 
2235 	nfnl_lock(NFNL_SUBSYS_IPSET);
2236 	for (i = 0; i < inst->ip_set_max; i++) {
2237 		set = ip_set(inst, i);
2238 		if (set) {
2239 			ip_set(inst, i) = NULL;
2240 			set->variant->cancel_gc(set);
2241 			ip_set_destroy_set(set);
2242 		}
2243 	}
2244 	nfnl_unlock(NFNL_SUBSYS_IPSET);
2245 	kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
2246 }
2247 
/* Per network namespace hooks of the module: ip_set_net_init() and
 * ip_set_net_exit() run on namespace setup and teardown. The id lands in
 * ip_set_net_id, which ip_set_pernet() passes to net_generic().
 * NOTE(review): .size presumably makes the pernet core allocate one
 * struct ip_set_net per namespace - confirm against pernet_operations.
 */
2248 static struct pernet_operations ip_set_net_ops = {
2249 	.init	= ip_set_net_init,
2250 	.exit   = ip_set_net_exit,
2251 	.id	= &ip_set_net_id,
2252 	.size	= sizeof(struct ip_set_net),
2253 };
2254 
2255 static int __init
ip_set_init(void)2256 ip_set_init(void)
2257 {
2258 	int ret = register_pernet_subsys(&ip_set_net_ops);
2259 
2260 	if (ret) {
2261 		pr_err("ip_set: cannot register pernet_subsys.\n");
2262 		return ret;
2263 	}
2264 
2265 	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2266 	if (ret != 0) {
2267 		pr_err("ip_set: cannot register with nfnetlink.\n");
2268 		unregister_pernet_subsys(&ip_set_net_ops);
2269 		return ret;
2270 	}
2271 
2272 	ret = nf_register_sockopt(&so_set);
2273 	if (ret != 0) {
2274 		pr_err("SO_SET registry failed: %d\n", ret);
2275 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2276 		unregister_pernet_subsys(&ip_set_net_ops);
2277 		return ret;
2278 	}
2279 
2280 	return 0;
2281 }
2282 
2283 static void __exit
ip_set_fini(void)2284 ip_set_fini(void)
2285 {
2286 	nf_unregister_sockopt(&so_set);
2287 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2288 	unregister_pernet_subsys(&ip_set_net_ops);
2289 
2290 	/* Wait for call_rcu() in destroy */
2291 	rcu_barrier();
2292 
2293 	pr_debug("these are the famous last words\n");
2294 }
2295 
/* Module entry and exit points, followed by a module description that
 * embeds the IPSET_PROTOCOL number as a string.
 */
2296 module_init(ip_set_init);
2297 module_exit(ip_set_fini);
2298 
2299 MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
2300