// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netns/generic.h>

extern unsigned int nf_tables_net_id;

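/* Per-set private data.
 *
 * Updates run with the rwlock held for writing inside a seqcount write
 * section. The packet path lookup is lockless: it walks the tree under RCU
 * and uses the seqcount to detect concurrent updates, falling back to a
 * walk under the read lock if it raced with a writer. gc_work periodically
 * reaps timed-out elements for sets with NFT_SET_TIMEOUT.
 */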
struct nft_rbtree {
	struct rb_root		root;
	rwlock_t		lock;
	seqcount_t		count;
	struct delayed_work	gc_work;
};

struct nft_rbtree_elem {
	struct rb_node		node;
	struct nft_set_ext	ext;
};

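/* An interval is stored as two elements: a start element, which carries the
 * timeout and data extensions, and an end element flagged with
 * NFT_SET_ELEM_INTERVAL_END in its flags extension.
 */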
static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
{
	return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
	       (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}

static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
{
	return !nft_rbtree_interval_end(rbe);
}

static int nft_rbtree_cmp(const struct nft_set *set,
			  const struct nft_rbtree_elem *e1,
			  const struct nft_rbtree_elem *e2)
{
	return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
		      set->klen);
}

static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
{
	return nft_set_elem_expired(&rbe->ext) ||
	       nft_set_elem_is_dead(&rbe->ext);
}

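/* Lockless data path lookup: walk the tree under RCU, giving up as soon as
 * the seqcount indicates a concurrent writer. For interval sets, @interval
 * tracks the closest element with a key below the lookup key; if no exact
 * match is found and that element is an active, non-expired interval start,
 * it covers the key.
 */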
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
				const u32 *key, const struct nft_set_ext **ext,
				unsigned int seq)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	const struct nft_rbtree_elem *rbe, *interval = NULL;
	u8 genmask = nft_genmask_cur(net);
	const struct rb_node *parent;
	int d;

	parent = rcu_dereference_raw(priv->root.rb_node);
	while (parent != NULL) {
		if (read_seqcount_retry(&priv->count, seq))
			return false;

		rbe = rb_entry(parent, struct nft_rbtree_elem, node);

		d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
		if (d < 0) {
			parent = rcu_dereference_raw(parent->rb_left);
			if (interval &&
			    !nft_rbtree_cmp(set, rbe, interval) &&
			    nft_rbtree_interval_end(rbe) &&
			    nft_rbtree_interval_start(interval))
				continue;
			interval = rbe;
		} else if (d > 0)
			parent = rcu_dereference_raw(parent->rb_right);
		else {
			if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = rcu_dereference_raw(parent->rb_left);
				continue;
			}

			if (nft_rbtree_elem_expired(rbe))
				return false;

			if (nft_rbtree_interval_end(rbe)) {
				if (nft_set_is_anonymous(set))
					return false;
				parent = rcu_dereference_raw(parent->rb_left);
				interval = NULL;
				continue;
			}

			*ext = &rbe->ext;
			return true;
		}
	}

	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
	    nft_set_elem_active(&interval->ext, genmask) &&
	    !nft_rbtree_elem_expired(interval) &&
	    nft_rbtree_interval_start(interval)) {
		*ext = &interval->ext;
		return true;
	}

	return false;
}

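/* Fast path wrapper: try the lockless walk first; if it raced with a writer,
 * retry once under the read lock where the walk cannot be disturbed.
 */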
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
			      const u32 *key, const struct nft_set_ext **ext)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	unsigned int seq = read_seqcount_begin(&priv->count);
	bool ret;

	ret = __nft_rbtree_lookup(net, set, key, ext, seq);
	if (ret || !read_seqcount_retry(&priv->count, seq))
		return ret;

	read_lock_bh(&priv->lock);
	seq = read_seqcount_begin(&priv->count);
	ret = __nft_rbtree_lookup(net, set, key, ext, seq);
	read_unlock_bh(&priv->lock);

	return ret;
}

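/* Control plane element retrieval (e.g. for "nft get element"): same walk as
 * the lookup, but @flags selects whether the interval start or the interval
 * end element with the given key is returned.
 */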
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
			     const u32 *key, struct nft_rbtree_elem **elem,
			     unsigned int seq, unsigned int flags, u8 genmask)
{
	struct nft_rbtree_elem *rbe, *interval = NULL;
	struct nft_rbtree *priv = nft_set_priv(set);
	const struct rb_node *parent;
	const void *this;
	int d;

	parent = rcu_dereference_raw(priv->root.rb_node);
	while (parent != NULL) {
		if (read_seqcount_retry(&priv->count, seq))
			return false;

		rbe = rb_entry(parent, struct nft_rbtree_elem, node);

		this = nft_set_ext_key(&rbe->ext);
		d = memcmp(this, key, set->klen);
		if (d < 0) {
			parent = rcu_dereference_raw(parent->rb_left);
			if (!(flags & NFT_SET_ELEM_INTERVAL_END))
				interval = rbe;
		} else if (d > 0) {
			parent = rcu_dereference_raw(parent->rb_right);
			if (flags & NFT_SET_ELEM_INTERVAL_END)
				interval = rbe;
		} else {
			if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = rcu_dereference_raw(parent->rb_left);
				continue;
			}

			if (nft_set_elem_expired(&rbe->ext))
				return false;

			if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
			    (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
			    (flags & NFT_SET_ELEM_INTERVAL_END)) {
				*elem = rbe;
				return true;
			}

			if (nft_rbtree_interval_end(rbe))
				interval = NULL;

			parent = rcu_dereference_raw(parent->rb_left);
		}
	}

	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
	    nft_set_elem_active(&interval->ext, genmask) &&
	    !nft_set_elem_expired(&interval->ext) &&
	    ((!nft_rbtree_interval_end(interval) &&
	      !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
	     (nft_rbtree_interval_end(interval) &&
	      (flags & NFT_SET_ELEM_INTERVAL_END)))) {
		*elem = interval;
		return true;
	}

	return false;
}

static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
			    const struct nft_set_elem *elem, unsigned int flags)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	unsigned int seq = read_seqcount_begin(&priv->count);
	struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
	const u32 *key = (const u32 *)&elem->key.val;
	u8 genmask = nft_genmask_cur(net);
	bool ret;

	ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
	if (ret || !read_seqcount_retry(&priv->count, seq))
		return rbe;

	read_lock_bh(&priv->lock);
	seq = read_seqcount_begin(&priv->count);
	ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
	if (!ret)
		rbe = ERR_PTR(-ENOENT);
	read_unlock_bh(&priv->lock);

	return rbe;
}

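/* Unlink an element as part of garbage collection: deactivate any bound data
 * (map data or objects) and erase the node from the tree.
 */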
static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
				 struct nft_rbtree *priv,
				 struct nft_rbtree_elem *rbe)
{
	struct nft_set_elem elem = {
		.priv	= rbe,
	};

	nft_setelem_data_deactivate(net, set, &elem);
	rb_erase(&rbe->node, &priv->root);
}

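/* Synchronous garbage collection of an expired element, called from the
 * insert path (under the write lock) to avoid reporting bogus overlaps
 * against stale entries. The matching end element preceding it, if any, is
 * collected as well, since both halves make up one interval.
 */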
static int nft_rbtree_gc_elem(const struct nft_set *__set,
			      struct nft_rbtree *priv,
			      struct nft_rbtree_elem *rbe)
{
	struct nft_set *set = (struct nft_set *)__set;
	struct rb_node *prev = rb_prev(&rbe->node);
	struct net *net = read_pnet(&set->net);
	struct nft_rbtree_elem *rbe_prev;
	struct nft_trans_gc *gc;

	gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
	if (!gc)
		return -ENOMEM;

	/* search for end interval coming before this element.
	 * end intervals don't carry a timeout extension, they
	 * are coupled with the interval start element.
	 */
	while (prev) {
		rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
		if (nft_rbtree_interval_end(rbe_prev) &&
		    nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
			break;

		prev = rb_prev(prev);
	}

	if (prev) {
		rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
		nft_rbtree_gc_remove(net, set, priv, rbe_prev);

		/* There is always room in this trans gc for this element,
		 * memory allocation never actually happens, hence, the warning
		 * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
		 * this is synchronous gc which never fails.
		 */
		gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
		if (WARN_ON_ONCE(!gc))
			return -ENOMEM;

		nft_trans_gc_elem_add(gc, rbe_prev);
	}

	nft_rbtree_gc_remove(net, set, priv, rbe);
	gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
	if (WARN_ON_ONCE(!gc))
		return -ENOMEM;

	nft_trans_gc_elem_add(gc, rbe);

	nft_trans_gc_queue_sync_done(gc);

	return 0;
}

static bool nft_rbtree_update_first(const struct nft_set *set,
				    struct nft_rbtree_elem *rbe,
				    struct rb_node *first)
{
	struct nft_rbtree_elem *first_elem;

	first_elem = rb_entry(first, struct nft_rbtree_elem, node);
	/* this element is closest to where the new element is to be inserted:
	 * update the first element for the node list path.
	 */
	if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
		return true;

	return false;
}

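/* Insert a new element while detecting overlaps with existing intervals.
 * The tree is first descended to find the closest element greater than the
 * new key; from there the ordered node list is walked (highest to lowest
 * key) to classify the nearest smaller element (rbe_le) and the nearest
 * greater-or-equal element (rbe_ge). -EEXIST reports a full overlap (same
 * key and same start/end flag), -ENOTEMPTY a partial overlap.
 */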
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
			       struct nft_rbtree_elem *new,
			       struct nft_set_ext **ext)
{
	struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
	struct rb_node *node, *next, *parent, **p, *first = NULL;
	struct nft_rbtree *priv = nft_set_priv(set);
	u8 cur_genmask = nft_genmask_cur(net);
	u8 genmask = nft_genmask_next(net);
	int d, err;

	/* Descend the tree to search for an existing element greater than the
	 * key value to insert that is greater than the new element. This is the
	 * first element to walk the ordered elements to find possible overlap.
	 */
	parent = NULL;
	p = &priv->root.rb_node;
	while (*p != NULL) {
		parent = *p;
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
		d = nft_rbtree_cmp(set, rbe, new);

		if (d < 0) {
			p = &parent->rb_left;
		} else if (d > 0) {
			if (!first ||
			    nft_rbtree_update_first(set, rbe, first))
				first = &rbe->node;

			p = &parent->rb_right;
		} else {
			if (nft_rbtree_interval_end(rbe))
				p = &parent->rb_left;
			else
				p = &parent->rb_right;
		}
	}

	if (!first)
		first = rb_first(&priv->root);

	/* Detect overlap by going through the list of valid tree nodes.
	 * Values stored in the tree are in reversed order, starting from
	 * highest to lowest value.
	 */
	for (node = first; node != NULL; node = next) {
		next = rb_next(node);

		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		if (!nft_set_elem_active(&rbe->ext, genmask))
			continue;

		/* perform garbage collection to avoid bogus overlap reports
		 * but skip new elements in this transaction.
		 */
		if (nft_set_elem_expired(&rbe->ext) &&
		    nft_set_elem_active(&rbe->ext, cur_genmask)) {
			err = nft_rbtree_gc_elem(set, priv, rbe);
			if (err < 0)
				return err;

			continue;
		}

		d = nft_rbtree_cmp(set, rbe, new);
		if (d == 0) {
			/* Matching end element: no need to look for an
			 * overlapping greater or equal element.
			 */
			if (nft_rbtree_interval_end(rbe)) {
				rbe_le = rbe;
				break;
			}

			/* first element that is greater or equal to key value. */
			if (!rbe_ge) {
				rbe_ge = rbe;
				continue;
			}

			/* this is a closer more or equal element, update it. */
			if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
				rbe_ge = rbe;
				continue;
			}

			/* element is equal to key value, make sure flags are
			 * the same, an existing more or equal start element
			 * must not be replaced by a more or equal end element.
			 */
			if ((nft_rbtree_interval_start(new) &&
			     nft_rbtree_interval_start(rbe_ge)) ||
			    (nft_rbtree_interval_end(new) &&
			     nft_rbtree_interval_end(rbe_ge))) {
				rbe_ge = rbe;
				continue;
			}
		} else if (d > 0) {
			/* annotate element greater than the new element. */
			rbe_ge = rbe;
			continue;
		} else if (d < 0) {
			/* annotate element less than the new element. */
			rbe_le = rbe;
			break;
		}
	}

	/* - new start element matching existing start element: full overlap
	 *   reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
	 */
	if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
	    nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
		*ext = &rbe_ge->ext;
		return -EEXIST;
	}

	/* - new end element matching existing end element: full overlap
	 *   reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
	 */
	if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
	    nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
		*ext = &rbe_le->ext;
		return -EEXIST;
	}

	/* - new start element with existing closest, less or equal key value
	 *   being a start element: partial overlap, reported as -ENOTEMPTY.
	 *   Anonymous sets allow for two consecutive start elements since they
	 *   are constant, skip them to avoid bogus overlap reports.
	 */
	if (!nft_set_is_anonymous(set) && rbe_le &&
	    nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
		return -ENOTEMPTY;

	/* - new end element with existing closest, less or equal key value
	 *   being an end element: partial overlap, reported as -ENOTEMPTY.
	 */
	if (rbe_le &&
	    nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
		return -ENOTEMPTY;

	/* - new end element with existing closest, greater or equal key value
	 *   being an end element: partial overlap, reported as -ENOTEMPTY.
	 */
	if (rbe_ge &&
	    nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
		return -ENOTEMPTY;

	/* Accepted element: pick insertion point depending on key value */
	parent = NULL;
	p = &priv->root.rb_node;
	while (*p != NULL) {
		parent = *p;
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
		d = nft_rbtree_cmp(set, rbe, new);

		if (d < 0)
			p = &parent->rb_left;
		else if (d > 0)
			p = &parent->rb_right;
		else if (nft_rbtree_interval_end(rbe))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}

	rb_link_node_rcu(&new->node, parent, p);
	rb_insert_color(&new->node, &priv->root);
	return 0;
}

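/* Transaction-side insertion: serialize against other writers with the
 * rwlock and wrap the update in a seqcount write section so that lockless
 * lookups can detect it and retry.
 */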
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
			     const struct nft_set_elem *elem,
			     struct nft_set_ext **ext)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe = elem->priv;
	int err;

	write_lock_bh(&priv->lock);
	write_seqcount_begin(&priv->count);
	err = __nft_rbtree_insert(net, set, rbe, ext);
	write_seqcount_end(&priv->count);
	write_unlock_bh(&priv->lock);

	return err;
}

static void nft_rbtree_remove(const struct net *net,
			      const struct nft_set *set,
			      const struct nft_set_elem *elem)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe = elem->priv;

	write_lock_bh(&priv->lock);
	write_seqcount_begin(&priv->count);
	rb_erase(&rbe->node, &priv->root);
	write_seqcount_end(&priv->count);
	write_unlock_bh(&priv->lock);
}

static void nft_rbtree_activate(const struct net *net,
				const struct nft_set *set,
				const struct nft_set_elem *elem)
{
	struct nft_rbtree_elem *rbe = elem->priv;

	nft_set_elem_change_active(net, set, &rbe->ext);
}

static bool nft_rbtree_flush(const struct net *net,
			     const struct nft_set *set, void *priv)
{
	struct nft_rbtree_elem *rbe = priv;

	nft_set_elem_change_active(net, set, &rbe->ext);

	return true;
}

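/* Find the element matching @elem (same key and same start/end flag) that is
 * active in the next generation, mark it inactive and return it; NULL if no
 * such element exists.
 */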
static void *nft_rbtree_deactivate(const struct net *net,
				   const struct nft_set *set,
				   const struct nft_set_elem *elem)
{
	const struct nft_rbtree *priv = nft_set_priv(set);
	const struct rb_node *parent = priv->root.rb_node;
	struct nft_rbtree_elem *rbe, *this = elem->priv;
	u8 genmask = nft_genmask_next(net);
	int d;

	while (parent != NULL) {
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);

		d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
			   set->klen);
		if (d < 0)
			parent = parent->rb_left;
		else if (d > 0)
			parent = parent->rb_right;
		else {
			if (nft_rbtree_interval_end(rbe) &&
			    nft_rbtree_interval_start(this)) {
				parent = parent->rb_left;
				continue;
			} else if (nft_rbtree_interval_start(rbe) &&
				   nft_rbtree_interval_end(this)) {
				parent = parent->rb_right;
				continue;
			} else if (nft_set_elem_expired(&rbe->ext)) {
				break;
			} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = parent->rb_left;
				continue;
			}
			nft_rbtree_flush(net, set, rbe);
			return rbe;
		}
	}
	return NULL;
}

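/* Iterate over all active elements under the read lock, invoking the
 * caller-provided callback; used for dumps and set flushes.
 */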
static void nft_rbtree_walk(const struct nft_ctx *ctx,
			    struct nft_set *set,
			    struct nft_set_iter *iter)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe;
	struct nft_set_elem elem;
	struct rb_node *node;

	read_lock_bh(&priv->lock);
	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		if (iter->count < iter->skip)
			goto cont;
		if (!nft_set_elem_active(&rbe->ext, iter->genmask))
			goto cont;

		elem.priv = rbe;

		iter->err = iter->fn(ctx, set, iter, &elem);
		if (iter->err < 0) {
			read_unlock_bh(&priv->lock);
			return;
		}
cont:
		iter->count++;
	}
	read_unlock_bh(&priv->lock);
}

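/* Asynchronous garbage collection worker: scan the tree for expired
 * elements, mark them dead and queue them in a gc transaction. The scan is
 * aborted and retried later if the gc sequence changes underneath, i.e. the
 * ruleset was updated. End elements are queued together with their expired
 * start element, since only the start carries the timeout extension.
 */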
static void nft_rbtree_gc(struct work_struct *work)
{
	struct nft_rbtree_elem *rbe, *rbe_end = NULL;
	struct nftables_pernet *nft_net;
	struct nft_rbtree *priv;
	struct nft_trans_gc *gc;
	struct rb_node *node;
	struct nft_set *set;
	unsigned int gc_seq;
	struct net *net;

	priv = container_of(work, struct nft_rbtree, gc_work.work);
	set = nft_set_container_of(priv);
	net = read_pnet(&set->net);
	nft_net = net_generic(net, nf_tables_net_id);
	gc_seq = READ_ONCE(nft_net->gc_seq);

	if (nft_set_gc_is_pending(set))
		goto done;

	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
	if (!gc)
		goto done;

	read_lock_bh(&priv->lock);
	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {

		/* Ruleset has been updated, try later. */
		if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
			nft_trans_gc_destroy(gc);
			gc = NULL;
			goto try_later;
		}

		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		if (nft_set_elem_is_dead(&rbe->ext))
			goto dead_elem;

		/* elements are reversed in the rbtree for historical reasons,
		 * from highest to lowest value, that is why end element is
		 * always visited before the start element.
		 */
		if (nft_rbtree_interval_end(rbe)) {
			rbe_end = rbe;
			continue;
		}

		if (!nft_set_elem_expired(&rbe->ext))
			continue;

		nft_set_elem_dead(&rbe->ext);

		if (!rbe_end)
			continue;

		nft_set_elem_dead(&rbe_end->ext);

		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			goto try_later;

		nft_trans_gc_elem_add(gc, rbe_end);
		rbe_end = NULL;
dead_elem:
		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			goto try_later;

		nft_trans_gc_elem_add(gc, rbe);
	}

try_later:
	read_unlock_bh(&priv->lock);

	if (gc)
		nft_trans_gc_queue_async_done(gc);
done:
	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
			   nft_set_gc_interval(set));
}

static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
			       const struct nft_set_desc *desc)
{
	return sizeof(struct nft_rbtree);
}

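/* Set instantiation: initialize the lock, seqcount and empty tree, and kick
 * off the periodic gc work for sets with a timeout.
 */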
static int nft_rbtree_init(const struct nft_set *set,
			   const struct nft_set_desc *desc,
			   const struct nlattr * const nla[])
{
	struct nft_rbtree *priv = nft_set_priv(set);

	rwlock_init(&priv->lock);
	seqcount_init(&priv->count);
	priv->root = RB_ROOT;

	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
	if (set->flags & NFT_SET_TIMEOUT)
		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
				   nft_set_gc_interval(set));

	return 0;
}

static void nft_rbtree_destroy(const struct nft_ctx *ctx,
			       const struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe;
	struct rb_node *node;

	cancel_delayed_work_sync(&priv->gc_work);
	rcu_barrier();
	while ((node = priv->root.rb_node) != NULL) {
		rb_erase(node, &priv->root);
		rbe = rb_entry(node, struct nft_rbtree_elem, node);
		nf_tables_set_elem_destroy(ctx, set, rbe);
	}
}

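/* Memory and lookup cost estimate used by the core when picking a backend:
 * O(log n) lookups, O(n) memory.
 */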
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
				struct nft_set_estimate *est)
{
	if (desc->size)
		est->size = sizeof(struct nft_rbtree) +
			    desc->size * sizeof(struct nft_rbtree_elem);
	else
		est->size = ~0;

	est->lookup = NFT_SET_CLASS_O_LOG_N;
	est->space  = NFT_SET_CLASS_O_N;

	return true;
}

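/* Backend registration. The core typically picks this backend for interval
 * sets over a single key (concatenated-range sets usually end up on the
 * pipapo backend instead). A minimal userspace sketch that exercises it,
 * with table and set names made up for illustration:
 *
 *   nft add table ip filter
 *   nft add set ip filter allowed '{ type ipv4_addr; flags interval; }'
 *   nft add element ip filter allowed '{ 10.0.0.0/24, 192.168.1.10-192.168.1.20 }'
 */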
struct nft_set_type nft_set_rbtree_type __read_mostly = {
	.owner		= THIS_MODULE,
	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
	.ops		= {
		.privsize	= nft_rbtree_privsize,
		.elemsize	= offsetof(struct nft_rbtree_elem, ext),
		.estimate	= nft_rbtree_estimate,
		.init		= nft_rbtree_init,
		.destroy	= nft_rbtree_destroy,
		.insert		= nft_rbtree_insert,
		.remove		= nft_rbtree_remove,
		.deactivate	= nft_rbtree_deactivate,
		.flush		= nft_rbtree_flush,
		.activate	= nft_rbtree_activate,
		.lookup		= nft_rbtree_lookup,
		.walk		= nft_rbtree_walk,
		.get		= nft_rbtree_get,
	},
};