1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2  * Copyright (c) 2016 Facebook
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  */
13 #include <linux/bpf.h>
14 #include <linux/jhash.h>
15 #include <linux/filter.h>
16 #include "percpu_freelist.h"
17 #define HTAB_CREATE_FLAG_MASK						\
18 	(BPF_F_NO_PREALLOC | BPF_F_RDONLY | BPF_F_WRONLY)
19 
20 struct bucket {
21 	struct hlist_head head;
22 	raw_spinlock_t lock;
23 };
24 
25 struct bpf_htab {
26 	struct bpf_map map;
27 	struct bucket *buckets;
28 	void *elems;
29 	struct pcpu_freelist freelist;
30 	void __percpu *extra_elems;
31 	atomic_t count;	/* number of elements in this hashtable */
32 	u32 n_buckets;	/* number of hash buckets */
33 	u32 elem_size;	/* size of each element in bytes */
34 };
35 
36 enum extra_elem_state {
37 	HTAB_NOT_AN_EXTRA_ELEM = 0,
38 	HTAB_EXTRA_ELEM_FREE,
39 	HTAB_EXTRA_ELEM_USED
40 };
41 
42 /* each htab element is struct htab_elem + key + value */
43 struct htab_elem {
44 	union {
45 		struct hlist_node hash_node;
46 		struct bpf_htab *htab;
47 		struct pcpu_freelist_node fnode;
48 	};
49 	union {
50 		struct rcu_head rcu;
51 		enum extra_elem_state state;
52 	};
53 	u32 hash;
54 	char key[0] __aligned(8);
55 };
56 
57 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
58 				     void __percpu *pptr)
59 {
60 	*(void __percpu **)(l->key + key_size) = pptr;
61 }
62 
63 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
64 {
65 	return *(void __percpu **)(l->key + key_size);
66 }
67 
68 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
69 {
70 	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
71 }
72 
73 static void htab_free_elems(struct bpf_htab *htab)
74 {
75 	int i;
76 
77 	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
78 		goto free_elems;
79 
80 	for (i = 0; i < htab->map.max_entries; i++) {
81 		void __percpu *pptr;
82 
83 		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
84 					 htab->map.key_size);
85 		free_percpu(pptr);
86 	}
87 free_elems:
88 	bpf_map_area_free(htab->elems);
89 }
90 
91 static int prealloc_elems_and_freelist(struct bpf_htab *htab)
92 {
93 	int err = -ENOMEM, i;
94 
95 	htab->elems = bpf_map_area_alloc(htab->elem_size *
96 					 htab->map.max_entries);
97 	if (!htab->elems)
98 		return -ENOMEM;
99 
100 	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
101 		goto skip_percpu_elems;
102 
103 	for (i = 0; i < htab->map.max_entries; i++) {
104 		u32 size = round_up(htab->map.value_size, 8);
105 		void __percpu *pptr;
106 
107 		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
108 		if (!pptr)
109 			goto free_elems;
110 		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
111 				  pptr);
112 	}
113 
114 skip_percpu_elems:
115 	err = pcpu_freelist_init(&htab->freelist);
116 	if (err)
117 		goto free_elems;
118 
119 	pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
120 			       htab->map.max_entries);
121 	return 0;
122 
123 free_elems:
124 	htab_free_elems(htab);
125 	return err;
126 }
127 
128 static int alloc_extra_elems(struct bpf_htab *htab)
129 {
130 	void __percpu *pptr;
131 	int cpu;
132 
133 	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
134 	if (!pptr)
135 		return -ENOMEM;
136 
137 	for_each_possible_cpu(cpu) {
138 		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
139 			HTAB_EXTRA_ELEM_FREE;
140 	}
141 	htab->extra_elems = pptr;
142 	return 0;
143 }
144 
145 /* Called from syscall */
146 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
147 {
148 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
149 	struct bpf_htab *htab;
150 	int err, i;
151 	u64 cost;
152 
153 	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
154 		/* reserved bits should not be used */
155 		return ERR_PTR(-EINVAL);
156 
157 	htab = kzalloc(sizeof(*htab), GFP_USER);
158 	if (!htab)
159 		return ERR_PTR(-ENOMEM);
160 
161 	/* mandatory map attributes */
162 	htab->map.map_type = attr->map_type;
163 	htab->map.key_size = attr->key_size;
164 	htab->map.value_size = attr->value_size;
165 	htab->map.max_entries = attr->max_entries;
166 	htab->map.map_flags = attr->map_flags;
167 
168 	/* check sanity of attributes.
169 	 * value_size == 0 may be allowed in the future to use map as a set
170 	 */
171 	err = -EINVAL;
172 	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
173 	    htab->map.value_size == 0)
174 		goto free_htab;
175 
176 	/* hash table size must be power of 2 */
177 	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
178 
179 	err = -E2BIG;
180 	if (htab->map.key_size > MAX_BPF_STACK)
181 		/* eBPF programs initialize keys on stack, so they cannot be
182 		 * larger than max stack size
183 		 */
184 		goto free_htab;
185 
186 	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
187 	    MAX_BPF_STACK - sizeof(struct htab_elem))
188 		/* if value_size is bigger, the user space won't be able to
189 		 * access the elements via bpf syscall. This check also makes
190 		 * sure that the elem_size doesn't overflow and it's
191 		 * kmalloc-able later in htab_map_update_elem()
192 		 */
193 		goto free_htab;
194 
195 	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
196 		/* make sure the size for pcpu_alloc() is reasonable */
197 		goto free_htab;
198 
199 	htab->elem_size = sizeof(struct htab_elem) +
200 			  round_up(htab->map.key_size, 8);
201 	if (percpu)
202 		htab->elem_size += sizeof(void *);
203 	else
204 		htab->elem_size += round_up(htab->map.value_size, 8);
205 
206 	/* prevent zero size kmalloc and check for u32 overflow */
207 	if (htab->n_buckets == 0 ||
208 	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
209 		goto free_htab;
210 
211 	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
212 	       (u64) htab->elem_size * htab->map.max_entries;
213 
214 	if (percpu)
215 		cost += (u64) round_up(htab->map.value_size, 8) *
216 			num_possible_cpus() * htab->map.max_entries;
217 	else
218 		cost += (u64) htab->elem_size * num_possible_cpus();
219 
220 	if (cost >= U32_MAX - PAGE_SIZE)
221 		/* make sure page count doesn't overflow */
222 		goto free_htab;
223 
224 	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
225 
226 	/* if map size is larger than memlock limit, reject it early */
227 	err = bpf_map_precharge_memlock(htab->map.pages);
228 	if (err)
229 		goto free_htab;
230 
231 	err = -ENOMEM;
232 	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
233 					   sizeof(struct bucket));
234 	if (!htab->buckets)
235 		goto free_htab;
236 
237 	for (i = 0; i < htab->n_buckets; i++) {
238 		INIT_HLIST_HEAD(&htab->buckets[i].head);
239 		raw_spin_lock_init(&htab->buckets[i].lock);
240 	}
241 
242 	if (!percpu) {
243 		err = alloc_extra_elems(htab);
244 		if (err)
245 			goto free_buckets;
246 	}
247 
248 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
249 		err = prealloc_elems_and_freelist(htab);
250 		if (err)
251 			goto free_extra_elems;
252 	}
253 
254 	return &htab->map;
255 
256 free_extra_elems:
257 	free_percpu(htab->extra_elems);
258 free_buckets:
259 	bpf_map_area_free(htab->buckets);
260 free_htab:
261 	kfree(htab);
262 	return ERR_PTR(err);
263 }
264 
265 static inline u32 htab_map_hash(const void *key, u32 key_len)
266 {
267 	return jhash(key, key_len, 0);
268 }
269 
270 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
271 {
272 	return &htab->buckets[hash & (htab->n_buckets - 1)];
273 }
274 
275 static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
276 {
277 	return &__select_bucket(htab, hash)->head;
278 }
279 
280 static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
281 					 void *key, u32 key_size)
282 {
283 	struct htab_elem *l;
284 
285 	hlist_for_each_entry_rcu(l, head, hash_node)
286 		if (l->hash == hash && !memcmp(&l->key, key, key_size))
287 			return l;
288 
289 	return NULL;
290 }
291 
292 /* Called from syscall or from eBPF program */
293 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
294 {
295 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
296 	struct hlist_head *head;
297 	struct htab_elem *l;
298 	u32 hash, key_size;
299 
300 	/* Must be called with rcu_read_lock. */
301 	WARN_ON_ONCE(!rcu_read_lock_held());
302 
303 	key_size = map->key_size;
304 
305 	hash = htab_map_hash(key, key_size);
306 
307 	head = select_bucket(htab, hash);
308 
309 	l = lookup_elem_raw(head, hash, key, key_size);
310 
311 	return l;
312 }
313 
314 static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
315 {
316 	struct htab_elem *l = __htab_map_lookup_elem(map, key);
317 
318 	if (l)
319 		return l->key + round_up(map->key_size, 8);
320 
321 	return NULL;
322 }
323 
324 /* Called from syscall */
325 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
326 {
327 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
328 	struct hlist_head *head;
329 	struct htab_elem *l, *next_l;
330 	u32 hash, key_size;
331 	int i;
332 
333 	WARN_ON_ONCE(!rcu_read_lock_held());
334 
335 	key_size = map->key_size;
336 
337 	hash = htab_map_hash(key, key_size);
338 
339 	head = select_bucket(htab, hash);
340 
341 	/* lookup the key */
342 	l = lookup_elem_raw(head, hash, key, key_size);
343 
344 	if (!l) {
345 		i = 0;
346 		goto find_first_elem;
347 	}
348 
349 	/* key was found, get next key in the same bucket */
350 	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
351 				  struct htab_elem, hash_node);
352 
353 	if (next_l) {
354 		/* if next elem in this hash list is non-zero, just return it */
355 		memcpy(next_key, next_l->key, key_size);
356 		return 0;
357 	}
358 
359 	/* no more elements in this hash list, go to the next bucket */
360 	i = hash & (htab->n_buckets - 1);
361 	i++;
362 
363 find_first_elem:
364 	/* iterate over buckets */
365 	for (; i < htab->n_buckets; i++) {
366 		head = select_bucket(htab, i);
367 
368 		/* pick first element in the bucket */
369 		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
370 					  struct htab_elem, hash_node);
371 		if (next_l) {
372 			/* if it's not empty, just return it */
373 			memcpy(next_key, next_l->key, key_size);
374 			return 0;
375 		}
376 	}
377 
378 	/* iterated over all buckets and all elements */
379 	return -ENOENT;
380 }
381 
382 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
383 {
384 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
385 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
386 	kfree(l);
387 }
388 
389 static void htab_elem_free_rcu(struct rcu_head *head)
390 {
391 	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
392 	struct bpf_htab *htab = l->htab;
393 
394 	/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
395 	 * we're calling kfree, otherwise deadlock is possible if kprobes
396 	 * are placed somewhere inside of slub
397 	 */
398 	preempt_disable();
399 	__this_cpu_inc(bpf_prog_active);
400 	htab_elem_free(htab, l);
401 	__this_cpu_dec(bpf_prog_active);
402 	preempt_enable();
403 }
404 
405 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
406 {
407 	if (l->state == HTAB_EXTRA_ELEM_USED) {
408 		l->state = HTAB_EXTRA_ELEM_FREE;
409 		return;
410 	}
411 
412 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
413 		pcpu_freelist_push(&htab->freelist, &l->fnode);
414 	} else {
415 		atomic_dec(&htab->count);
416 		l->htab = htab;
417 		call_rcu(&l->rcu, htab_elem_free_rcu);
418 	}
419 }
420 
421 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
422 					 void *value, u32 key_size, u32 hash,
423 					 bool percpu, bool onallcpus,
424 					 bool old_elem_exists)
425 {
426 	u32 size = htab->map.value_size;
427 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
428 	struct htab_elem *l_new;
429 	void __percpu *pptr;
430 	int err = 0;
431 
432 	if (prealloc) {
433 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
434 		if (!l_new)
435 			err = -E2BIG;
436 	} else {
437 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
438 			atomic_dec(&htab->count);
439 			err = -E2BIG;
440 		} else {
441 			l_new = kmalloc(htab->elem_size,
442 					GFP_ATOMIC | __GFP_NOWARN);
443 			if (!l_new)
444 				return ERR_PTR(-ENOMEM);
445 		}
446 	}
447 
448 	if (err) {
449 		if (!old_elem_exists)
450 			return ERR_PTR(err);
451 
452 		/* if we're updating the existing element and the hash table
453 		 * is full, use per-cpu extra elems
454 		 */
455 		l_new = this_cpu_ptr(htab->extra_elems);
456 		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
457 			return ERR_PTR(-E2BIG);
458 		l_new->state = HTAB_EXTRA_ELEM_USED;
459 	} else {
460 		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
461 	}
462 
463 	memcpy(l_new->key, key, key_size);
464 	if (percpu) {
465 		/* round up value_size to 8 bytes */
466 		size = round_up(size, 8);
467 
468 		if (prealloc) {
469 			pptr = htab_elem_get_ptr(l_new, key_size);
470 		} else {
471 			/* alloc_percpu zero-fills */
472 			pptr = __alloc_percpu_gfp(size, 8,
473 						  GFP_ATOMIC | __GFP_NOWARN);
474 			if (!pptr) {
475 				kfree(l_new);
476 				return ERR_PTR(-ENOMEM);
477 			}
478 		}
479 
480 		if (!onallcpus) {
481 			/* copy true value_size bytes */
482 			memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
483 		} else {
484 			int off = 0, cpu;
485 
486 			for_each_possible_cpu(cpu) {
487 				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
488 						value + off, size);
489 				off += size;
490 			}
491 		}
492 		if (!prealloc)
493 			htab_elem_set_ptr(l_new, key_size, pptr);
494 	} else {
495 		memcpy(l_new->key + round_up(key_size, 8), value, size);
496 	}
497 
498 	l_new->hash = hash;
499 	return l_new;
500 }
501 
502 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
503 		       u64 map_flags)
504 {
505 	if (l_old && map_flags == BPF_NOEXIST)
506 		/* elem already exists */
507 		return -EEXIST;
508 
509 	if (!l_old && map_flags == BPF_EXIST)
510 		/* elem doesn't exist, cannot update it */
511 		return -ENOENT;
512 
513 	return 0;
514 }
515 
516 /* Called from syscall or from eBPF program */
517 static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
518 				u64 map_flags)
519 {
520 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
521 	struct htab_elem *l_new = NULL, *l_old;
522 	struct hlist_head *head;
523 	unsigned long flags;
524 	struct bucket *b;
525 	u32 key_size, hash;
526 	int ret;
527 
528 	if (unlikely(map_flags > BPF_EXIST))
529 		/* unknown flags */
530 		return -EINVAL;
531 
532 	WARN_ON_ONCE(!rcu_read_lock_held());
533 
534 	key_size = map->key_size;
535 
536 	hash = htab_map_hash(key, key_size);
537 
538 	b = __select_bucket(htab, hash);
539 	head = &b->head;
540 
541 	/* bpf_map_update_elem() can be called in_irq() */
542 	raw_spin_lock_irqsave(&b->lock, flags);
543 
544 	l_old = lookup_elem_raw(head, hash, key, key_size);
545 
546 	ret = check_flags(htab, l_old, map_flags);
547 	if (ret)
548 		goto err;
549 
550 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
551 				!!l_old);
552 	if (IS_ERR(l_new)) {
553 		/* all pre-allocated elements are in use or memory exhausted */
554 		ret = PTR_ERR(l_new);
555 		goto err;
556 	}
557 
558 	/* add new element to the head of the list, so that
559 	 * concurrent search will find it before old elem
560 	 */
561 	hlist_add_head_rcu(&l_new->hash_node, head);
562 	if (l_old) {
563 		hlist_del_rcu(&l_old->hash_node);
564 		free_htab_elem(htab, l_old);
565 	}
566 	ret = 0;
567 err:
568 	raw_spin_unlock_irqrestore(&b->lock, flags);
569 	return ret;
570 }
571 
572 static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
573 					 void *value, u64 map_flags,
574 					 bool onallcpus)
575 {
576 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
577 	struct htab_elem *l_new = NULL, *l_old;
578 	struct hlist_head *head;
579 	unsigned long flags;
580 	struct bucket *b;
581 	u32 key_size, hash;
582 	int ret;
583 
584 	if (unlikely(map_flags > BPF_EXIST))
585 		/* unknown flags */
586 		return -EINVAL;
587 
588 	WARN_ON_ONCE(!rcu_read_lock_held());
589 
590 	key_size = map->key_size;
591 
592 	hash = htab_map_hash(key, key_size);
593 
594 	b = __select_bucket(htab, hash);
595 	head = &b->head;
596 
597 	/* bpf_map_update_elem() can be called in_irq() */
598 	raw_spin_lock_irqsave(&b->lock, flags);
599 
600 	l_old = lookup_elem_raw(head, hash, key, key_size);
601 
602 	ret = check_flags(htab, l_old, map_flags);
603 	if (ret)
604 		goto err;
605 
606 	if (l_old) {
607 		void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
608 		u32 size = htab->map.value_size;
609 
610 		/* per-cpu hash map can update value in-place */
611 		if (!onallcpus) {
612 			memcpy(this_cpu_ptr(pptr), value, size);
613 		} else {
614 			int off = 0, cpu;
615 
616 			size = round_up(size, 8);
617 			for_each_possible_cpu(cpu) {
618 				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
619 						value + off, size);
620 				off += size;
621 			}
622 		}
623 	} else {
624 		l_new = alloc_htab_elem(htab, key, value, key_size,
625 					hash, true, onallcpus, false);
626 		if (IS_ERR(l_new)) {
627 			ret = PTR_ERR(l_new);
628 			goto err;
629 		}
630 		hlist_add_head_rcu(&l_new->hash_node, head);
631 	}
632 	ret = 0;
633 err:
634 	raw_spin_unlock_irqrestore(&b->lock, flags);
635 	return ret;
636 }
637 
638 static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
639 				       void *value, u64 map_flags)
640 {
641 	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
642 }
643 
644 /* Called from syscall or from eBPF program */
645 static int htab_map_delete_elem(struct bpf_map *map, void *key)
646 {
647 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
648 	struct hlist_head *head;
649 	struct bucket *b;
650 	struct htab_elem *l;
651 	unsigned long flags;
652 	u32 hash, key_size;
653 	int ret = -ENOENT;
654 
655 	WARN_ON_ONCE(!rcu_read_lock_held());
656 
657 	key_size = map->key_size;
658 
659 	hash = htab_map_hash(key, key_size);
660 	b = __select_bucket(htab, hash);
661 	head = &b->head;
662 
663 	raw_spin_lock_irqsave(&b->lock, flags);
664 
665 	l = lookup_elem_raw(head, hash, key, key_size);
666 
667 	if (l) {
668 		hlist_del_rcu(&l->hash_node);
669 		free_htab_elem(htab, l);
670 		ret = 0;
671 	}
672 
673 	raw_spin_unlock_irqrestore(&b->lock, flags);
674 	return ret;
675 }
676 
677 static void delete_all_elements(struct bpf_htab *htab)
678 {
679 	int i;
680 
681 	for (i = 0; i < htab->n_buckets; i++) {
682 		struct hlist_head *head = select_bucket(htab, i);
683 		struct hlist_node *n;
684 		struct htab_elem *l;
685 
686 		hlist_for_each_entry_safe(l, n, head, hash_node) {
687 			hlist_del_rcu(&l->hash_node);
688 			if (l->state != HTAB_EXTRA_ELEM_USED)
689 				htab_elem_free(htab, l);
690 		}
691 	}
692 }
693 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
694 static void htab_map_free(struct bpf_map *map)
695 {
696 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
697 
698 	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
699 	 * so the programs (can be more than one that used this map) were
700 	 * disconnected from events. Wait for outstanding critical sections in
701 	 * these programs to complete
702 	 */
703 	synchronize_rcu();
704 
705 	/* some of free_htab_elem() callbacks for elements of this map may
706 	 * not have executed. Wait for them.
707 	 */
708 	rcu_barrier();
709 	if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
710 		delete_all_elements(htab);
711 	} else {
712 		htab_free_elems(htab);
713 		pcpu_freelist_destroy(&htab->freelist);
714 	}
715 	free_percpu(htab->extra_elems);
716 	bpf_map_area_free(htab->buckets);
717 	kfree(htab);
718 }
719 
720 static const struct bpf_map_ops htab_ops = {
721 	.map_alloc = htab_map_alloc,
722 	.map_free = htab_map_free,
723 	.map_get_next_key = htab_map_get_next_key,
724 	.map_lookup_elem = htab_map_lookup_elem,
725 	.map_update_elem = htab_map_update_elem,
726 	.map_delete_elem = htab_map_delete_elem,
727 };
728 
729 static struct bpf_map_type_list htab_type __read_mostly = {
730 	.ops = &htab_ops,
731 	.type = BPF_MAP_TYPE_HASH,
732 };
733 
734 /* Called from eBPF program */
735 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
736 {
737 	struct htab_elem *l = __htab_map_lookup_elem(map, key);
738 
739 	if (l)
740 		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
741 	else
742 		return NULL;
743 }
744 
745 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
746 {
747 	struct htab_elem *l;
748 	void __percpu *pptr;
749 	int ret = -ENOENT;
750 	int cpu, off = 0;
751 	u32 size;
752 
753 	/* per_cpu areas are zero-filled and bpf programs can only
754 	 * access 'value_size' of them, so copying rounded areas
755 	 * will not leak any kernel data
756 	 */
757 	size = round_up(map->value_size, 8);
758 	rcu_read_lock();
759 	l = __htab_map_lookup_elem(map, key);
760 	if (!l)
761 		goto out;
762 	pptr = htab_elem_get_ptr(l, map->key_size);
763 	for_each_possible_cpu(cpu) {
764 		bpf_long_memcpy(value + off,
765 				per_cpu_ptr(pptr, cpu), size);
766 		off += size;
767 	}
768 	ret = 0;
769 out:
770 	rcu_read_unlock();
771 	return ret;
772 }
773 
774 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
775 			   u64 map_flags)
776 {
777 	int ret;
778 
779 	rcu_read_lock();
780 	ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
781 	rcu_read_unlock();
782 
783 	return ret;
784 }
785 
786 static const struct bpf_map_ops htab_percpu_ops = {
787 	.map_alloc = htab_map_alloc,
788 	.map_free = htab_map_free,
789 	.map_get_next_key = htab_map_get_next_key,
790 	.map_lookup_elem = htab_percpu_map_lookup_elem,
791 	.map_update_elem = htab_percpu_map_update_elem,
792 	.map_delete_elem = htab_map_delete_elem,
793 };
794 
795 static struct bpf_map_type_list htab_percpu_type __read_mostly = {
796 	.ops = &htab_percpu_ops,
797 	.type = BPF_MAP_TYPE_PERCPU_HASH,
798 };
799 
800 static int __init register_htab_map(void)
801 {
802 	bpf_register_map_type(&htab_type);
803 	bpf_register_map_type(&htab_percpu_type);
804 	return 0;
805 }
806 late_initcall(register_htab_map);
807