/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

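	/* allocate a zeroed per-cpu region for every element; the 8-byte
	 * alignment matches the round_up(value_size, 8) element size used
	 * throughout this file
	 */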
	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return ERR_PTR(-EINVAL);

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return ERR_PTR(-E2BIG);

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

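	/* e.g. max_entries == 5: fls_long(4) == 3, so mask64 becomes
	 * (1ULL << 3) - 1 == 7 and an unprivileged map is rounded up
	 * to 8 slots below
	 */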
	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	array->map.map_type = attr->map_type;
	array->map.key_size = attr->key_size;
	array->map.value_size = attr->value_size;
	array->map.max_entries = attr->max_entries;
	array->map.map_flags = attr->map_flags;
	array->map.numa_node = numa_node;
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

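	/* mask the index even after the bounds check so that a
	 * mispredicted branch cannot speculatively read out of bounds
	 * (Spectre v1 mitigation for unprivileged arrays)
	 */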
	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

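	/* on entry R1 = pointer to the map, R2 = pointer to the u32 key:
	 * turn R1 into the base of the value array, load the index into
	 * R0, bounds-check it (the JGE offsets skip to the NULL return),
	 * scale by elem_size and add the base; out of bounds returns 0
	 */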
	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

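	/* a NULL or out-of-range key (re)starts iteration at index 0;
	 * only the last valid index has no successor
	 */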
	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

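	/* a per-cpu update through this path only touches the current
	 * CPU's copy; the syscall path uses bpf_percpu_array_update()
	 * below to write every CPU's copy
	 */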
	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	else
		memcpy(array->value +
		       array->elem_size * (index & array->index_mask),
		       value, map->value_size);
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

const struct bpf_map_ops array_map_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return ERR_PTR(-EINVAL);
	return array_map_alloc(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

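/* fd maps store kernel pointers (progs, perf events, cgroups, inner
 * maps), so a direct lookup from an eBPF program must not hand the raw
 * pointer out; the syscall path uses bpf_fd_array_map_lookup_elem()
 * below instead
 */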
static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

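	/* atomically publish the new pointer: readers see either the old
	 * or the new element, and the displaced reference is dropped
	 */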
	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

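	/* all programs stored in one prog_array must be compatible with
	 * the owning program (same program type, same JIT state)
	 */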
	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

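	/* probe the event with perf_event_read_local(): events that
	 * cannot be read from BPF context (-EOPNOTSUPP) are rejected
	 * up front
	 */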
	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

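/* called via map_release when a map file is closed: purge only the
 * entries that were installed through this particular struct file
 */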
static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = fd_array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

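	/* the inner map pointer can be swapped by a concurrent update;
	 * READ_ONCE pairs with the xchg() publisher in
	 * bpf_fd_array_map_update_elem()
	 */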
	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

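	/* same sequence as array_map_gen_lookup(), plus one extra load to
	 * dereference the stored inner-map pointer and a JEQ that turns a
	 * NULL slot into the 0 (not found) return value
	 */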
	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
};
694