• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/bpf-cgroup.h>
6 #include <linux/bpf_trace.h>
7 #include <linux/bpf_lirc.h>
8 #include <linux/bpf_verifier.h>
9 #include <linux/bsearch.h>
10 #include <linux/btf.h>
11 #include <linux/syscalls.h>
12 #include <linux/slab.h>
13 #include <linux/sched/signal.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mmzone.h>
16 #include <linux/anon_inodes.h>
17 #include <linux/fdtable.h>
18 #include <linux/file.h>
19 #include <linux/fs.h>
20 #include <linux/license.h>
21 #include <linux/filter.h>
22 #include <linux/kernel.h>
23 #include <linux/idr.h>
24 #include <linux/cred.h>
25 #include <linux/timekeeping.h>
26 #include <linux/ctype.h>
27 #include <linux/nospec.h>
28 #include <linux/audit.h>
29 #include <uapi/linux/btf.h>
30 #include <linux/pgtable.h>
31 #include <linux/bpf_lsm.h>
32 #include <linux/poll.h>
33 #include <linux/sort.h>
34 #include <linux/bpf-netns.h>
35 #include <linux/rcupdate_trace.h>
36 #include <linux/memcontrol.h>
37 #include <linux/trace_events.h>
38 
39 #include <trace/hooks/syscall_check.h>
40 
/* Map-type predicates for maps whose values are file descriptors from the
 * syscall side.  Each one only inspects (map)->map_type.
 */
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
/* True for any of the three families above. */
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

/* The two fd access-mode flags accepted for BPF objects. */
#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)
50 
51 DEFINE_PER_CPU(int, bpf_prog_active);
52 static DEFINE_IDR(prog_idr);
53 static DEFINE_SPINLOCK(prog_idr_lock);
54 static DEFINE_IDR(map_idr);
55 static DEFINE_SPINLOCK(map_idr_lock);
56 static DEFINE_IDR(link_idr);
57 static DEFINE_SPINLOCK(link_idr_lock);
58 
59 int sysctl_unprivileged_bpf_disabled __read_mostly =
60 	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
61 
62 static const struct bpf_map_ops * const bpf_map_types[] = {
63 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
64 #define BPF_MAP_TYPE(_id, _ops) \
65 	[_id] = &_ops,
66 #define BPF_LINK_TYPE(_id, _name)
67 #include <linux/bpf_types.h>
68 #undef BPF_PROG_TYPE
69 #undef BPF_MAP_TYPE
70 #undef BPF_LINK_TYPE
71 };
72 
73 /*
74  * If we're handed a bigger struct than we know of, ensure all the unknown bits
75  * are 0 - i.e. new user-space does not rely on any kernel feature extensions
76  * we don't know about yet.
77  *
78  * There is a ToCToU between this function call and the following
79  * copy_from_user() call. However, this is not a concern since this function is
80  * meant to be a future-proofing of bits.
81  */
bpf_check_uarg_tail_zero(bpfptr_t uaddr,size_t expected_size,size_t actual_size)82 int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
83 			     size_t expected_size,
84 			     size_t actual_size)
85 {
86 	int res;
87 
88 	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
89 		return -E2BIG;
90 
91 	if (actual_size <= expected_size)
92 		return 0;
93 
94 	if (uaddr.is_kernel)
95 		res = memchr_inv(uaddr.kernel + expected_size, 0,
96 				 actual_size - expected_size) == NULL;
97 	else
98 		res = check_zeroed_user(uaddr.user + expected_size,
99 					actual_size - expected_size);
100 	if (res < 0)
101 		return res;
102 	return res ? 0 : -E2BIG;
103 }
104 
105 const struct bpf_map_ops bpf_map_offload_ops = {
106 	.map_meta_equal = bpf_map_meta_equal,
107 	.map_alloc = bpf_map_offload_map_alloc,
108 	.map_free = bpf_map_offload_map_free,
109 	.map_check_btf = map_check_no_btf,
110 };
111 
find_and_alloc_map(union bpf_attr * attr)112 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
113 {
114 	const struct bpf_map_ops *ops;
115 	u32 type = attr->map_type;
116 	struct bpf_map *map;
117 	int err;
118 
119 	if (type >= ARRAY_SIZE(bpf_map_types))
120 		return ERR_PTR(-EINVAL);
121 	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
122 	ops = bpf_map_types[type];
123 	if (!ops)
124 		return ERR_PTR(-EINVAL);
125 
126 	if (ops->map_alloc_check) {
127 		err = ops->map_alloc_check(attr);
128 		if (err)
129 			return ERR_PTR(err);
130 	}
131 	if (attr->map_ifindex)
132 		ops = &bpf_map_offload_ops;
133 	map = ops->map_alloc(attr);
134 	if (IS_ERR(map))
135 		return map;
136 	map->ops = ops;
137 	map->map_type = type;
138 	return map;
139 }
140 
bpf_map_write_active_inc(struct bpf_map * map)141 static void bpf_map_write_active_inc(struct bpf_map *map)
142 {
143 	atomic64_inc(&map->writecnt);
144 }
145 
bpf_map_write_active_dec(struct bpf_map * map)146 static void bpf_map_write_active_dec(struct bpf_map *map)
147 {
148 	atomic64_dec(&map->writecnt);
149 }
150 
bpf_map_write_active(const struct bpf_map * map)151 bool bpf_map_write_active(const struct bpf_map *map)
152 {
153 	return atomic64_read(&map->writecnt) != 0;
154 }
155 
bpf_map_value_size(const struct bpf_map * map)156 static u32 bpf_map_value_size(const struct bpf_map *map)
157 {
158 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
159 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
160 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
161 	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
162 		return round_up(map->value_size, 8) * num_possible_cpus();
163 	else if (IS_FD_MAP(map))
164 		return sizeof(u32);
165 	else
166 		return  map->value_size;
167 }
168 
maybe_wait_bpf_programs(struct bpf_map * map)169 static void maybe_wait_bpf_programs(struct bpf_map *map)
170 {
171 	/* Wait for any running BPF programs to complete so that
172 	 * userspace, when we return to it, knows that all programs
173 	 * that could be running use the new map value.
174 	 */
175 	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
176 	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
177 		synchronize_rcu();
178 }
179 
/* Syscall-side element update, dispatched on the map type.
 *
 * Device-bound maps and the first group of types below return directly
 * from their handler.  All remaining types run with BPF instrumentation
 * disabled and are followed by maybe_wait_bpf_programs().
 *
 * Returns whatever the selected handler returns.
 */
static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
				void *value, __u64 flags)
{
	int ret;

	if (bpf_map_is_dev_bound(map))
		return bpf_map_offload_update_elem(map, key, value, flags);

	/* Need to create a kthread, thus must support schedule */
	switch (map->map_type) {
	case BPF_MAP_TYPE_CPUMAP:
	case BPF_MAP_TYPE_STRUCT_OPS:
		return map->ops->map_update_elem(map, key, value, flags);
	case BPF_MAP_TYPE_SOCKHASH:
	case BPF_MAP_TYPE_SOCKMAP:
		return sock_map_update_elem_sys(map, key, value, flags);
	case BPF_MAP_TYPE_PROG_ARRAY:	/* IS_FD_PROG_ARRAY() */
		return bpf_fd_array_map_update_elem(map, f.file, key, value,
						    flags);
	default:
		break;
	}

	bpf_disable_instrumentation();
	switch (map->map_type) {
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		ret = bpf_percpu_hash_update(map, key, value, flags);
		break;
	case BPF_MAP_TYPE_PERCPU_ARRAY:
		ret = bpf_percpu_array_update(map, key, value, flags);
		break;
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		ret = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
		break;
	/* The three types covered by IS_FD_ARRAY(). */
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
		rcu_read_lock();
		ret = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   flags);
		rcu_read_unlock();
		break;
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		rcu_read_lock();
		ret = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  flags);
		rcu_read_unlock();
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
		/* rcu_read_lock() is not needed */
		ret = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
		break;
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
	case BPF_MAP_TYPE_BLOOM_FILTER:
		/* These types take no key: the value is pushed. */
		ret = map->ops->map_push_elem(map, value, flags);
		break;
	default:
		rcu_read_lock();
		ret = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
		break;
	}
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return ret;
}
236 
/* Syscall-side lookup: copy the value stored under @key into @value.
 *
 * Device-bound maps are handled by the offload lookup.  Every other type
 * is dispatched below with BPF instrumentation disabled.  For the generic
 * case the element is looked up under rcu_read_lock() and copied, honouring
 * BPF_F_LOCK in @flags.
 *
 * Returns the handler's result; the generic path returns 0 on success,
 * -ENOENT when the lookup yields NULL, or the error encoded in the pointer.
 */
static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
			      __u64 flags)
{
	void *elem;
	int ret;

	if (bpf_map_is_dev_bound(map))
		return bpf_map_offload_lookup_elem(map, key, value);

	bpf_disable_instrumentation();
	switch (map->map_type) {
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		ret = bpf_percpu_hash_copy(map, key, value);
		break;
	case BPF_MAP_TYPE_PERCPU_ARRAY:
		ret = bpf_percpu_array_copy(map, key, value);
		break;
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		ret = bpf_percpu_cgroup_storage_copy(map, key, value);
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		ret = bpf_stackmap_copy(map, key, value);
		break;
	/* IS_FD_ARRAY() types plus IS_FD_PROG_ARRAY(). */
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_PROG_ARRAY:
		ret = bpf_fd_array_map_lookup_elem(map, key, value);
		break;
	case BPF_MAP_TYPE_HASH_OF_MAPS:	/* IS_FD_HASH() */
		ret = bpf_fd_htab_map_lookup_elem(map, key, value);
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
		ret = bpf_fd_reuseport_array_lookup_elem(map, key, value);
		break;
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
	case BPF_MAP_TYPE_BLOOM_FILTER:
		ret = map->ops->map_peek_elem(map, value);
		break;
	case BPF_MAP_TYPE_STRUCT_OPS:
		/* struct_ops map requires directly updating "value" */
		ret = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
		break;
	default:
		rcu_read_lock();
		/* Prefer the syscall-only lookup when the map provides one. */
		if (map->ops->map_lookup_elem_sys_only)
			elem = map->ops->map_lookup_elem_sys_only(map, key);
		else
			elem = map->ops->map_lookup_elem(map, key);

		if (!elem) {
			ret = -ENOENT;
		} else if (IS_ERR(elem)) {
			ret = PTR_ERR(elem);
		} else {
			ret = 0;
			if (flags & BPF_F_LOCK)
				/* lock 'elem' and copy everything but lock */
				copy_map_value_locked(map, value, elem, true);
			else
				copy_map_value(map, value, elem);
			/* mask lock and timer, since value wasn't zero inited */
			check_and_init_map_value(map, value);
		}
		rcu_read_unlock();
		break;
	}

	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return ret;
}
297 
298 /* Please, do not use this function outside from the map creation path
299  * (e.g. in map update path) without taking care of setting the active
300  * memory cgroup (see at bpf_map_kmalloc_node() for example).
301  */
__bpf_map_area_alloc(u64 size,int numa_node,bool mmapable)302 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
303 {
304 	/* We really just want to fail instead of triggering OOM killer
305 	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
306 	 * which is used for lower order allocation requests.
307 	 *
308 	 * It has been observed that higher order allocation requests done by
309 	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
310 	 * to reclaim memory from the page cache, thus we set
311 	 * __GFP_RETRY_MAYFAIL to avoid such situations.
312 	 */
313 
314 	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
315 	unsigned int flags = 0;
316 	unsigned long align = 1;
317 	void *area;
318 
319 	if (size >= SIZE_MAX)
320 		return NULL;
321 
322 	/* kmalloc()'ed memory can't be mmap()'ed */
323 	if (mmapable) {
324 		BUG_ON(!PAGE_ALIGNED(size));
325 		align = SHMLBA;
326 		flags = VM_USERMAP;
327 	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
328 		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
329 				    numa_node);
330 		if (area != NULL)
331 			return area;
332 	}
333 
334 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
335 			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
336 			flags, numa_node, __builtin_return_address(0));
337 }
338 
/* Allocate a map area that does not need to be mmap()-able. */
void *bpf_map_area_alloc(u64 size, int numa_node)
{
	const bool mmapable = false;

	return __bpf_map_area_alloc(size, numa_node, mmapable);
}
343 
/* Allocate a map area that can be mmap()'ed; @size must be page aligned
 * (enforced with BUG_ON() in __bpf_map_area_alloc()).
 */
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
{
	const bool mmapable = true;

	return __bpf_map_area_alloc(size, numa_node, mmapable);
}
348 
/* Release an area obtained from bpf_map_area_alloc() or
 * bpf_map_area_mmapable_alloc().  Those may hand out either kmalloc_node()
 * or __vmalloc_node_range() memory, hence kvfree().
 */
void bpf_map_area_free(void *area)
{
	kvfree(area);
}
353 
/* Strip the per-fd access flags (BPF_F_RDONLY / BPF_F_WRONLY) from @flags
 * and return the rest.
 */
static u32 bpf_map_flags_retain_permanent(u32 flags)
{
	/* Some map creation flags are not tied to the map object but
	 * rather to the map fd instead, so they have no meaning upon
	 * map object inspection since multiple file descriptors with
	 * different (access) properties can exist here. Thus, given
	 * this has zero meaning for the map itself, lets clear these
	 * from here.
	 */
	const u32 fd_only_flags = BPF_OBJ_FLAG_MASK;

	return flags & ~fd_only_flags;
}
365 
bpf_map_init_from_attr(struct bpf_map * map,union bpf_attr * attr)366 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
367 {
368 	map->map_type = attr->map_type;
369 	map->key_size = attr->key_size;
370 	map->value_size = attr->value_size;
371 	map->max_entries = attr->max_entries;
372 	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
373 	map->numa_node = bpf_map_attr_numa_node(attr);
374 	map->map_extra = attr->map_extra;
375 }
376 
bpf_map_alloc_id(struct bpf_map * map)377 static int bpf_map_alloc_id(struct bpf_map *map)
378 {
379 	int id;
380 
381 	idr_preload(GFP_KERNEL);
382 	spin_lock_bh(&map_idr_lock);
383 	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
384 	if (id > 0)
385 		map->id = id;
386 	spin_unlock_bh(&map_idr_lock);
387 	idr_preload_end();
388 
389 	if (WARN_ON_ONCE(!id))
390 		return -ENOSPC;
391 
392 	return id > 0 ? 0 : id;
393 }
394 
/* Remove @map from map_idr and reset map->id to 0.
 *
 * @do_idr_lock: take map_idr_lock (irqsave) around the removal.  When
 * false, the __acquire()/__release() annotations are used and no lock is
 * taken here, so the caller is presumably expected to already hold it.
 */
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long irq_flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	if (!do_idr_lock)
		__acquire(&map_idr_lock);
	else
		spin_lock_irqsave(&map_idr_lock, irq_flags);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	if (!do_idr_lock)
		__release(&map_idr_lock);
	else
		spin_unlock_irqrestore(&map_idr_lock, irq_flags);
}
420 
421 #ifdef CONFIG_MEMCG_KMEM
/* Record the current task's object cgroup in map->objcg.  The stored
 * pointer is dropped again by bpf_map_release_memcg().
 */
static void bpf_map_save_memcg(struct bpf_map *map)
{
	/* Currently if a map is created by a process belonging to the root
	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
	 * So we have to check map->objcg for being NULL each time it's
	 * being used.
	 */
	map->objcg = get_obj_cgroup_from_current();
}
431 
bpf_map_release_memcg(struct bpf_map * map)432 static void bpf_map_release_memcg(struct bpf_map *map)
433 {
434 	if (map->objcg)
435 		obj_cgroup_put(map->objcg);
436 }
437 
bpf_map_get_memcg(const struct bpf_map * map)438 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
439 {
440 	if (map->objcg)
441 		return get_mem_cgroup_from_objcg(map->objcg);
442 
443 	return root_mem_cgroup;
444 }
445 
/* kmalloc_node() with __GFP_ACCOUNT added, performed while the map's memory
 * cgroup is the active one.  The previously active memcg is restored before
 * returning.  Returns the kmalloc_node() result unchanged.
 */
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node)
{
	struct mem_cgroup *map_memcg = bpf_map_get_memcg(map);
	struct mem_cgroup *prev_memcg = set_active_memcg(map_memcg);
	void *mem = kmalloc_node(size, flags | __GFP_ACCOUNT, node);

	set_active_memcg(prev_memcg);
	mem_cgroup_put(map_memcg);

	return mem;
}
460 
/* kzalloc() with __GFP_ACCOUNT added, performed while the map's memory
 * cgroup is the active one.  The previously active memcg is restored before
 * returning.  Returns the kzalloc() result unchanged.
 */
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *map_memcg = bpf_map_get_memcg(map);
	struct mem_cgroup *prev_memcg = set_active_memcg(map_memcg);
	void *mem = kzalloc(size, flags | __GFP_ACCOUNT);

	set_active_memcg(prev_memcg);
	mem_cgroup_put(map_memcg);

	return mem;
}
474 
bpf_map_alloc_percpu(const struct bpf_map * map,size_t size,size_t align,gfp_t flags)475 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
476 				    size_t align, gfp_t flags)
477 {
478 	struct mem_cgroup *memcg, *old_memcg;
479 	void __percpu *ptr;
480 
481 	memcg = bpf_map_get_memcg(map);
482 	old_memcg = set_active_memcg(memcg);
483 	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
484 	set_active_memcg(old_memcg);
485 	mem_cgroup_put(memcg);
486 
487 	return ptr;
488 }
489 
490 #else
/* !CONFIG_MEMCG_KMEM: nothing to record. */
static void bpf_map_save_memcg(struct bpf_map *map)
{
}
494 
/* !CONFIG_MEMCG_KMEM: nothing to release. */
static void bpf_map_release_memcg(struct bpf_map *map)
{
}
498 #endif
499 
bpf_map_kptr_off_cmp(const void * a,const void * b)500 static int bpf_map_kptr_off_cmp(const void *a, const void *b)
501 {
502 	const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
503 
504 	if (off_desc1->offset < off_desc2->offset)
505 		return -1;
506 	else if (off_desc1->offset > off_desc2->offset)
507 		return 1;
508 	return 0;
509 }
510 
/* Find the kptr descriptor located exactly at @offset in @map's value.
 * Returns NULL when the map has no kptrs or no descriptor matches.
 */
struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
{
	struct bpf_map_value_off *kptr_tab;

	if (!map_value_has_kptrs(map))
		return NULL;

	/* Since members are iterated in btf_find_field in increasing order,
	 * offsets appended to kptr_off_tab are in increasing order, so we can
	 * do bsearch to find exact match.
	 *
	 * NOTE(review): the key is a bare u32 that the comparator reads as a
	 * descriptor's ->offset; this relies on ->offset being the first
	 * member of struct bpf_map_value_off_desc - confirm in the header.
	 */
	kptr_tab = map->kptr_off_tab;
	return bsearch(&offset, kptr_tab->off, kptr_tab->nr_off,
		       sizeof(kptr_tab->off[0]), bpf_map_kptr_off_cmp);
}
524 
bpf_map_free_kptr_off_tab(struct bpf_map * map)525 void bpf_map_free_kptr_off_tab(struct bpf_map *map)
526 {
527 	struct bpf_map_value_off *tab = map->kptr_off_tab;
528 	int i;
529 
530 	if (!map_value_has_kptrs(map))
531 		return;
532 	for (i = 0; i < tab->nr_off; i++) {
533 		if (tab->off[i].kptr.module)
534 			module_put(tab->off[i].kptr.module);
535 		btf_put(tab->off[i].kptr.btf);
536 	}
537 	kfree(tab);
538 	map->kptr_off_tab = NULL;
539 }
540 
bpf_map_copy_kptr_off_tab(const struct bpf_map * map)541 struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
542 {
543 	struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab;
544 	int size, i;
545 
546 	if (!map_value_has_kptrs(map))
547 		return ERR_PTR(-ENOENT);
548 	size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
549 	new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
550 	if (!new_tab)
551 		return ERR_PTR(-ENOMEM);
552 	/* Do a deep copy of the kptr_off_tab */
553 	for (i = 0; i < tab->nr_off; i++) {
554 		btf_get(tab->off[i].kptr.btf);
555 		if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) {
556 			while (i--) {
557 				if (tab->off[i].kptr.module)
558 					module_put(tab->off[i].kptr.module);
559 				btf_put(tab->off[i].kptr.btf);
560 			}
561 			kfree(new_tab);
562 			return ERR_PTR(-ENXIO);
563 		}
564 	}
565 	return new_tab;
566 }
567 
bpf_map_equal_kptr_off_tab(const struct bpf_map * map_a,const struct bpf_map * map_b)568 bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
569 {
570 	struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
571 	bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
572 	int size;
573 
574 	if (!a_has_kptr && !b_has_kptr)
575 		return true;
576 	if (a_has_kptr != b_has_kptr)
577 		return false;
578 	if (tab_a->nr_off != tab_b->nr_off)
579 		return false;
580 	size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]);
581 	return !memcmp(tab_a, tab_b, size);
582 }
583 
584 /* Caller must ensure map_value_has_kptrs is true. Note that this function can
585  * be called on a map value while the map_value is visible to BPF programs, as
586  * it ensures the correct synchronization, and we already enforce the same using
587  * the bpf_kptr_xchg helper on the BPF program side for referenced kptrs.
588  */
bpf_map_free_kptrs(struct bpf_map * map,void * map_value)589 void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
590 {
591 	struct bpf_map_value_off *tab = map->kptr_off_tab;
592 	unsigned long *btf_id_ptr;
593 	int i;
594 
595 	for (i = 0; i < tab->nr_off; i++) {
596 		struct bpf_map_value_off_desc *off_desc = &tab->off[i];
597 		unsigned long old_ptr;
598 
599 		btf_id_ptr = map_value + off_desc->offset;
600 		if (off_desc->type == BPF_KPTR_UNREF) {
601 			u64 *p = (u64 *)btf_id_ptr;
602 
603 			WRITE_ONCE(*p, 0);
604 			continue;
605 		}
606 		old_ptr = xchg(btf_id_ptr, 0);
607 		off_desc->kptr.dtor((void *)old_ptr);
608 	}
609 }
610 
611 /* called from workqueue */
bpf_map_free_deferred(struct work_struct * work)612 static void bpf_map_free_deferred(struct work_struct *work)
613 {
614 	struct bpf_map *map = container_of(work, struct bpf_map, work);
615 
616 	security_bpf_map_free(map);
617 	kfree(map->off_arr);
618 	bpf_map_release_memcg(map);
619 	/* implementation dependent freeing, map_free callback also does
620 	 * bpf_map_free_kptr_off_tab, if needed.
621 	 */
622 	map->ops->map_free(map);
623 }
624 
bpf_map_put_uref(struct bpf_map * map)625 static void bpf_map_put_uref(struct bpf_map *map)
626 {
627 	if (atomic64_dec_and_test(&map->usercnt)) {
628 		if (map->ops->map_release_uref)
629 			map->ops->map_release_uref(map);
630 	}
631 }
632 
633 /* decrement map refcnt and schedule it for freeing via workqueue
634  * (unrelying map implementation ops->map_free() might sleep)
635  */
__bpf_map_put(struct bpf_map * map,bool do_idr_lock)636 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
637 {
638 	if (atomic64_dec_and_test(&map->refcnt)) {
639 		/* bpf_map_free_id() must be called first */
640 		bpf_map_free_id(map, do_idr_lock);
641 		btf_put(map->btf);
642 		INIT_WORK(&map->work, bpf_map_free_deferred);
643 		/* Avoid spawning kworkers, since they all might contend
644 		 * for the same mutex like slab_mutex.
645 		 */
646 		queue_work(system_unbound_wq, &map->work);
647 	}
648 }
649 
bpf_map_put(struct bpf_map * map)650 void bpf_map_put(struct bpf_map *map)
651 {
652 	__bpf_map_put(map, true);
653 }
654 EXPORT_SYMBOL_GPL(bpf_map_put);
655 
/* Drop one user reference and then one regular reference on @map. */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
661 
bpf_map_release(struct inode * inode,struct file * filp)662 static int bpf_map_release(struct inode *inode, struct file *filp)
663 {
664 	struct bpf_map *map = filp->private_data;
665 
666 	if (map->ops->map_release)
667 		map->ops->map_release(map, filp);
668 
669 	bpf_map_put_with_uref(map);
670 	return 0;
671 }
672 
map_get_sys_perms(struct bpf_map * map,struct fd f)673 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
674 {
675 	fmode_t mode = f.file->f_mode;
676 
677 	/* Our file permissions may have been overridden by global
678 	 * map permissions facing syscall side.
679 	 */
680 	if (READ_ONCE(map->frozen))
681 		mode &= ~FMODE_CAN_WRITE;
682 	return mode;
683 }
684 
685 #ifdef CONFIG_PROC_FS
686 /* Provides an approximation of the map's memory footprint.
687  * Used only to provide a backward compatibility and display
688  * a reasonable "memlock" info.
689  */
bpf_map_memory_footprint(const struct bpf_map * map)690 static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
691 {
692 	unsigned long size;
693 
694 	size = round_up(map->key_size + bpf_map_value_size(map), 8);
695 
696 	return round_up(map->max_entries * size, PAGE_SIZE);
697 }
698 
bpf_map_show_fdinfo(struct seq_file * m,struct file * filp)699 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
700 {
701 	struct bpf_map *map = filp->private_data;
702 	u32 type = 0, jited = 0;
703 
704 	if (map_type_contains_progs(map)) {
705 		spin_lock(&map->owner.lock);
706 		type  = map->owner.type;
707 		jited = map->owner.jited;
708 		spin_unlock(&map->owner.lock);
709 	}
710 
711 	seq_printf(m,
712 		   "map_type:\t%u\n"
713 		   "key_size:\t%u\n"
714 		   "value_size:\t%u\n"
715 		   "max_entries:\t%u\n"
716 		   "map_flags:\t%#x\n"
717 		   "map_extra:\t%#llx\n"
718 		   "memlock:\t%lu\n"
719 		   "map_id:\t%u\n"
720 		   "frozen:\t%u\n",
721 		   map->map_type,
722 		   map->key_size,
723 		   map->value_size,
724 		   map->max_entries,
725 		   map->map_flags,
726 		   (unsigned long long)map->map_extra,
727 		   bpf_map_memory_footprint(map),
728 		   map->id,
729 		   READ_ONCE(map->frozen));
730 	if (type) {
731 		seq_printf(m, "owner_prog_type:\t%u\n", type);
732 		seq_printf(m, "owner_jited:\t%u\n", jited);
733 	}
734 }
735 #endif
736 
/* Placeholder ->read() handler: never transfers data, always -EINVAL. */
static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}
745 
/* Placeholder ->write() handler: never accepts data, always -EINVAL. */
static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}
754 
755 /* called for any extra memory-mapped regions (except initial) */
bpf_map_mmap_open(struct vm_area_struct * vma)756 static void bpf_map_mmap_open(struct vm_area_struct *vma)
757 {
758 	struct bpf_map *map = vma->vm_file->private_data;
759 
760 	if (vma->vm_flags & VM_MAYWRITE)
761 		bpf_map_write_active_inc(map);
762 }
763 
764 /* called for all unmapped memory region (including initial) */
bpf_map_mmap_close(struct vm_area_struct * vma)765 static void bpf_map_mmap_close(struct vm_area_struct *vma)
766 {
767 	struct bpf_map *map = vma->vm_file->private_data;
768 
769 	if (vma->vm_flags & VM_MAYWRITE)
770 		bpf_map_write_active_dec(map);
771 }
772 
773 static const struct vm_operations_struct bpf_map_default_vmops = {
774 	.open		= bpf_map_mmap_open,
775 	.close		= bpf_map_mmap_close,
776 };
777 
bpf_map_mmap(struct file * filp,struct vm_area_struct * vma)778 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
779 {
780 	struct bpf_map *map = filp->private_data;
781 	int err;
782 
783 	if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
784 	    map_value_has_timer(map) || map_value_has_kptrs(map))
785 		return -ENOTSUPP;
786 
787 	if (!(vma->vm_flags & VM_SHARED))
788 		return -EINVAL;
789 
790 	mutex_lock(&map->freeze_mutex);
791 
792 	if (vma->vm_flags & VM_WRITE) {
793 		if (map->frozen) {
794 			err = -EPERM;
795 			goto out;
796 		}
797 		/* map is meant to be read-only, so do not allow mapping as
798 		 * writable, because it's possible to leak a writable page
799 		 * reference and allows user-space to still modify it after
800 		 * freezing, while verifier will assume contents do not change
801 		 */
802 		if (map->map_flags & BPF_F_RDONLY_PROG) {
803 			err = -EACCES;
804 			goto out;
805 		}
806 	}
807 
808 	/* set default open/close callbacks */
809 	vma->vm_ops = &bpf_map_default_vmops;
810 	vma->vm_private_data = map;
811 	vm_flags_clear(vma, VM_MAYEXEC);
812 	if (!(vma->vm_flags & VM_WRITE))
813 		/* disallow re-mapping with PROT_WRITE */
814 		vm_flags_clear(vma, VM_MAYWRITE);
815 
816 	err = map->ops->map_mmap(map, vma);
817 	if (err)
818 		goto out;
819 
820 	if (vma->vm_flags & VM_MAYWRITE)
821 		bpf_map_write_active_inc(map);
822 out:
823 	mutex_unlock(&map->freeze_mutex);
824 	return err;
825 }
826 
bpf_map_poll(struct file * filp,struct poll_table_struct * pts)827 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
828 {
829 	struct bpf_map *map = filp->private_data;
830 
831 	if (map->ops->map_poll)
832 		return map->ops->map_poll(map, filp, pts);
833 
834 	return EPOLLERR;
835 }
836 
837 const struct file_operations bpf_map_fops = {
838 #ifdef CONFIG_PROC_FS
839 	.show_fdinfo	= bpf_map_show_fdinfo,
840 #endif
841 	.release	= bpf_map_release,
842 	.read		= bpf_dummy_read,
843 	.write		= bpf_dummy_write,
844 	.mmap		= bpf_map_mmap,
845 	.poll		= bpf_map_poll,
846 };
847 
bpf_map_new_fd(struct bpf_map * map,int flags)848 int bpf_map_new_fd(struct bpf_map *map, int flags)
849 {
850 	int ret;
851 
852 	ret = security_bpf_map(map, OPEN_FMODE(flags));
853 	if (ret < 0)
854 		return ret;
855 
856 	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
857 				flags | O_CLOEXEC);
858 }
859 
bpf_get_file_flag(int flags)860 int bpf_get_file_flag(int flags)
861 {
862 	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
863 		return -EINVAL;
864 	if (flags & BPF_F_RDONLY)
865 		return O_RDONLY;
866 	if (flags & BPF_F_WRONLY)
867 		return O_WRONLY;
868 	return O_RDWR;
869 }
870 
/* Helper macro to check that the unused fields of 'union bpf_attr' are zero.
 *
 * Expects a 'union bpf_attr *attr' in scope and a CMD##_LAST_FIELD macro
 * naming the last field used by the command.  Evaluates to true when any
 * byte after that field is non-zero.  The expansion is fully parenthesized
 * so it can be negated or combined with other expressions safely.
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
878 
/* dst and src must have at least "size" number of bytes.
 *
 * Zeroes dst, then copies the name from src.  Only alphanumeric characters,
 * '_' and '.' are accepted.
 *
 * Return strlen on success and -EINVAL when an invalid character is seen or
 * no terminating '\0' is found within "size" bytes.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	unsigned int len;

	memset(dst, 0, size);

	/* Copy all isalnum(), '_' and '.' chars. */
	for (len = 0; len < size && src[len]; len++) {
		const char c = src[len];

		if (!isalnum(c) && c != '_' && c != '.')
			return -EINVAL;
		dst[len] = c;
	}

	/* No '\0' found in "size" number of bytes */
	if (len == size)
		return -EINVAL;

	return len;
}
902 
/* ->map_check_btf() implementation for maps that do not accept BTF:
 * ignores all arguments and always returns -ENOTSUPP.
 */
int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}
910 
map_off_arr_cmp(const void * _a,const void * _b,const void * priv)911 static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv)
912 {
913 	const u32 a = *(const u32 *)_a;
914 	const u32 b = *(const u32 *)_b;
915 
916 	if (a < b)
917 		return -1;
918 	else if (a > b)
919 		return 1;
920 	return 0;
921 }
922 
map_off_arr_swap(void * _a,void * _b,int size,const void * priv)923 static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
924 {
925 	struct bpf_map *map = (struct bpf_map *)priv;
926 	u32 *off_base = map->off_arr->field_off;
927 	u32 *a = _a, *b = _b;
928 	u8 *sz_a, *sz_b;
929 
930 	sz_a = map->off_arr->field_sz + (a - off_base);
931 	sz_b = map->off_arr->field_sz + (b - off_base);
932 
933 	swap(*a, *b);
934 	swap(*sz_a, *sz_b);
935 }
936 
bpf_map_alloc_off_arr(struct bpf_map * map)937 static int bpf_map_alloc_off_arr(struct bpf_map *map)
938 {
939 	bool has_spin_lock = map_value_has_spin_lock(map);
940 	bool has_timer = map_value_has_timer(map);
941 	bool has_kptrs = map_value_has_kptrs(map);
942 	struct bpf_map_off_arr *off_arr;
943 	u32 i;
944 
945 	if (!has_spin_lock && !has_timer && !has_kptrs) {
946 		map->off_arr = NULL;
947 		return 0;
948 	}
949 
950 	off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN);
951 	if (!off_arr)
952 		return -ENOMEM;
953 	map->off_arr = off_arr;
954 
955 	off_arr->cnt = 0;
956 	if (has_spin_lock) {
957 		i = off_arr->cnt;
958 
959 		off_arr->field_off[i] = map->spin_lock_off;
960 		off_arr->field_sz[i] = sizeof(struct bpf_spin_lock);
961 		off_arr->cnt++;
962 	}
963 	if (has_timer) {
964 		i = off_arr->cnt;
965 
966 		off_arr->field_off[i] = map->timer_off;
967 		off_arr->field_sz[i] = sizeof(struct bpf_timer);
968 		off_arr->cnt++;
969 	}
970 	if (has_kptrs) {
971 		struct bpf_map_value_off *tab = map->kptr_off_tab;
972 		u32 *off = &off_arr->field_off[off_arr->cnt];
973 		u8 *sz = &off_arr->field_sz[off_arr->cnt];
974 
975 		for (i = 0; i < tab->nr_off; i++) {
976 			*off++ = tab->off[i].offset;
977 			*sz++ = sizeof(u64);
978 		}
979 		off_arr->cnt += tab->nr_off;
980 	}
981 
982 	if (off_arr->cnt == 1)
983 		return 0;
984 	sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]),
985 	       map_off_arr_cmp, map_off_arr_swap, map);
986 	return 0;
987 }
988 
/* Validate the BTF key/value types given for @map and record the special
 * fields found in the value type.
 *
 * @btf_key_id may be 0; the map type must then provide ->map_check_btf().
 * The resolved key and value sizes must equal map->key_size and
 * map->value_size.
 *
 * map->spin_lock_off, map->timer_off and map->kptr_off_tab are filled in
 * from the value type, and each kind of special field is rejected for the
 * map flags and map types that may not carry it.
 *
 * Returns the result of ->map_check_btf() (0 if there is no callback) or a
 * negative errno. Failures after the kptr table has been parsed release it
 * through bpf_map_free_kptr_off_tab().
 */
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int err = 0;

	if (btf_key_id) {
		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
		if (!key_type || key_size != map->key_size)
			return -EINVAL;
	} else {
		/* Some maps allow key to be unspecified. */
		key_type = btf_type_by_id(btf, 0);
		if (!map->ops->map_check_btf)
			return -EINVAL;
	}

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
	if (map_value_has_spin_lock(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;

		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_ARRAY:
		case BPF_MAP_TYPE_CGROUP_STORAGE:
		case BPF_MAP_TYPE_SK_STORAGE:
		case BPF_MAP_TYPE_INODE_STORAGE:
		case BPF_MAP_TYPE_TASK_STORAGE:
			break;
		default:
			return -ENOTSUPP;
		}

		/* The lock must lie completely inside the value. */
		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
		    map->value_size) {
			WARN_ONCE(1,
				  "verifier bug spin_lock_off %d value_size %d\n",
				  map->spin_lock_off, map->value_size);
			return -EFAULT;
		}
	}

	map->timer_off = btf_find_timer(btf, value_type);
	if (map_value_has_timer(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;

		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_LRU_HASH:
		case BPF_MAP_TYPE_ARRAY:
			break;
		default:
			return -EOPNOTSUPP;
		}
	}

	map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
	if (map_value_has_kptrs(map)) {
		if (!bpf_capable()) {
			err = -EPERM;
			goto free_map_tab;
		}
		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
			err = -EACCES;
			goto free_map_tab;
		}

		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_LRU_HASH:
		case BPF_MAP_TYPE_ARRAY:
		case BPF_MAP_TYPE_PERCPU_ARRAY:
			break;
		default:
			err = -EOPNOTSUPP;
			goto free_map_tab;
		}
	}

	/* Finally let the map type apply its own BTF constraints. */
	if (map->ops->map_check_btf) {
		err = map->ops->map_check_btf(map, btf, key_type, value_type);
		if (err < 0)
			goto free_map_tab;
	}

	return err;

free_map_tab:
	bpf_map_free_kptr_off_tab(map);
	return err;
}
1072 
1073 #define BPF_MAP_CREATE_LAST_FIELD map_extra
1074 /* called via syscall */
map_create(union bpf_attr * attr)1075 static int map_create(union bpf_attr *attr)
1076 {
1077 	int numa_node = bpf_map_attr_numa_node(attr);
1078 	struct bpf_map *map;
1079 	int f_flags;
1080 	int err;
1081 
1082 	err = CHECK_ATTR(BPF_MAP_CREATE);
1083 	if (err)
1084 		return -EINVAL;
1085 
1086 	if (attr->btf_vmlinux_value_type_id) {
1087 		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
1088 		    attr->btf_key_type_id || attr->btf_value_type_id)
1089 			return -EINVAL;
1090 	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
1091 		return -EINVAL;
1092 	}
1093 
1094 	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
1095 	    attr->map_extra != 0)
1096 		return -EINVAL;
1097 
1098 	f_flags = bpf_get_file_flag(attr->map_flags);
1099 	if (f_flags < 0)
1100 		return f_flags;
1101 
1102 	if (numa_node != NUMA_NO_NODE &&
1103 	    ((unsigned int)numa_node >= nr_node_ids ||
1104 	     !node_online(numa_node)))
1105 		return -EINVAL;
1106 
1107 	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
1108 	map = find_and_alloc_map(attr);
1109 	if (IS_ERR(map))
1110 		return PTR_ERR(map);
1111 
1112 	err = bpf_obj_name_cpy(map->name, attr->map_name,
1113 			       sizeof(attr->map_name));
1114 	if (err < 0)
1115 		goto free_map;
1116 
1117 	atomic64_set(&map->refcnt, 1);
1118 	atomic64_set(&map->usercnt, 1);
1119 	mutex_init(&map->freeze_mutex);
1120 	spin_lock_init(&map->owner.lock);
1121 
1122 	map->spin_lock_off = -EINVAL;
1123 	map->timer_off = -EINVAL;
1124 	if (attr->btf_key_type_id || attr->btf_value_type_id ||
1125 	    /* Even the map's value is a kernel's struct,
1126 	     * the bpf_prog.o must have BTF to begin with
1127 	     * to figure out the corresponding kernel's
1128 	     * counter part.  Thus, attr->btf_fd has
1129 	     * to be valid also.
1130 	     */
1131 	    attr->btf_vmlinux_value_type_id) {
1132 		struct btf *btf;
1133 
1134 		btf = btf_get_by_fd(attr->btf_fd);
1135 		if (IS_ERR(btf)) {
1136 			err = PTR_ERR(btf);
1137 			goto free_map;
1138 		}
1139 		if (btf_is_kernel(btf)) {
1140 			btf_put(btf);
1141 			err = -EACCES;
1142 			goto free_map;
1143 		}
1144 		map->btf = btf;
1145 
1146 		if (attr->btf_value_type_id) {
1147 			err = map_check_btf(map, btf, attr->btf_key_type_id,
1148 					    attr->btf_value_type_id);
1149 			if (err)
1150 				goto free_map;
1151 		}
1152 
1153 		map->btf_key_type_id = attr->btf_key_type_id;
1154 		map->btf_value_type_id = attr->btf_value_type_id;
1155 		map->btf_vmlinux_value_type_id =
1156 			attr->btf_vmlinux_value_type_id;
1157 	}
1158 
1159 	err = bpf_map_alloc_off_arr(map);
1160 	if (err)
1161 		goto free_map;
1162 
1163 	err = security_bpf_map_alloc(map);
1164 	if (err)
1165 		goto free_map_off_arr;
1166 
1167 	err = bpf_map_alloc_id(map);
1168 	if (err)
1169 		goto free_map_sec;
1170 
1171 	bpf_map_save_memcg(map);
1172 
1173 	err = bpf_map_new_fd(map, f_flags);
1174 	if (err < 0) {
1175 		/* failed to allocate fd.
1176 		 * bpf_map_put_with_uref() is needed because the above
1177 		 * bpf_map_alloc_id() has published the map
1178 		 * to the userspace and the userspace may
1179 		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
1180 		 */
1181 		bpf_map_put_with_uref(map);
1182 		return err;
1183 	}
1184 
1185 	return err;
1186 
1187 free_map_sec:
1188 	security_bpf_map_free(map);
1189 free_map_off_arr:
1190 	kfree(map->off_arr);
1191 free_map:
1192 	btf_put(map->btf);
1193 	map->ops->map_free(map);
1194 	return err;
1195 }
1196 
1197 /* if error is returned, fd is released.
1198  * On success caller should complete fd access with matching fdput()
1199  */
__bpf_map_get(struct fd f)1200 struct bpf_map *__bpf_map_get(struct fd f)
1201 {
1202 	if (!f.file)
1203 		return ERR_PTR(-EBADF);
1204 	if (f.file->f_op != &bpf_map_fops) {
1205 		fdput(f);
1206 		return ERR_PTR(-EINVAL);
1207 	}
1208 
1209 	return f.file->private_data;
1210 }
1211 
bpf_map_inc(struct bpf_map * map)1212 void bpf_map_inc(struct bpf_map *map)
1213 {
1214 	atomic64_inc(&map->refcnt);
1215 }
1216 EXPORT_SYMBOL_GPL(bpf_map_inc);
1217 
bpf_map_inc_with_uref(struct bpf_map * map)1218 void bpf_map_inc_with_uref(struct bpf_map *map)
1219 {
1220 	atomic64_inc(&map->refcnt);
1221 	atomic64_inc(&map->usercnt);
1222 }
1223 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
1224 
/* Look up the map behind file descriptor @ufd and take a reference on it.
 *
 * Returns the map, or the ERR_PTR() produced by __bpf_map_get().
 */
struct bpf_map *bpf_map_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	/* On error __bpf_map_get() has already dropped the fd. */
	if (!IS_ERR(map)) {
		bpf_map_inc(map);
		fdput(f);
	}

	return map;
}
EXPORT_SYMBOL(bpf_map_get);
1240 
/* Like bpf_map_get(), but the reference taken is also counted in
 * map->usercnt (see bpf_map_inc_with_uref()).
 */
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	/* On error __bpf_map_get() has already dropped the fd. */
	if (!IS_ERR(map)) {
		bpf_map_inc_with_uref(map);
		fdput(f);
	}

	return map;
}
1255 
1256 /* map_idr_lock should have been held */
__bpf_map_inc_not_zero(struct bpf_map * map,bool uref)1257 static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
1258 {
1259 	int refold;
1260 
1261 	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
1262 	if (!refold)
1263 		return ERR_PTR(-ENOENT);
1264 	if (uref)
1265 		atomic64_inc(&map->usercnt);
1266 
1267 	return map;
1268 }
1269 
bpf_map_inc_not_zero(struct bpf_map * map)1270 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
1271 {
1272 	spin_lock_bh(&map_idr_lock);
1273 	map = __bpf_map_inc_not_zero(map, false);
1274 	spin_unlock_bh(&map_idr_lock);
1275 
1276 	return map;
1277 }
1278 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
1279 
bpf_stackmap_copy(struct bpf_map * map,void * key,void * value)1280 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
1281 {
1282 	return -ENOTSUPP;
1283 }
1284 
/* Duplicate a map key from user memory.
 *
 * With a non-zero @key_size the result of vmemdup_user() is returned. For
 * a zero @key_size the user pointer must be NULL: NULL is returned in that
 * case, ERR_PTR(-EINVAL) otherwise.
 */
static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
	if (!key_size)
		return ukey ? ERR_PTR(-EINVAL) : NULL;

	return vmemdup_user(ukey, key_size);
}
1295 
/* bpfptr_t flavour of __bpf_copy_key().
 *
 * With a non-zero @key_size the result of kvmemdup_bpfptr() is returned.
 * For a zero @key_size the pointer must be null: NULL is returned in that
 * case, ERR_PTR(-EINVAL) otherwise.
 */
static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
	if (!key_size)
		return bpfptr_is_null(ukey) ? NULL : ERR_PTR(-EINVAL);

	return kvmemdup_bpfptr(ukey, key_size);
}
1306 
1307 /* last field in 'union bpf_attr' used by this command */
1308 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
1309 
map_lookup_elem(union bpf_attr * attr)1310 static int map_lookup_elem(union bpf_attr *attr)
1311 {
1312 	void __user *ukey = u64_to_user_ptr(attr->key);
1313 	void __user *uvalue = u64_to_user_ptr(attr->value);
1314 	int ufd = attr->map_fd;
1315 	struct bpf_map *map;
1316 	void *key, *value;
1317 	u32 value_size;
1318 	struct fd f;
1319 	int err;
1320 
1321 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
1322 		return -EINVAL;
1323 
1324 	if (attr->flags & ~BPF_F_LOCK)
1325 		return -EINVAL;
1326 
1327 	f = fdget(ufd);
1328 	map = __bpf_map_get(f);
1329 	if (IS_ERR(map))
1330 		return PTR_ERR(map);
1331 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1332 		err = -EPERM;
1333 		goto err_put;
1334 	}
1335 
1336 	if ((attr->flags & BPF_F_LOCK) &&
1337 	    !map_value_has_spin_lock(map)) {
1338 		err = -EINVAL;
1339 		goto err_put;
1340 	}
1341 
1342 	key = __bpf_copy_key(ukey, map->key_size);
1343 	if (IS_ERR(key)) {
1344 		err = PTR_ERR(key);
1345 		goto err_put;
1346 	}
1347 
1348 	value_size = bpf_map_value_size(map);
1349 
1350 	err = -ENOMEM;
1351 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
1352 	if (!value)
1353 		goto free_key;
1354 
1355 	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
1356 		if (copy_from_user(value, uvalue, value_size))
1357 			err = -EFAULT;
1358 		else
1359 			err = bpf_map_copy_value(map, key, value, attr->flags);
1360 		goto free_value;
1361 	}
1362 
1363 	err = bpf_map_copy_value(map, key, value, attr->flags);
1364 	if (err)
1365 		goto free_value;
1366 
1367 	err = -EFAULT;
1368 	if (copy_to_user(uvalue, value, value_size) != 0)
1369 		goto free_value;
1370 
1371 	err = 0;
1372 
1373 free_value:
1374 	kvfree(value);
1375 free_key:
1376 	kvfree(key);
1377 err_put:
1378 	fdput(f);
1379 	return err;
1380 }
1381 
1382 
1383 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
1384 
map_update_elem(union bpf_attr * attr,bpfptr_t uattr)1385 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
1386 {
1387 	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
1388 	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
1389 	int ufd = attr->map_fd;
1390 	struct bpf_map *map;
1391 	void *key, *value;
1392 	u32 value_size;
1393 	struct fd f;
1394 	int err;
1395 
1396 	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
1397 		return -EINVAL;
1398 
1399 	f = fdget(ufd);
1400 	map = __bpf_map_get(f);
1401 	if (IS_ERR(map))
1402 		return PTR_ERR(map);
1403 	bpf_map_write_active_inc(map);
1404 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1405 		err = -EPERM;
1406 		goto err_put;
1407 	}
1408 
1409 	if ((attr->flags & BPF_F_LOCK) &&
1410 	    !map_value_has_spin_lock(map)) {
1411 		err = -EINVAL;
1412 		goto err_put;
1413 	}
1414 
1415 	key = ___bpf_copy_key(ukey, map->key_size);
1416 	if (IS_ERR(key)) {
1417 		err = PTR_ERR(key);
1418 		goto err_put;
1419 	}
1420 
1421 	value_size = bpf_map_value_size(map);
1422 	value = kvmemdup_bpfptr(uvalue, value_size);
1423 	if (IS_ERR(value)) {
1424 		err = PTR_ERR(value);
1425 		goto free_key;
1426 	}
1427 
1428 	err = bpf_map_update_value(map, f, key, value, attr->flags);
1429 
1430 	kvfree(value);
1431 free_key:
1432 	kvfree(key);
1433 err_put:
1434 	bpf_map_write_active_dec(map);
1435 	fdput(f);
1436 	return err;
1437 }
1438 
1439 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1440 
map_delete_elem(union bpf_attr * attr,bpfptr_t uattr)1441 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
1442 {
1443 	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
1444 	int ufd = attr->map_fd;
1445 	struct bpf_map *map;
1446 	struct fd f;
1447 	void *key;
1448 	int err;
1449 
1450 	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
1451 		return -EINVAL;
1452 
1453 	f = fdget(ufd);
1454 	map = __bpf_map_get(f);
1455 	if (IS_ERR(map))
1456 		return PTR_ERR(map);
1457 	bpf_map_write_active_inc(map);
1458 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1459 		err = -EPERM;
1460 		goto err_put;
1461 	}
1462 
1463 	key = ___bpf_copy_key(ukey, map->key_size);
1464 	if (IS_ERR(key)) {
1465 		err = PTR_ERR(key);
1466 		goto err_put;
1467 	}
1468 
1469 	if (bpf_map_is_dev_bound(map)) {
1470 		err = bpf_map_offload_delete_elem(map, key);
1471 		goto out;
1472 	} else if (IS_FD_PROG_ARRAY(map) ||
1473 		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1474 		/* These maps require sleepable context */
1475 		err = map->ops->map_delete_elem(map, key);
1476 		goto out;
1477 	}
1478 
1479 	bpf_disable_instrumentation();
1480 	rcu_read_lock();
1481 	err = map->ops->map_delete_elem(map, key);
1482 	rcu_read_unlock();
1483 	bpf_enable_instrumentation();
1484 	maybe_wait_bpf_programs(map);
1485 out:
1486 	kvfree(key);
1487 err_put:
1488 	bpf_map_write_active_dec(map);
1489 	fdput(f);
1490 	return err;
1491 }
1492 
1493 /* last field in 'union bpf_attr' used by this command */
1494 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1495 
map_get_next_key(union bpf_attr * attr)1496 static int map_get_next_key(union bpf_attr *attr)
1497 {
1498 	void __user *ukey = u64_to_user_ptr(attr->key);
1499 	void __user *unext_key = u64_to_user_ptr(attr->next_key);
1500 	int ufd = attr->map_fd;
1501 	struct bpf_map *map;
1502 	void *key, *next_key;
1503 	struct fd f;
1504 	int err;
1505 
1506 	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
1507 		return -EINVAL;
1508 
1509 	f = fdget(ufd);
1510 	map = __bpf_map_get(f);
1511 	if (IS_ERR(map))
1512 		return PTR_ERR(map);
1513 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1514 		err = -EPERM;
1515 		goto err_put;
1516 	}
1517 
1518 	if (ukey) {
1519 		key = __bpf_copy_key(ukey, map->key_size);
1520 		if (IS_ERR(key)) {
1521 			err = PTR_ERR(key);
1522 			goto err_put;
1523 		}
1524 	} else {
1525 		key = NULL;
1526 	}
1527 
1528 	err = -ENOMEM;
1529 	next_key = kvmalloc(map->key_size, GFP_USER);
1530 	if (!next_key)
1531 		goto free_key;
1532 
1533 	if (bpf_map_is_dev_bound(map)) {
1534 		err = bpf_map_offload_get_next_key(map, key, next_key);
1535 		goto out;
1536 	}
1537 
1538 	rcu_read_lock();
1539 	err = map->ops->map_get_next_key(map, key, next_key);
1540 	rcu_read_unlock();
1541 out:
1542 	if (err)
1543 		goto free_next_key;
1544 
1545 	err = -EFAULT;
1546 	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
1547 		goto free_next_key;
1548 
1549 	err = 0;
1550 
1551 free_next_key:
1552 	kvfree(next_key);
1553 free_key:
1554 	kvfree(key);
1555 err_put:
1556 	fdput(f);
1557 	return err;
1558 }
1559 
/* Generic batched delete: remove up to attr->batch.count keys, read one by
 * one from the user array attr->batch.keys, from @map.
 *
 * Only BPF_F_LOCK is accepted in attr->batch.elem_flags, and only for maps
 * whose value contains a spin lock. The number of keys processed before
 * the loop stopped is written back to uattr->batch.count.
 *
 * Returns 0 on success or a negative errno from the first failing step.
 */
int generic_map_delete_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 cp, max_count;
	int err = 0;
	void *key;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		return -EINVAL;
	}

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		/* Widen before multiplying: a u32 product would wrap once
		 * the byte offset into the user array reaches 4 GiB.
		 */
		if (copy_from_user(key, keys + (size_t)cp * map->key_size,
				   map->key_size))
			break;

		if (bpf_map_is_dev_bound(map)) {
			/* NOTE(review): the loop stops after the first key
			 * for device-bound maps, whatever the result.
			 */
			err = bpf_map_offload_delete_elem(map, key);
			break;
		}

		bpf_disable_instrumentation();
		rcu_read_lock();
		err = map->ops->map_delete_elem(map, key);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		if (err)
			break;
		cond_resched();
	}
	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(key);

	maybe_wait_bpf_programs(map);
	return err;
}
1616 
/* Generic batched update: store up to attr->batch.count key/value pairs,
 * read from the user arrays attr->batch.keys and attr->batch.values, into
 * @map.
 *
 * Only BPF_F_LOCK is accepted in attr->batch.elem_flags, and only for maps
 * whose value contains a spin lock. The number of pairs stored before the
 * loop stopped is written back to uattr->batch.count.
 *
 * Returns 0 on success or a negative errno from the first failing step.
 */
int generic_map_update_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 value_size, cp, max_count;
	int ufd = attr->batch.map_fd;
	void *key, *value;
	struct fd f;
	int err = 0;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		return -EINVAL;
	}

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value) {
		kvfree(key);
		return -ENOMEM;
	}

	f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		/* Widen before multiplying: a u32 product would wrap once
		 * the byte offset into either user array reaches 4 GiB.
		 */
		if (copy_from_user(key, keys + (size_t)cp * map->key_size,
				   map->key_size) ||
		    copy_from_user(value, values + (size_t)cp * value_size,
				   value_size))
			break;

		err = bpf_map_update_value(map, f, key, value,
					   attr->batch.elem_flags);

		if (err)
			break;
		cond_resched();
	}

	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(value);
	kvfree(key);
	fdput(f);
	return err;
}
1680 
1681 #define MAP_LOOKUP_RETRIES 3
1682 
generic_map_lookup_batch(struct bpf_map * map,const union bpf_attr * attr,union bpf_attr __user * uattr)1683 int generic_map_lookup_batch(struct bpf_map *map,
1684 				    const union bpf_attr *attr,
1685 				    union bpf_attr __user *uattr)
1686 {
1687 	void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1688 	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1689 	void __user *values = u64_to_user_ptr(attr->batch.values);
1690 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
1691 	void *buf, *buf_prevkey, *prev_key, *key, *value;
1692 	int err, retry = MAP_LOOKUP_RETRIES;
1693 	u32 value_size, cp, max_count;
1694 
1695 	if (attr->batch.elem_flags & ~BPF_F_LOCK)
1696 		return -EINVAL;
1697 
1698 	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1699 	    !map_value_has_spin_lock(map))
1700 		return -EINVAL;
1701 
1702 	value_size = bpf_map_value_size(map);
1703 
1704 	max_count = attr->batch.count;
1705 	if (!max_count)
1706 		return 0;
1707 
1708 	if (put_user(0, &uattr->batch.count))
1709 		return -EFAULT;
1710 
1711 	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1712 	if (!buf_prevkey)
1713 		return -ENOMEM;
1714 
1715 	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
1716 	if (!buf) {
1717 		kvfree(buf_prevkey);
1718 		return -ENOMEM;
1719 	}
1720 
1721 	err = -EFAULT;
1722 	prev_key = NULL;
1723 	if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
1724 		goto free_buf;
1725 	key = buf;
1726 	value = key + map->key_size;
1727 	if (ubatch)
1728 		prev_key = buf_prevkey;
1729 
1730 	for (cp = 0; cp < max_count;) {
1731 		rcu_read_lock();
1732 		err = map->ops->map_get_next_key(map, prev_key, key);
1733 		rcu_read_unlock();
1734 		if (err)
1735 			break;
1736 		err = bpf_map_copy_value(map, key, value,
1737 					 attr->batch.elem_flags);
1738 
1739 		if (err == -ENOENT) {
1740 			if (retry) {
1741 				retry--;
1742 				continue;
1743 			}
1744 			err = -EINTR;
1745 			break;
1746 		}
1747 
1748 		if (err)
1749 			goto free_buf;
1750 
1751 		if (copy_to_user(keys + cp * map->key_size, key,
1752 				 map->key_size)) {
1753 			err = -EFAULT;
1754 			goto free_buf;
1755 		}
1756 		if (copy_to_user(values + cp * value_size, value, value_size)) {
1757 			err = -EFAULT;
1758 			goto free_buf;
1759 		}
1760 
1761 		if (!prev_key)
1762 			prev_key = buf_prevkey;
1763 
1764 		swap(prev_key, key);
1765 		retry = MAP_LOOKUP_RETRIES;
1766 		cp++;
1767 		cond_resched();
1768 	}
1769 
1770 	if (err == -EFAULT)
1771 		goto free_buf;
1772 
1773 	if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1774 		    (cp && copy_to_user(uobatch, prev_key, map->key_size))))
1775 		err = -EFAULT;
1776 
1777 free_buf:
1778 	kvfree(buf_prevkey);
1779 	kvfree(buf);
1780 	return err;
1781 }
1782 
1783 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
1784 
map_lookup_and_delete_elem(union bpf_attr * attr)1785 static int map_lookup_and_delete_elem(union bpf_attr *attr)
1786 {
1787 	void __user *ukey = u64_to_user_ptr(attr->key);
1788 	void __user *uvalue = u64_to_user_ptr(attr->value);
1789 	int ufd = attr->map_fd;
1790 	struct bpf_map *map;
1791 	void *key, *value;
1792 	u32 value_size;
1793 	struct fd f;
1794 	int err;
1795 
1796 	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
1797 		return -EINVAL;
1798 
1799 	if (attr->flags & ~BPF_F_LOCK)
1800 		return -EINVAL;
1801 
1802 	f = fdget(ufd);
1803 	map = __bpf_map_get(f);
1804 	if (IS_ERR(map))
1805 		return PTR_ERR(map);
1806 	bpf_map_write_active_inc(map);
1807 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
1808 	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1809 		err = -EPERM;
1810 		goto err_put;
1811 	}
1812 
1813 	if (attr->flags &&
1814 	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
1815 	     map->map_type == BPF_MAP_TYPE_STACK)) {
1816 		err = -EINVAL;
1817 		goto err_put;
1818 	}
1819 
1820 	if ((attr->flags & BPF_F_LOCK) &&
1821 	    !map_value_has_spin_lock(map)) {
1822 		err = -EINVAL;
1823 		goto err_put;
1824 	}
1825 
1826 	key = __bpf_copy_key(ukey, map->key_size);
1827 	if (IS_ERR(key)) {
1828 		err = PTR_ERR(key);
1829 		goto err_put;
1830 	}
1831 
1832 	value_size = bpf_map_value_size(map);
1833 
1834 	err = -ENOMEM;
1835 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
1836 	if (!value)
1837 		goto free_key;
1838 
1839 	err = -ENOTSUPP;
1840 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
1841 	    map->map_type == BPF_MAP_TYPE_STACK) {
1842 		err = map->ops->map_pop_elem(map, value);
1843 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
1844 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
1845 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
1846 		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
1847 		if (!bpf_map_is_dev_bound(map)) {
1848 			bpf_disable_instrumentation();
1849 			rcu_read_lock();
1850 			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
1851 			rcu_read_unlock();
1852 			bpf_enable_instrumentation();
1853 		}
1854 	}
1855 
1856 	if (err)
1857 		goto free_value;
1858 
1859 	if (copy_to_user(uvalue, value, value_size) != 0) {
1860 		err = -EFAULT;
1861 		goto free_value;
1862 	}
1863 
1864 	err = 0;
1865 
1866 free_value:
1867 	kvfree(value);
1868 free_key:
1869 	kvfree(key);
1870 err_put:
1871 	bpf_map_write_active_dec(map);
1872 	fdput(f);
1873 	return err;
1874 }
1875 
1876 #define BPF_MAP_FREEZE_LAST_FIELD map_fd
1877 
map_freeze(const union bpf_attr * attr)1878 static int map_freeze(const union bpf_attr *attr)
1879 {
1880 	int err = 0, ufd = attr->map_fd;
1881 	struct bpf_map *map;
1882 	struct fd f;
1883 
1884 	if (CHECK_ATTR(BPF_MAP_FREEZE))
1885 		return -EINVAL;
1886 
1887 	f = fdget(ufd);
1888 	map = __bpf_map_get(f);
1889 	if (IS_ERR(map))
1890 		return PTR_ERR(map);
1891 
1892 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
1893 	    map_value_has_timer(map) || map_value_has_kptrs(map)) {
1894 		fdput(f);
1895 		return -ENOTSUPP;
1896 	}
1897 
1898 	mutex_lock(&map->freeze_mutex);
1899 	if (bpf_map_write_active(map)) {
1900 		err = -EBUSY;
1901 		goto err_put;
1902 	}
1903 	if (READ_ONCE(map->frozen)) {
1904 		err = -EBUSY;
1905 		goto err_put;
1906 	}
1907 	if (!bpf_capable()) {
1908 		err = -EPERM;
1909 		goto err_put;
1910 	}
1911 
1912 	WRITE_ONCE(map->frozen, true);
1913 err_put:
1914 	mutex_unlock(&map->freeze_mutex);
1915 	fdput(f);
1916 	return err;
1917 }
1918 
1919 static const struct bpf_prog_ops * const bpf_prog_types[] = {
1920 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
1921 	[_id] = & _name ## _prog_ops,
1922 #define BPF_MAP_TYPE(_id, _ops)
1923 #define BPF_LINK_TYPE(_id, _name)
1924 #include <linux/bpf_types.h>
1925 #undef BPF_PROG_TYPE
1926 #undef BPF_MAP_TYPE
1927 #undef BPF_LINK_TYPE
1928 };
1929 
find_prog_type(enum bpf_prog_type type,struct bpf_prog * prog)1930 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
1931 {
1932 	const struct bpf_prog_ops *ops;
1933 
1934 	if (type >= ARRAY_SIZE(bpf_prog_types))
1935 		return -EINVAL;
1936 	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
1937 	ops = bpf_prog_types[type];
1938 	if (!ops)
1939 		return -EINVAL;
1940 
1941 	if (!bpf_prog_is_dev_bound(prog->aux))
1942 		prog->aux->ops = ops;
1943 	else
1944 		prog->aux->ops = &bpf_offload_prog_ops;
1945 	prog->type = type;
1946 	return 0;
1947 }
1948 
/* Operations reported by bpf_audit_prog(). BPF_AUDIT_MAX is the number of
 * operations and sizes bpf_audit_str[].
 */
enum bpf_audit {
	BPF_AUDIT_LOAD,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,
};

/* Text printed in the "op=" field of the audit record, per operation. */
static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
	[BPF_AUDIT_LOAD]	= "LOAD",
	[BPF_AUDIT_UNLOAD]	= "UNLOAD",
};
1959 
bpf_audit_prog(const struct bpf_prog * prog,unsigned int op)1960 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
1961 {
1962 	struct audit_context *ctx = NULL;
1963 	struct audit_buffer *ab;
1964 
1965 	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
1966 		return;
1967 	if (audit_enabled == AUDIT_OFF)
1968 		return;
1969 	if (!in_irq() && !irqs_disabled())
1970 		ctx = audit_context();
1971 	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
1972 	if (unlikely(!ab))
1973 		return;
1974 	audit_log_format(ab, "prog-id=%u op=%s",
1975 			 prog->aux->id, bpf_audit_str[op]);
1976 	audit_log_end(ab);
1977 }
1978 
bpf_prog_alloc_id(struct bpf_prog * prog)1979 static int bpf_prog_alloc_id(struct bpf_prog *prog)
1980 {
1981 	int id;
1982 
1983 	idr_preload(GFP_KERNEL);
1984 	spin_lock_bh(&prog_idr_lock);
1985 	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
1986 	if (id > 0)
1987 		prog->aux->id = id;
1988 	spin_unlock_bh(&prog_idr_lock);
1989 	idr_preload_end();
1990 
1991 	/* id is in [1, INT_MAX) */
1992 	if (WARN_ON_ONCE(!id))
1993 		return -ENOSPC;
1994 
1995 	return id > 0 ? 0 : id;
1996 }
1997 
/* Remove @prog from prog_idr and reset prog->aux->id to 0.
 *
 * @do_idr_lock: take prog_idr_lock (irqsave) around the removal. When
 * false only __acquire()/__release() are used and no lock is taken here
 * (presumably the caller already holds it; confirm against the callers).
 */
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	struct bpf_prog_aux *aux = prog->aux;
	unsigned long irq_flags;

	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!aux->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&prog_idr_lock, irq_flags);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, aux->id);
	aux->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&prog_idr_lock, irq_flags);
	else
		__release(&prog_idr_lock);
}
2023 
__bpf_prog_put_rcu(struct rcu_head * rcu)2024 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
2025 {
2026 	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
2027 
2028 	kvfree(aux->func_info);
2029 	kfree(aux->func_info_aux);
2030 	free_uid(aux->user);
2031 	security_bpf_prog_free(aux);
2032 	bpf_prog_free(aux->prog);
2033 }
2034 
/* Release the resources hanging off @prog and free it.
 *
 * @deferred: when true, the final __bpf_prog_put_rcu() step is queued with
 * call_rcu_tasks_trace() for sleepable programs and call_rcu() otherwise.
 * When false it runs immediately.
 */
static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
	struct bpf_prog_aux *aux = prog->aux;

	bpf_prog_kallsyms_del_all(prog);
	btf_put(aux->btf);
	kvfree(aux->jited_linfo);
	kvfree(aux->linfo);
	kfree(aux->kfunc_tab);
	if (aux->attach_btf)
		btf_put(aux->attach_btf);

	if (!deferred) {
		__bpf_prog_put_rcu(&aux->rcu);
		return;
	}

	if (aux->sleepable)
		call_rcu_tasks_trace(&aux->rcu, __bpf_prog_put_rcu);
	else
		call_rcu(&aux->rcu, __bpf_prog_put_rcu);
}
2054 
bpf_prog_put_deferred(struct work_struct * work)2055 static void bpf_prog_put_deferred(struct work_struct *work)
2056 {
2057 	struct bpf_prog_aux *aux;
2058 	struct bpf_prog *prog;
2059 
2060 	aux = container_of(work, struct bpf_prog_aux, work);
2061 	prog = aux->prog;
2062 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
2063 	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
2064 	bpf_prog_free_id(prog, true);
2065 	__bpf_prog_put_noref(prog, true);
2066 }
2067 
/* Drop one reference on @prog and tear it down when it was the last one.
 *
 * In hard-irq context or with interrupts disabled the teardown is handed
 * to a workqueue; otherwise it runs synchronously.
 *
 * NOTE(review): @do_idr_lock is not used in this function.
 */
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	struct bpf_prog_aux *aux = prog->aux;

	if (!atomic64_dec_and_test(&aux->refcnt))
		return;

	if (!in_irq() && !irqs_disabled()) {
		bpf_prog_put_deferred(&aux->work);
		return;
	}

	INIT_WORK(&aux->work, bpf_prog_put_deferred);
	schedule_work(&aux->work);
}
2081 
bpf_prog_put(struct bpf_prog * prog)2082 void bpf_prog_put(struct bpf_prog *prog)
2083 {
2084 	__bpf_prog_put(prog, true);
2085 }
2086 EXPORT_SYMBOL_GPL(bpf_prog_put);
2087 
bpf_prog_release(struct inode * inode,struct file * filp)2088 static int bpf_prog_release(struct inode *inode, struct file *filp)
2089 {
2090 	struct bpf_prog *prog = filp->private_data;
2091 
2092 	bpf_prog_put(prog);
2093 	return 0;
2094 }
2095 
2096 struct bpf_prog_kstats {
2097 	u64 nsecs;
2098 	u64 cnt;
2099 	u64 misses;
2100 };
2101 
bpf_prog_inc_misses_counter(struct bpf_prog * prog)2102 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
2103 {
2104 	struct bpf_prog_stats *stats;
2105 	unsigned int flags;
2106 
2107 	stats = this_cpu_ptr(prog->stats);
2108 	flags = u64_stats_update_begin_irqsave(&stats->syncp);
2109 	u64_stats_inc(&stats->misses);
2110 	u64_stats_update_end_irqrestore(&stats->syncp, flags);
2111 }
2112 
bpf_prog_get_stats(const struct bpf_prog * prog,struct bpf_prog_kstats * stats)2113 static void bpf_prog_get_stats(const struct bpf_prog *prog,
2114 			       struct bpf_prog_kstats *stats)
2115 {
2116 	u64 nsecs = 0, cnt = 0, misses = 0;
2117 	int cpu;
2118 
2119 	for_each_possible_cpu(cpu) {
2120 		const struct bpf_prog_stats *st;
2121 		unsigned int start;
2122 		u64 tnsecs, tcnt, tmisses;
2123 
2124 		st = per_cpu_ptr(prog->stats, cpu);
2125 		do {
2126 			start = u64_stats_fetch_begin_irq(&st->syncp);
2127 			tnsecs = u64_stats_read(&st->nsecs);
2128 			tcnt = u64_stats_read(&st->cnt);
2129 			tmisses = u64_stats_read(&st->misses);
2130 		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
2131 		nsecs += tnsecs;
2132 		cnt += tcnt;
2133 		misses += tmisses;
2134 	}
2135 	stats->nsecs = nsecs;
2136 	stats->cnt = cnt;
2137 	stats->misses = misses;
2138 }
2139 
2140 #ifdef CONFIG_PROC_FS
bpf_prog_show_fdinfo(struct seq_file * m,struct file * filp)2141 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
2142 {
2143 	const struct bpf_prog *prog = filp->private_data;
2144 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
2145 	struct bpf_prog_kstats stats;
2146 
2147 	bpf_prog_get_stats(prog, &stats);
2148 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2149 	seq_printf(m,
2150 		   "prog_type:\t%u\n"
2151 		   "prog_jited:\t%u\n"
2152 		   "prog_tag:\t%s\n"
2153 		   "memlock:\t%llu\n"
2154 		   "prog_id:\t%u\n"
2155 		   "run_time_ns:\t%llu\n"
2156 		   "run_cnt:\t%llu\n"
2157 		   "recursion_misses:\t%llu\n"
2158 		   "verified_insns:\t%u\n",
2159 		   prog->type,
2160 		   prog->jited,
2161 		   prog_tag,
2162 		   prog->pages * 1ULL << PAGE_SHIFT,
2163 		   prog->aux->id,
2164 		   stats.nsecs,
2165 		   stats.cnt,
2166 		   stats.misses,
2167 		   prog->aux->verified_insns);
2168 }
2169 #endif
2170 
2171 const struct file_operations bpf_prog_fops = {
2172 #ifdef CONFIG_PROC_FS
2173 	.show_fdinfo	= bpf_prog_show_fdinfo,
2174 #endif
2175 	.release	= bpf_prog_release,
2176 	.read		= bpf_dummy_read,
2177 	.write		= bpf_dummy_write,
2178 };
2179 
bpf_prog_new_fd(struct bpf_prog * prog)2180 int bpf_prog_new_fd(struct bpf_prog *prog)
2181 {
2182 	int ret;
2183 
2184 	ret = security_bpf_prog(prog);
2185 	if (ret < 0)
2186 		return ret;
2187 
2188 	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
2189 				O_RDWR | O_CLOEXEC);
2190 }
2191 
____bpf_prog_get(struct fd f)2192 static struct bpf_prog *____bpf_prog_get(struct fd f)
2193 {
2194 	if (!f.file)
2195 		return ERR_PTR(-EBADF);
2196 	if (f.file->f_op != &bpf_prog_fops) {
2197 		fdput(f);
2198 		return ERR_PTR(-EINVAL);
2199 	}
2200 
2201 	return f.file->private_data;
2202 }
2203 
bpf_prog_add(struct bpf_prog * prog,int i)2204 void bpf_prog_add(struct bpf_prog *prog, int i)
2205 {
2206 	atomic64_add(i, &prog->aux->refcnt);
2207 }
2208 EXPORT_SYMBOL_GPL(bpf_prog_add);
2209 
bpf_prog_sub(struct bpf_prog * prog,int i)2210 void bpf_prog_sub(struct bpf_prog *prog, int i)
2211 {
2212 	/* Only to be used for undoing previous bpf_prog_add() in some
2213 	 * error path. We still know that another entity in our call
2214 	 * path holds a reference to the program, thus atomic_sub() can
2215 	 * be safely used in such cases!
2216 	 */
2217 	WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
2218 }
2219 EXPORT_SYMBOL_GPL(bpf_prog_sub);
2220 
bpf_prog_inc(struct bpf_prog * prog)2221 void bpf_prog_inc(struct bpf_prog *prog)
2222 {
2223 	atomic64_inc(&prog->aux->refcnt);
2224 }
2225 EXPORT_SYMBOL_GPL(bpf_prog_inc);
2226 
2227 /* prog_idr_lock should have been held */
bpf_prog_inc_not_zero(struct bpf_prog * prog)2228 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
2229 {
2230 	int refold;
2231 
2232 	refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
2233 
2234 	if (!refold)
2235 		return ERR_PTR(-ENOENT);
2236 
2237 	return prog;
2238 }
2239 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
2240 
/* Decide whether @prog may be handed out for the requested use.
 *
 * @attach_type: required program type, or NULL when the caller only wants
 *               a reference (always allowed)
 * @attach_drv:  whether a device-bound program is acceptable
 *
 * Returns false on a type mismatch, or when @prog is device bound and
 * @attach_drv is false.
 */
bool bpf_prog_get_ok(struct bpf_prog *prog,
			    enum bpf_prog_type *attach_type, bool attach_drv)
{
	if (!attach_type)
		return true;

	return prog->type == *attach_type &&
	       !(bpf_prog_is_dev_bound(prog->aux) && !attach_drv);
}
2255 
/* Look up the program behind user FD @ufd and take a reference on it.
 *
 * @attach_type / @attach_drv are forwarded to bpf_prog_get_ok().
 *
 * Returns the referenced program, or an ERR_PTR: whatever
 * ____bpf_prog_get() reported, or -EINVAL when bpf_prog_get_ok() refuses.
 */
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog = ____bpf_prog_get(f);

	/* nothing to release here on error */
	if (IS_ERR(prog))
		return prog;

	if (bpf_prog_get_ok(prog, attach_type, attach_drv))
		bpf_prog_inc(prog);
	else
		prog = ERR_PTR(-EINVAL);

	fdput(f);
	return prog;
}
2275 
/* Get a referenced program from FD @ufd without any type restriction. */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	enum bpf_prog_type *any_type = NULL;

	return __bpf_prog_get(ufd, any_type, false);
}
2280 
/* Get a referenced program from FD @ufd, insisting on program type @type.
 * Device-bound programs are only accepted when @attach_drv is true.
 */
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	enum bpf_prog_type *wanted = &type;

	return __bpf_prog_get(ufd, wanted, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
2287 
2288 /* Initially all BPF programs could be loaded w/o specifying
2289  * expected_attach_type. Later for some of them specifying expected_attach_type
2290  * at load time became required so that program could be validated properly.
2291  * Programs of types that are allowed to be loaded both w/ and w/o (for
2292  * backward compatibility) expected_attach_type, should have the default attach
2293  * type assigned to expected_attach_type for the latter case, so that it can be
2294  * validated later at attach time.
2295  *
2296  * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
2297  * prog type requires it but has some attach types that have to be backward
2298  * compatible.
2299  */
bpf_prog_load_fixup_attach_type(union bpf_attr * attr)2300 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2301 {
2302 	switch (attr->prog_type) {
2303 	case BPF_PROG_TYPE_CGROUP_SOCK:
2304 		/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2305 		 * exist so checking for non-zero is the way to go here.
2306 		 */
2307 		if (!attr->expected_attach_type)
2308 			attr->expected_attach_type =
2309 				BPF_CGROUP_INET_SOCK_CREATE;
2310 		break;
2311 	case BPF_PROG_TYPE_SK_REUSEPORT:
2312 		if (!attr->expected_attach_type)
2313 			attr->expected_attach_type =
2314 				BPF_SK_REUSEPORT_SELECT;
2315 		break;
2316 	}
2317 }
2318 
2319 static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,enum bpf_attach_type expected_attach_type,struct btf * attach_btf,u32 btf_id,struct bpf_prog * dst_prog)2320 bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
2321 			   enum bpf_attach_type expected_attach_type,
2322 			   struct btf *attach_btf, u32 btf_id,
2323 			   struct bpf_prog *dst_prog)
2324 {
2325 	if (btf_id) {
2326 		if (btf_id > BTF_MAX_TYPE)
2327 			return -EINVAL;
2328 
2329 		if (!attach_btf && !dst_prog)
2330 			return -EINVAL;
2331 
2332 		switch (prog_type) {
2333 		case BPF_PROG_TYPE_TRACING:
2334 		case BPF_PROG_TYPE_LSM:
2335 		case BPF_PROG_TYPE_STRUCT_OPS:
2336 		case BPF_PROG_TYPE_EXT:
2337 			break;
2338 		default:
2339 			return -EINVAL;
2340 		}
2341 	}
2342 
2343 	if (attach_btf && (!btf_id || dst_prog))
2344 		return -EINVAL;
2345 
2346 	if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
2347 	    prog_type != BPF_PROG_TYPE_EXT)
2348 		return -EINVAL;
2349 
2350 	switch (prog_type) {
2351 	case BPF_PROG_TYPE_CGROUP_SOCK:
2352 		switch (expected_attach_type) {
2353 		case BPF_CGROUP_INET_SOCK_CREATE:
2354 		case BPF_CGROUP_INET_SOCK_RELEASE:
2355 		case BPF_CGROUP_INET4_POST_BIND:
2356 		case BPF_CGROUP_INET6_POST_BIND:
2357 			return 0;
2358 		default:
2359 			return -EINVAL;
2360 		}
2361 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2362 		switch (expected_attach_type) {
2363 		case BPF_CGROUP_INET4_BIND:
2364 		case BPF_CGROUP_INET6_BIND:
2365 		case BPF_CGROUP_INET4_CONNECT:
2366 		case BPF_CGROUP_INET6_CONNECT:
2367 		case BPF_CGROUP_INET4_GETPEERNAME:
2368 		case BPF_CGROUP_INET6_GETPEERNAME:
2369 		case BPF_CGROUP_INET4_GETSOCKNAME:
2370 		case BPF_CGROUP_INET6_GETSOCKNAME:
2371 		case BPF_CGROUP_UDP4_SENDMSG:
2372 		case BPF_CGROUP_UDP6_SENDMSG:
2373 		case BPF_CGROUP_UDP4_RECVMSG:
2374 		case BPF_CGROUP_UDP6_RECVMSG:
2375 			return 0;
2376 		default:
2377 			return -EINVAL;
2378 		}
2379 	case BPF_PROG_TYPE_CGROUP_SKB:
2380 		switch (expected_attach_type) {
2381 		case BPF_CGROUP_INET_INGRESS:
2382 		case BPF_CGROUP_INET_EGRESS:
2383 			return 0;
2384 		default:
2385 			return -EINVAL;
2386 		}
2387 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2388 		switch (expected_attach_type) {
2389 		case BPF_CGROUP_SETSOCKOPT:
2390 		case BPF_CGROUP_GETSOCKOPT:
2391 			return 0;
2392 		default:
2393 			return -EINVAL;
2394 		}
2395 	case BPF_PROG_TYPE_SK_LOOKUP:
2396 		if (expected_attach_type == BPF_SK_LOOKUP)
2397 			return 0;
2398 		return -EINVAL;
2399 	case BPF_PROG_TYPE_SK_REUSEPORT:
2400 		switch (expected_attach_type) {
2401 		case BPF_SK_REUSEPORT_SELECT:
2402 		case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
2403 			return 0;
2404 		default:
2405 			return -EINVAL;
2406 		}
2407 	case BPF_PROG_TYPE_SYSCALL:
2408 	case BPF_PROG_TYPE_EXT:
2409 		if (expected_attach_type)
2410 			return -EINVAL;
2411 		fallthrough;
2412 	default:
2413 		return 0;
2414 	}
2415 }
2416 
is_net_admin_prog_type(enum bpf_prog_type prog_type)2417 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2418 {
2419 	switch (prog_type) {
2420 	case BPF_PROG_TYPE_SCHED_CLS:
2421 	case BPF_PROG_TYPE_SCHED_ACT:
2422 	case BPF_PROG_TYPE_XDP:
2423 	case BPF_PROG_TYPE_LWT_IN:
2424 	case BPF_PROG_TYPE_LWT_OUT:
2425 	case BPF_PROG_TYPE_LWT_XMIT:
2426 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2427 	case BPF_PROG_TYPE_SK_SKB:
2428 	case BPF_PROG_TYPE_SK_MSG:
2429 	case BPF_PROG_TYPE_LIRC_MODE2:
2430 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
2431 	case BPF_PROG_TYPE_CGROUP_DEVICE:
2432 	case BPF_PROG_TYPE_CGROUP_SOCK:
2433 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2434 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2435 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
2436 	case BPF_PROG_TYPE_SOCK_OPS:
2437 	case BPF_PROG_TYPE_EXT: /* extends any prog */
2438 		return true;
2439 	case BPF_PROG_TYPE_CGROUP_SKB:
2440 		/* always unpriv */
2441 	case BPF_PROG_TYPE_SK_REUSEPORT:
2442 		/* equivalent to SOCKET_FILTER. need CAP_BPF only */
2443 	default:
2444 		return false;
2445 	}
2446 }
2447 
is_perfmon_prog_type(enum bpf_prog_type prog_type)2448 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2449 {
2450 	switch (prog_type) {
2451 	case BPF_PROG_TYPE_KPROBE:
2452 	case BPF_PROG_TYPE_TRACEPOINT:
2453 	case BPF_PROG_TYPE_PERF_EVENT:
2454 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
2455 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2456 	case BPF_PROG_TYPE_TRACING:
2457 	case BPF_PROG_TYPE_LSM:
2458 	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2459 	case BPF_PROG_TYPE_EXT: /* extends any prog */
2460 		return true;
2461 	default:
2462 		return false;
2463 	}
2464 }
2465 
2466 /* last field in 'union bpf_attr' used by this command */
2467 #define	BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size
2468 
/* Handle the program-load command: validate @attr, copy the license and the
 * instructions from the caller, run the verifier, select a runtime, publish
 * the program (ID, kallsyms, perf and audit events) and create an FD for it.
 *
 * @attr:  command attributes; expected_attach_type may be defaulted in place
 *         by bpf_prog_load_fixup_attach_type()
 * @uattr: caller's attribute pointer; uattr.is_kernel selects how the
 *         license and instruction pointers in @attr are interpreted
 *
 * Returns the value of bpf_prog_new_fd() on success or a negative errno.
 */
bpf_prog_load(union bpf_attr * attr,bpfptr_t uattr)2469 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
2470 {
2471 	enum bpf_prog_type type = attr->prog_type;
2472 	struct bpf_prog *prog, *dst_prog = NULL;
2473 	struct btf *attach_btf = NULL;
2474 	int err;
2475 	char license[128];
2476 	bool is_gpl;
2477 
2478 	if (CHECK_ATTR(BPF_PROG_LOAD))
2479 		return -EINVAL;
2480 
	/* reject any prog_flags bit outside the set handled below */
2481 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2482 				 BPF_F_ANY_ALIGNMENT |
2483 				 BPF_F_TEST_STATE_FREQ |
2484 				 BPF_F_SLEEPABLE |
2485 				 BPF_F_TEST_RND_HI32 |
2486 				 BPF_F_XDP_HAS_FRAGS))
2487 		return -EINVAL;
2488 
2489 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
2490 	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2491 	    !bpf_capable())
2492 		return -EPERM;
2493 
2494 	/* copy eBPF program license from user space */
2495 	if (strncpy_from_bpfptr(license,
2496 				make_bpfptr(attr->license, uattr.is_kernel),
2497 				sizeof(license) - 1) < 0)
2498 		return -EFAULT;
2499 	license[sizeof(license) - 1] = 0;
2500 
2501 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
2502 	is_gpl = license_is_gpl_compatible(license);
2503 
	/* callers without bpf_capable() are held to the BPF_MAXINSNS limit */
2504 	if (attr->insn_cnt == 0 ||
2505 	    attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
2506 		return -E2BIG;
	/* only SOCKET_FILTER and CGROUP_SKB may be loaded without bpf_capable() */
2507 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
2508 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
2509 	    !bpf_capable())
2510 		return -EPERM;
2511 
2512 	if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
2513 		return -EPERM;
2514 	if (is_perfmon_prog_type(type) && !perfmon_capable())
2515 		return -EPERM;
2516 
2517 	/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
2518 	 * or btf, we need to check which one it is
2519 	 */
2520 	if (attr->attach_prog_fd) {
2521 		dst_prog = bpf_prog_get(attr->attach_prog_fd);
2522 		if (IS_ERR(dst_prog)) {
2523 			dst_prog = NULL;
2524 			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
2525 			if (IS_ERR(attach_btf))
2526 				return -EINVAL;
2527 			if (!btf_is_kernel(attach_btf)) {
2528 				/* attaching through specifying bpf_prog's BTF
2529 				 * objects directly might be supported eventually
2530 				 */
2531 				btf_put(attach_btf);
2532 				return -ENOTSUPP;
2533 			}
2534 		}
2535 	} else if (attr->attach_btf_id) {
2536 		/* fall back to vmlinux BTF, if BTF type ID is specified */
2537 		attach_btf = bpf_get_btf_vmlinux();
2538 		if (IS_ERR(attach_btf))
2539 			return PTR_ERR(attach_btf);
2540 		if (!attach_btf)
2541 			return -EINVAL;
2542 		btf_get(attach_btf);
2543 	}
2544 
	/* from here on dst_prog and attach_btf (when set) hold references
	 * that every error return has to drop
	 */
2545 	bpf_prog_load_fixup_attach_type(attr);
2546 	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
2547 				       attach_btf, attr->attach_btf_id,
2548 				       dst_prog)) {
2549 		if (dst_prog)
2550 			bpf_prog_put(dst_prog);
2551 		if (attach_btf)
2552 			btf_put(attach_btf);
2553 		return -EINVAL;
2554 	}
2555 
2556 	/* plain bpf_prog allocation */
2557 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
2558 	if (!prog) {
2559 		if (dst_prog)
2560 			bpf_prog_put(dst_prog);
2561 		if (attach_btf)
2562 			btf_put(attach_btf);
2563 		return -ENOMEM;
2564 	}
2565 
	/* the attach references are now stored in prog->aux */
2566 	prog->expected_attach_type = attr->expected_attach_type;
2567 	prog->aux->attach_btf = attach_btf;
2568 	prog->aux->attach_btf_id = attr->attach_btf_id;
2569 	prog->aux->dst_prog = dst_prog;
2570 	prog->aux->offload_requested = !!attr->prog_ifindex;
2571 	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
2572 	prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
2573 
2574 	err = security_bpf_prog_alloc(prog->aux);
2575 	if (err)
2576 		goto free_prog;
2577 
2578 	prog->aux->user = get_current_user();
2579 	prog->len = attr->insn_cnt;
2580 
2581 	err = -EFAULT;
2582 	if (copy_from_bpfptr(prog->insns,
2583 			     make_bpfptr(attr->insns, uattr.is_kernel),
2584 			     bpf_prog_insn_size(prog)) != 0)
2585 		goto free_prog_sec;
2586 
2587 	prog->orig_prog = NULL;
2588 	prog->jited = 0;
2589 
2590 	atomic64_set(&prog->aux->refcnt, 1);
2591 	prog->gpl_compatible = is_gpl ? 1 : 0;
2592 
2593 	if (bpf_prog_is_dev_bound(prog->aux)) {
2594 		err = bpf_prog_offload_init(prog, attr);
2595 		if (err)
2596 			goto free_prog_sec;
2597 	}
2598 
2599 	/* find program type: socket_filter vs tracing_filter */
2600 	err = find_prog_type(type, prog);
2601 	if (err < 0)
2602 		goto free_prog_sec;
2603 
2604 	prog->aux->load_time = ktime_get_boottime_ns();
2605 	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
2606 			       sizeof(attr->prog_name));
2607 	if (err < 0)
2608 		goto free_prog_sec;
2609 
2610 	/* run eBPF verifier */
2611 	err = bpf_check(&prog, attr, uattr);
2612 	if (err < 0)
2613 		goto free_used_maps;
2614 
2615 	prog = bpf_prog_select_runtime(prog, &err);
2616 	if (err < 0)
2617 		goto free_used_maps;
2618 
2619 	err = bpf_prog_alloc_id(prog);
2620 	if (err)
2621 		goto free_used_maps;
2622 
2623 	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
2624 	 * effectively publicly exposed. However, retrieving via
2625 	 * bpf_prog_get_fd_by_id() will take another reference,
2626 	 * therefore it cannot be gone underneath us.
2627 	 *
2628 	 * Only for the time /after/ successful bpf_prog_new_fd()
2629 	 * and before returning to userspace, we might just hold
2630 	 * one reference and any parallel close on that fd could
2631 	 * rip everything out. Hence, below notifications must
2632 	 * happen before bpf_prog_new_fd().
2633 	 *
2634 	 * Also, any failure handling from this point onwards must
2635 	 * be using bpf_prog_put() given the program is exposed.
2636 	 */
2637 	bpf_prog_kallsyms_add(prog);
2638 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
2639 	bpf_audit_prog(prog, BPF_AUDIT_LOAD);
2640 
2641 	err = bpf_prog_new_fd(prog);
2642 	if (err < 0)
2643 		bpf_prog_put(prog);
2644 	return err;
2645 
2646 free_used_maps:
2647 	/* In case we have subprogs, we need to wait for a grace
2648 	 * period before we can tear down JIT memory since symbols
2649 	 * are already exposed under kallsyms.
2650 	 */
2651 	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
2652 	return err;
2653 free_prog_sec:
2654 	free_uid(prog->aux->user);
2655 	security_bpf_prog_free(prog->aux);
2656 free_prog:
	/* NOTE(review): dst_prog is not put explicitly on this path;
	 * presumably bpf_prog_free() releases prog->aux->dst_prog - confirm.
	 */
2657 	if (prog->aux->attach_btf)
2658 		btf_put(prog->aux->attach_btf);
2659 	bpf_prog_free(prog);
2660 	return err;
2661 }
2662 
2663 #define BPF_OBJ_LAST_FIELD file_flags
2664 
bpf_obj_pin(const union bpf_attr * attr)2665 static int bpf_obj_pin(const union bpf_attr *attr)
2666 {
2667 	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
2668 		return -EINVAL;
2669 
2670 	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
2671 }
2672 
bpf_obj_get(const union bpf_attr * attr)2673 static int bpf_obj_get(const union bpf_attr *attr)
2674 {
2675 	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
2676 	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
2677 		return -EINVAL;
2678 
2679 	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
2680 				attr->file_flags);
2681 }
2682 
bpf_link_init(struct bpf_link * link,enum bpf_link_type type,const struct bpf_link_ops * ops,struct bpf_prog * prog)2683 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
2684 		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
2685 {
2686 	atomic64_set(&link->refcnt, 1);
2687 	link->type = type;
2688 	link->id = 0;
2689 	link->ops = ops;
2690 	link->prog = prog;
2691 }
2692 
bpf_link_free_id(int id)2693 static void bpf_link_free_id(int id)
2694 {
2695 	if (!id)
2696 		return;
2697 
2698 	spin_lock_bh(&link_idr_lock);
2699 	idr_remove(&link_idr, id);
2700 	spin_unlock_bh(&link_idr_lock);
2701 }
2702 
2703 /* Clean up bpf_link and corresponding anon_inode file and FD. After
2704  * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
2705  * anon_inode's release() call. This helper marksbpf_link as
2706  * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
2707  * is not decremented, it's the responsibility of a calling code that failed
2708  * to complete bpf_link initialization.
2709  */
bpf_link_cleanup(struct bpf_link_primer * primer)2710 void bpf_link_cleanup(struct bpf_link_primer *primer)
2711 {
2712 	primer->link->prog = NULL;
2713 	bpf_link_free_id(primer->id);
2714 	fput(primer->file);
2715 	put_unused_fd(primer->fd);
2716 }
2717 
bpf_link_inc(struct bpf_link * link)2718 void bpf_link_inc(struct bpf_link *link)
2719 {
2720 	atomic64_inc(&link->refcnt);
2721 }
2722 
2723 /* bpf_link_free is guaranteed to be called from process context */
bpf_link_free(struct bpf_link * link)2724 static void bpf_link_free(struct bpf_link *link)
2725 {
2726 	bpf_link_free_id(link->id);
2727 	if (link->prog) {
2728 		/* detach BPF program, clean up used resources */
2729 		link->ops->release(link);
2730 		bpf_prog_put(link->prog);
2731 	}
2732 	/* free bpf_link and its containing memory */
2733 	link->ops->dealloc(link);
2734 }
2735 
bpf_link_put_deferred(struct work_struct * work)2736 static void bpf_link_put_deferred(struct work_struct *work)
2737 {
2738 	struct bpf_link *link = container_of(work, struct bpf_link, work);
2739 
2740 	bpf_link_free(link);
2741 }
2742 
2743 /* bpf_link_put can be called from atomic context, but ensures that resources
2744  * are freed from process context
2745  */
bpf_link_put(struct bpf_link * link)2746 void bpf_link_put(struct bpf_link *link)
2747 {
2748 	if (!atomic64_dec_and_test(&link->refcnt))
2749 		return;
2750 
2751 	if (in_atomic()) {
2752 		INIT_WORK(&link->work, bpf_link_put_deferred);
2753 		schedule_work(&link->work);
2754 	} else {
2755 		bpf_link_free(link);
2756 	}
2757 }
2758 EXPORT_SYMBOL(bpf_link_put);
2759 
bpf_link_release(struct inode * inode,struct file * filp)2760 static int bpf_link_release(struct inode *inode, struct file *filp)
2761 {
2762 	struct bpf_link *link = filp->private_data;
2763 
2764 	bpf_link_put(link);
2765 	return 0;
2766 }
2767 
2768 #ifdef CONFIG_PROC_FS
2769 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2770 #define BPF_MAP_TYPE(_id, _ops)
2771 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2772 static const char *bpf_link_type_strs[] = {
2773 	[BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2774 #include <linux/bpf_types.h>
2775 };
2776 #undef BPF_PROG_TYPE
2777 #undef BPF_MAP_TYPE
2778 #undef BPF_LINK_TYPE
2779 
bpf_link_show_fdinfo(struct seq_file * m,struct file * filp)2780 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2781 {
2782 	const struct bpf_link *link = filp->private_data;
2783 	const struct bpf_prog *prog = link->prog;
2784 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
2785 
2786 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2787 	seq_printf(m,
2788 		   "link_type:\t%s\n"
2789 		   "link_id:\t%u\n"
2790 		   "prog_tag:\t%s\n"
2791 		   "prog_id:\t%u\n",
2792 		   bpf_link_type_strs[link->type],
2793 		   link->id,
2794 		   prog_tag,
2795 		   prog->aux->id);
2796 	if (link->ops->show_fdinfo)
2797 		link->ops->show_fdinfo(link, m);
2798 }
2799 #endif
2800 
2801 static const struct file_operations bpf_link_fops = {
2802 #ifdef CONFIG_PROC_FS
2803 	.show_fdinfo	= bpf_link_show_fdinfo,
2804 #endif
2805 	.release	= bpf_link_release,
2806 	.read		= bpf_dummy_read,
2807 	.write		= bpf_dummy_write,
2808 };
2809 
bpf_link_alloc_id(struct bpf_link * link)2810 static int bpf_link_alloc_id(struct bpf_link *link)
2811 {
2812 	int id;
2813 
2814 	idr_preload(GFP_KERNEL);
2815 	spin_lock_bh(&link_idr_lock);
2816 	id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2817 	spin_unlock_bh(&link_idr_lock);
2818 	idr_preload_end();
2819 
2820 	return id;
2821 }
2822 
2823 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2824  * reserving unused FD and allocating ID from link_idr. This is to be paired
2825  * with bpf_link_settle() to install FD and ID and expose bpf_link to
2826  * user-space, if bpf_link is successfully attached. If not, bpf_link and
2827  * pre-allocated resources are to be freed with bpf_cleanup() call. All the
2828  * transient state is passed around in struct bpf_link_primer.
2829  * This is preferred way to create and initialize bpf_link, especially when
2830  * there are complicated and expensive operations in between creating bpf_link
2831  * itself and attaching it to BPF hook. By using bpf_link_prime() and
2832  * bpf_link_settle() kernel code using bpf_link doesn't have to perform
2833  * expensive (and potentially failing) roll back operations in a rare case
2834  * that file, FD, or ID can't be allocated.
2835  */
bpf_link_prime(struct bpf_link * link,struct bpf_link_primer * primer)2836 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
2837 {
2838 	struct file *file;
2839 	int fd, id;
2840 
2841 	fd = get_unused_fd_flags(O_CLOEXEC);
2842 	if (fd < 0)
2843 		return fd;
2844 
2845 
2846 	id = bpf_link_alloc_id(link);
2847 	if (id < 0) {
2848 		put_unused_fd(fd);
2849 		return id;
2850 	}
2851 
2852 	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2853 	if (IS_ERR(file)) {
2854 		bpf_link_free_id(id);
2855 		put_unused_fd(fd);
2856 		return PTR_ERR(file);
2857 	}
2858 
2859 	primer->link = link;
2860 	primer->file = file;
2861 	primer->fd = fd;
2862 	primer->id = id;
2863 	return 0;
2864 }
2865 
bpf_link_settle(struct bpf_link_primer * primer)2866 int bpf_link_settle(struct bpf_link_primer *primer)
2867 {
2868 	/* make bpf_link fetchable by ID */
2869 	spin_lock_bh(&link_idr_lock);
2870 	primer->link->id = primer->id;
2871 	spin_unlock_bh(&link_idr_lock);
2872 	/* make bpf_link fetchable by FD */
2873 	fd_install(primer->fd, primer->file);
2874 	/* pass through installed FD */
2875 	return primer->fd;
2876 }
2877 
bpf_link_new_fd(struct bpf_link * link)2878 int bpf_link_new_fd(struct bpf_link *link)
2879 {
2880 	return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
2881 }
2882 
/* Look up the link behind user FD @ufd and take a reference on it.
 *
 * Returns the referenced link, ERR_PTR(-EBADF) when @ufd has no file, or
 * ERR_PTR(-EINVAL) when the file is not a link file.
 */
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_link *link;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op == &bpf_link_fops) {
		link = f.file->private_data;
		bpf_link_inc(link);
	} else {
		link = ERR_PTR(-EINVAL);
	}

	fdput(f);
	return link;
}
EXPORT_SYMBOL(bpf_link_get_from_fd);
2902 
bpf_tracing_link_release(struct bpf_link * link)2903 static void bpf_tracing_link_release(struct bpf_link *link)
2904 {
2905 	struct bpf_tracing_link *tr_link =
2906 		container_of(link, struct bpf_tracing_link, link.link);
2907 
2908 	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
2909 						tr_link->trampoline));
2910 
2911 	bpf_trampoline_put(tr_link->trampoline);
2912 
2913 	/* tgt_prog is NULL if target is a kernel function */
2914 	if (tr_link->tgt_prog)
2915 		bpf_prog_put(tr_link->tgt_prog);
2916 }
2917 
bpf_tracing_link_dealloc(struct bpf_link * link)2918 static void bpf_tracing_link_dealloc(struct bpf_link *link)
2919 {
2920 	struct bpf_tracing_link *tr_link =
2921 		container_of(link, struct bpf_tracing_link, link.link);
2922 
2923 	kfree(tr_link);
2924 }
2925 
bpf_tracing_link_show_fdinfo(const struct bpf_link * link,struct seq_file * seq)2926 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
2927 					 struct seq_file *seq)
2928 {
2929 	struct bpf_tracing_link *tr_link =
2930 		container_of(link, struct bpf_tracing_link, link.link);
2931 
2932 	seq_printf(seq,
2933 		   "attach_type:\t%d\n",
2934 		   tr_link->attach_type);
2935 }
2936 
bpf_tracing_link_fill_link_info(const struct bpf_link * link,struct bpf_link_info * info)2937 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
2938 					   struct bpf_link_info *info)
2939 {
2940 	struct bpf_tracing_link *tr_link =
2941 		container_of(link, struct bpf_tracing_link, link.link);
2942 
2943 	info->tracing.attach_type = tr_link->attach_type;
2944 	bpf_trampoline_unpack_key(tr_link->trampoline->key,
2945 				  &info->tracing.target_obj_id,
2946 				  &info->tracing.target_btf_id);
2947 
2948 	return 0;
2949 }
2950 
2951 static const struct bpf_link_ops bpf_tracing_link_lops = {
2952 	.release = bpf_tracing_link_release,
2953 	.dealloc = bpf_tracing_link_dealloc,
2954 	.show_fdinfo = bpf_tracing_link_show_fdinfo,
2955 	.fill_link_info = bpf_tracing_link_fill_link_info,
2956 };
2957 
bpf_tracing_prog_attach(struct bpf_prog * prog,int tgt_prog_fd,u32 btf_id,u64 bpf_cookie)2958 static int bpf_tracing_prog_attach(struct bpf_prog *prog,
2959 				   int tgt_prog_fd,
2960 				   u32 btf_id,
2961 				   u64 bpf_cookie)
2962 {
2963 	struct bpf_link_primer link_primer;
2964 	struct bpf_prog *tgt_prog = NULL;
2965 	struct bpf_trampoline *tr = NULL;
2966 	struct bpf_tracing_link *link;
2967 	u64 key = 0;
2968 	int err;
2969 
2970 	switch (prog->type) {
2971 	case BPF_PROG_TYPE_TRACING:
2972 		if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
2973 		    prog->expected_attach_type != BPF_TRACE_FEXIT &&
2974 		    prog->expected_attach_type != BPF_MODIFY_RETURN) {
2975 			err = -EINVAL;
2976 			goto out_put_prog;
2977 		}
2978 		break;
2979 	case BPF_PROG_TYPE_EXT:
2980 		if (prog->expected_attach_type != 0) {
2981 			err = -EINVAL;
2982 			goto out_put_prog;
2983 		}
2984 		break;
2985 	case BPF_PROG_TYPE_LSM:
2986 		if (prog->expected_attach_type != BPF_LSM_MAC) {
2987 			err = -EINVAL;
2988 			goto out_put_prog;
2989 		}
2990 		break;
2991 	default:
2992 		err = -EINVAL;
2993 		goto out_put_prog;
2994 	}
2995 
2996 	if (!!tgt_prog_fd != !!btf_id) {
2997 		err = -EINVAL;
2998 		goto out_put_prog;
2999 	}
3000 
3001 	if (tgt_prog_fd) {
3002 		/* For now we only allow new targets for BPF_PROG_TYPE_EXT */
3003 		if (prog->type != BPF_PROG_TYPE_EXT) {
3004 			err = -EINVAL;
3005 			goto out_put_prog;
3006 		}
3007 
3008 		tgt_prog = bpf_prog_get(tgt_prog_fd);
3009 		if (IS_ERR(tgt_prog)) {
3010 			err = PTR_ERR(tgt_prog);
3011 			tgt_prog = NULL;
3012 			goto out_put_prog;
3013 		}
3014 
3015 		key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
3016 	}
3017 
3018 	link = kzalloc(sizeof(*link), GFP_USER);
3019 	if (!link) {
3020 		err = -ENOMEM;
3021 		goto out_put_prog;
3022 	}
3023 	bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
3024 		      &bpf_tracing_link_lops, prog);
3025 	link->attach_type = prog->expected_attach_type;
3026 	link->link.cookie = bpf_cookie;
3027 
3028 	mutex_lock(&prog->aux->dst_mutex);
3029 
3030 	/* There are a few possible cases here:
3031 	 *
3032 	 * - if prog->aux->dst_trampoline is set, the program was just loaded
3033 	 *   and not yet attached to anything, so we can use the values stored
3034 	 *   in prog->aux
3035 	 *
3036 	 * - if prog->aux->dst_trampoline is NULL, the program has already been
3037          *   attached to a target and its initial target was cleared (below)
3038 	 *
3039 	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
3040 	 *   target_btf_id using the link_create API.
3041 	 *
3042 	 * - if tgt_prog == NULL when this function was called using the old
3043 	 *   raw_tracepoint_open API, and we need a target from prog->aux
3044 	 *
3045 	 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
3046 	 *   was detached and is going for re-attachment.
3047 	 *
3048 	 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf
3049 	 *   are NULL, then program was already attached and user did not provide
3050 	 *   tgt_prog_fd so we have no way to find out or create trampoline
3051 	 */
3052 	if (!prog->aux->dst_trampoline && !tgt_prog) {
3053 		/*
3054 		 * Allow re-attach for TRACING and LSM programs. If it's
3055 		 * currently linked, bpf_trampoline_link_prog will fail.
3056 		 * EXT programs need to specify tgt_prog_fd, so they
3057 		 * re-attach in separate code path.
3058 		 */
3059 		if (prog->type != BPF_PROG_TYPE_TRACING &&
3060 		    prog->type != BPF_PROG_TYPE_LSM) {
3061 			err = -EINVAL;
3062 			goto out_unlock;
3063 		}
3064 		/* We can allow re-attach only if we have valid attach_btf. */
3065 		if (!prog->aux->attach_btf) {
3066 			err = -EINVAL;
3067 			goto out_unlock;
3068 		}
3069 		btf_id = prog->aux->attach_btf_id;
3070 		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
3071 	}
3072 
3073 	if (!prog->aux->dst_trampoline ||
3074 	    (key && key != prog->aux->dst_trampoline->key)) {
3075 		/* If there is no saved target, or the specified target is
3076 		 * different from the destination specified at load time, we
3077 		 * need a new trampoline and a check for compatibility
3078 		 */
3079 		struct bpf_attach_target_info tgt_info = {};
3080 
3081 		err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
3082 					      &tgt_info);
3083 		if (err)
3084 			goto out_unlock;
3085 
3086 		tr = bpf_trampoline_get(key, &tgt_info);
3087 		if (!tr) {
3088 			err = -ENOMEM;
3089 			goto out_unlock;
3090 		}
3091 	} else {
3092 		/* The caller didn't specify a target, or the target was the
3093 		 * same as the destination supplied during program load. This
3094 		 * means we can reuse the trampoline and reference from program
3095 		 * load time, and there is no need to allocate a new one. This
3096 		 * can only happen once for any program, as the saved values in
3097 		 * prog->aux are cleared below.
3098 		 */
3099 		tr = prog->aux->dst_trampoline;
3100 		tgt_prog = prog->aux->dst_prog;
3101 	}
3102 
3103 	err = bpf_link_prime(&link->link.link, &link_primer);
3104 	if (err)
3105 		goto out_unlock;
3106 
3107 	err = bpf_trampoline_link_prog(&link->link, tr);
3108 	if (err) {
3109 		bpf_link_cleanup(&link_primer);
3110 		link = NULL;
3111 		goto out_unlock;
3112 	}
3113 
3114 	link->tgt_prog = tgt_prog;
3115 	link->trampoline = tr;
3116 
3117 	/* Always clear the trampoline and target prog from prog->aux to make
3118 	 * sure the original attach destination is not kept alive after a
3119 	 * program is (re-)attached to another target.
3120 	 */
3121 	if (prog->aux->dst_prog &&
3122 	    (tgt_prog_fd || tr != prog->aux->dst_trampoline))
3123 		/* got extra prog ref from syscall, or attaching to different prog */
3124 		bpf_prog_put(prog->aux->dst_prog);
3125 	if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
3126 		/* we allocated a new trampoline, so free the old one */
3127 		bpf_trampoline_put(prog->aux->dst_trampoline);
3128 
3129 	prog->aux->dst_prog = NULL;
3130 	prog->aux->dst_trampoline = NULL;
3131 	mutex_unlock(&prog->aux->dst_mutex);
3132 
3133 	return bpf_link_settle(&link_primer);
3134 out_unlock:
3135 	if (tr && tr != prog->aux->dst_trampoline)
3136 		bpf_trampoline_put(tr);
3137 	mutex_unlock(&prog->aux->dst_mutex);
3138 	kfree(link);
3139 out_put_prog:
3140 	if (tgt_prog_fd && tgt_prog)
3141 		bpf_prog_put(tgt_prog);
3142 	return err;
3143 }
3144 
/* A bpf_link that ties a program to a raw tracepoint. */
struct bpf_raw_tp_link {
	/* Generic link; the callbacks recover this struct via container_of(). */
	struct bpf_link link;
	/* Raw tracepoint the program is registered on. */
	struct bpf_raw_event_map *btp;
};
3149 
bpf_raw_tp_link_release(struct bpf_link * link)3150 static void bpf_raw_tp_link_release(struct bpf_link *link)
3151 {
3152 	struct bpf_raw_tp_link *raw_tp =
3153 		container_of(link, struct bpf_raw_tp_link, link);
3154 
3155 	bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
3156 	bpf_put_raw_tracepoint(raw_tp->btp);
3157 }
3158 
bpf_raw_tp_link_dealloc(struct bpf_link * link)3159 static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
3160 {
3161 	struct bpf_raw_tp_link *raw_tp =
3162 		container_of(link, struct bpf_raw_tp_link, link);
3163 
3164 	kfree(raw_tp);
3165 }
3166 
bpf_raw_tp_link_show_fdinfo(const struct bpf_link * link,struct seq_file * seq)3167 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
3168 					struct seq_file *seq)
3169 {
3170 	struct bpf_raw_tp_link *raw_tp_link =
3171 		container_of(link, struct bpf_raw_tp_link, link);
3172 
3173 	seq_printf(seq,
3174 		   "tp_name:\t%s\n",
3175 		   raw_tp_link->btp->tp->name);
3176 }
3177 
bpf_raw_tp_link_fill_link_info(const struct bpf_link * link,struct bpf_link_info * info)3178 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
3179 					  struct bpf_link_info *info)
3180 {
3181 	struct bpf_raw_tp_link *raw_tp_link =
3182 		container_of(link, struct bpf_raw_tp_link, link);
3183 	char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
3184 	const char *tp_name = raw_tp_link->btp->tp->name;
3185 	u32 ulen = info->raw_tracepoint.tp_name_len;
3186 	size_t tp_len = strlen(tp_name);
3187 
3188 	if (!ulen ^ !ubuf)
3189 		return -EINVAL;
3190 
3191 	info->raw_tracepoint.tp_name_len = tp_len + 1;
3192 
3193 	if (!ubuf)
3194 		return 0;
3195 
3196 	if (ulen >= tp_len + 1) {
3197 		if (copy_to_user(ubuf, tp_name, tp_len + 1))
3198 			return -EFAULT;
3199 	} else {
3200 		char zero = '\0';
3201 
3202 		if (copy_to_user(ubuf, tp_name, ulen - 1))
3203 			return -EFAULT;
3204 		if (put_user(zero, ubuf + ulen - 1))
3205 			return -EFAULT;
3206 		return -ENOSPC;
3207 	}
3208 
3209 	return 0;
3210 }
3211 
3212 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
3213 	.release = bpf_raw_tp_link_release,
3214 	.dealloc = bpf_raw_tp_link_dealloc,
3215 	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
3216 	.fill_link_info = bpf_raw_tp_link_fill_link_info,
3217 };
3218 
3219 #ifdef CONFIG_PERF_EVENTS
/* A bpf_link that ties a program to a perf event. */
struct bpf_perf_link {
	/* Generic link; the callbacks recover this struct via container_of(). */
	struct bpf_link link;
	/* File of the perf event; its private_data is the perf_event and the
	 * file is released with fput() when the link is released.
	 */
	struct file *perf_file;
};
3224 
bpf_perf_link_release(struct bpf_link * link)3225 static void bpf_perf_link_release(struct bpf_link *link)
3226 {
3227 	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3228 	struct perf_event *event = perf_link->perf_file->private_data;
3229 
3230 	perf_event_free_bpf_prog(event);
3231 	fput(perf_link->perf_file);
3232 }
3233 
bpf_perf_link_dealloc(struct bpf_link * link)3234 static void bpf_perf_link_dealloc(struct bpf_link *link)
3235 {
3236 	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3237 
3238 	kfree(perf_link);
3239 }
3240 
3241 static const struct bpf_link_ops bpf_perf_link_lops = {
3242 	.release = bpf_perf_link_release,
3243 	.dealloc = bpf_perf_link_dealloc,
3244 };
3245 
bpf_perf_link_attach(const union bpf_attr * attr,struct bpf_prog * prog)3246 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3247 {
3248 	struct bpf_link_primer link_primer;
3249 	struct bpf_perf_link *link;
3250 	struct perf_event *event;
3251 	struct file *perf_file;
3252 	int err;
3253 
3254 	if (attr->link_create.flags)
3255 		return -EINVAL;
3256 
3257 	perf_file = perf_event_get(attr->link_create.target_fd);
3258 	if (IS_ERR(perf_file))
3259 		return PTR_ERR(perf_file);
3260 
3261 	link = kzalloc(sizeof(*link), GFP_USER);
3262 	if (!link) {
3263 		err = -ENOMEM;
3264 		goto out_put_file;
3265 	}
3266 	bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
3267 	link->perf_file = perf_file;
3268 
3269 	err = bpf_link_prime(&link->link, &link_primer);
3270 	if (err) {
3271 		kfree(link);
3272 		goto out_put_file;
3273 	}
3274 
3275 	event = perf_file->private_data;
3276 	err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
3277 	if (err) {
3278 		bpf_link_cleanup(&link_primer);
3279 		goto out_put_file;
3280 	}
3281 	/* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
3282 	bpf_prog_inc(prog);
3283 
3284 	return bpf_link_settle(&link_primer);
3285 
3286 out_put_file:
3287 	fput(perf_file);
3288 	return err;
3289 }
3290 #else
/* Stub used when CONFIG_PERF_EVENTS is not set: perf event links are not
 * available, so always fail with -EOPNOTSUPP.
 */
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
3295 #endif /* CONFIG_PERF_EVENTS */
3296 
/* Attach @prog to a raw tracepoint and return the value of
 * bpf_link_settle() for the new link, or a negative errno.
 *
 * How the tracepoint name is determined depends on the program type:
 *  - TRACING/EXT/LSM programs must not pass @user_tp_name (-EINVAL).
 *    TRACING programs expecting BPF_TRACE_RAW_TP use
 *    prog->aux->attach_func_name; all others in this group are handed to
 *    bpf_tracing_prog_attach().
 *  - RAW_TRACEPOINT(_WRITABLE) programs take the name from user space,
 *    truncated to fit the local 128-byte buffer.
 *  - Any other program type is rejected with -EINVAL.
 */
static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
				  const char __user *user_tp_name)
{
	struct bpf_link_primer primer;
	struct bpf_raw_tp_link *tp_link;
	struct bpf_raw_event_map *btp;
	const char *tp_name;
	char buf[128];
	int err;

	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_EXT:
	case BPF_PROG_TYPE_LSM:
		/* The attach point for this category of programs
		 * should be specified via btf_id during program load.
		 */
		if (user_tp_name)
			return -EINVAL;
		if (prog->type != BPF_PROG_TYPE_TRACING ||
		    prog->expected_attach_type != BPF_TRACE_RAW_TP)
			return bpf_tracing_prog_attach(prog, 0, 0, 0);
		tp_name = prog->aux->attach_func_name;
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0)
			return -EFAULT;
		/* Terminate explicitly in case the copy filled the buffer. */
		buf[sizeof(buf) - 1] = 0;
		tp_name = buf;
		break;
	default:
		return -EINVAL;
	}

	btp = bpf_get_raw_tracepoint(tp_name);
	if (!btp)
		return -ENOENT;

	tp_link = kzalloc(sizeof(*tp_link), GFP_USER);
	if (!tp_link) {
		err = -ENOMEM;
		goto out_put_btp;
	}
	bpf_link_init(&tp_link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
		      &bpf_raw_tp_link_lops, prog);
	tp_link->btp = btp;

	err = bpf_link_prime(&tp_link->link, &primer);
	if (err)
		goto out_free_link;

	err = bpf_probe_register(tp_link->btp, prog);
	if (err) {
		/* Once primed, the link is torn down via bpf_link_cleanup()
		 * rather than freed directly here.
		 */
		bpf_link_cleanup(&primer);
		goto out_put_btp;
	}

	return bpf_link_settle(&primer);

out_free_link:
	kfree(tp_link);
out_put_btp:
	bpf_put_raw_tracepoint(btp);
	return err;
}
3364 
3365 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
3366 
bpf_raw_tracepoint_open(const union bpf_attr * attr)3367 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
3368 {
3369 	struct bpf_prog *prog;
3370 	int fd;
3371 
3372 	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
3373 		return -EINVAL;
3374 
3375 	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
3376 	if (IS_ERR(prog))
3377 		return PTR_ERR(prog);
3378 
3379 	fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name));
3380 	if (fd < 0)
3381 		bpf_prog_put(prog);
3382 	return fd;
3383 }
3384 
bpf_prog_attach_check_attach_type(const struct bpf_prog * prog,enum bpf_attach_type attach_type)3385 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
3386 					     enum bpf_attach_type attach_type)
3387 {
3388 	switch (prog->type) {
3389 	case BPF_PROG_TYPE_CGROUP_SOCK:
3390 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3391 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3392 	case BPF_PROG_TYPE_SK_LOOKUP:
3393 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
3394 	case BPF_PROG_TYPE_CGROUP_SKB:
3395 		if (!capable(CAP_NET_ADMIN))
3396 			/* cg-skb progs can be loaded by unpriv user.
3397 			 * check permissions at attach time.
3398 			 */
3399 			return -EPERM;
3400 		return prog->enforce_expected_attach_type &&
3401 			prog->expected_attach_type != attach_type ?
3402 			-EINVAL : 0;
3403 	case BPF_PROG_TYPE_KPROBE:
3404 		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
3405 		    attach_type != BPF_TRACE_KPROBE_MULTI)
3406 			return -EINVAL;
3407 		return 0;
3408 	default:
3409 		return 0;
3410 	}
3411 }
3412 
3413 static enum bpf_prog_type
attach_type_to_prog_type(enum bpf_attach_type attach_type)3414 attach_type_to_prog_type(enum bpf_attach_type attach_type)
3415 {
3416 	switch (attach_type) {
3417 	case BPF_CGROUP_INET_INGRESS:
3418 	case BPF_CGROUP_INET_EGRESS:
3419 		return BPF_PROG_TYPE_CGROUP_SKB;
3420 	case BPF_CGROUP_INET_SOCK_CREATE:
3421 	case BPF_CGROUP_INET_SOCK_RELEASE:
3422 	case BPF_CGROUP_INET4_POST_BIND:
3423 	case BPF_CGROUP_INET6_POST_BIND:
3424 		return BPF_PROG_TYPE_CGROUP_SOCK;
3425 	case BPF_CGROUP_INET4_BIND:
3426 	case BPF_CGROUP_INET6_BIND:
3427 	case BPF_CGROUP_INET4_CONNECT:
3428 	case BPF_CGROUP_INET6_CONNECT:
3429 	case BPF_CGROUP_INET4_GETPEERNAME:
3430 	case BPF_CGROUP_INET6_GETPEERNAME:
3431 	case BPF_CGROUP_INET4_GETSOCKNAME:
3432 	case BPF_CGROUP_INET6_GETSOCKNAME:
3433 	case BPF_CGROUP_UDP4_SENDMSG:
3434 	case BPF_CGROUP_UDP6_SENDMSG:
3435 	case BPF_CGROUP_UDP4_RECVMSG:
3436 	case BPF_CGROUP_UDP6_RECVMSG:
3437 		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
3438 	case BPF_CGROUP_SOCK_OPS:
3439 		return BPF_PROG_TYPE_SOCK_OPS;
3440 	case BPF_CGROUP_DEVICE:
3441 		return BPF_PROG_TYPE_CGROUP_DEVICE;
3442 	case BPF_SK_MSG_VERDICT:
3443 		return BPF_PROG_TYPE_SK_MSG;
3444 	case BPF_SK_SKB_STREAM_PARSER:
3445 	case BPF_SK_SKB_STREAM_VERDICT:
3446 	case BPF_SK_SKB_VERDICT:
3447 		return BPF_PROG_TYPE_SK_SKB;
3448 	case BPF_LIRC_MODE2:
3449 		return BPF_PROG_TYPE_LIRC_MODE2;
3450 	case BPF_FLOW_DISSECTOR:
3451 		return BPF_PROG_TYPE_FLOW_DISSECTOR;
3452 	case BPF_CGROUP_SYSCTL:
3453 		return BPF_PROG_TYPE_CGROUP_SYSCTL;
3454 	case BPF_CGROUP_GETSOCKOPT:
3455 	case BPF_CGROUP_SETSOCKOPT:
3456 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
3457 	case BPF_TRACE_ITER:
3458 	case BPF_TRACE_RAW_TP:
3459 	case BPF_TRACE_FENTRY:
3460 	case BPF_TRACE_FEXIT:
3461 	case BPF_MODIFY_RETURN:
3462 		return BPF_PROG_TYPE_TRACING;
3463 	case BPF_LSM_MAC:
3464 		return BPF_PROG_TYPE_LSM;
3465 	case BPF_SK_LOOKUP:
3466 		return BPF_PROG_TYPE_SK_LOOKUP;
3467 	case BPF_XDP:
3468 		return BPF_PROG_TYPE_XDP;
3469 	case BPF_LSM_CGROUP:
3470 		return BPF_PROG_TYPE_LSM;
3471 	default:
3472 		return BPF_PROG_TYPE_UNSPEC;
3473 	}
3474 }
3475 
3476 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
3477 
3478 #define BPF_F_ATTACH_MASK \
3479 	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
3480 
bpf_prog_attach(const union bpf_attr * attr)3481 static int bpf_prog_attach(const union bpf_attr *attr)
3482 {
3483 	enum bpf_prog_type ptype;
3484 	struct bpf_prog *prog;
3485 	int ret;
3486 
3487 	if (CHECK_ATTR(BPF_PROG_ATTACH))
3488 		return -EINVAL;
3489 
3490 	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
3491 		return -EINVAL;
3492 
3493 	ptype = attach_type_to_prog_type(attr->attach_type);
3494 	if (ptype == BPF_PROG_TYPE_UNSPEC)
3495 		return -EINVAL;
3496 
3497 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3498 	if (IS_ERR(prog))
3499 		return PTR_ERR(prog);
3500 
3501 	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
3502 		bpf_prog_put(prog);
3503 		return -EINVAL;
3504 	}
3505 
3506 	switch (ptype) {
3507 	case BPF_PROG_TYPE_SK_SKB:
3508 	case BPF_PROG_TYPE_SK_MSG:
3509 		ret = sock_map_get_from_fd(attr, prog);
3510 		break;
3511 	case BPF_PROG_TYPE_LIRC_MODE2:
3512 		ret = lirc_prog_attach(attr, prog);
3513 		break;
3514 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
3515 		ret = netns_bpf_prog_attach(attr, prog);
3516 		break;
3517 	case BPF_PROG_TYPE_CGROUP_DEVICE:
3518 	case BPF_PROG_TYPE_CGROUP_SKB:
3519 	case BPF_PROG_TYPE_CGROUP_SOCK:
3520 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3521 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3522 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
3523 	case BPF_PROG_TYPE_SOCK_OPS:
3524 	case BPF_PROG_TYPE_LSM:
3525 		if (ptype == BPF_PROG_TYPE_LSM &&
3526 		    prog->expected_attach_type != BPF_LSM_CGROUP)
3527 			ret = -EINVAL;
3528 		else
3529 			ret = cgroup_bpf_prog_attach(attr, ptype, prog);
3530 		break;
3531 	default:
3532 		ret = -EINVAL;
3533 	}
3534 
3535 	if (ret)
3536 		bpf_prog_put(prog);
3537 	return ret;
3538 }
3539 
3540 #define BPF_PROG_DETACH_LAST_FIELD attach_type
3541 
bpf_prog_detach(const union bpf_attr * attr)3542 static int bpf_prog_detach(const union bpf_attr *attr)
3543 {
3544 	enum bpf_prog_type ptype;
3545 
3546 	if (CHECK_ATTR(BPF_PROG_DETACH))
3547 		return -EINVAL;
3548 
3549 	ptype = attach_type_to_prog_type(attr->attach_type);
3550 
3551 	switch (ptype) {
3552 	case BPF_PROG_TYPE_SK_MSG:
3553 	case BPF_PROG_TYPE_SK_SKB:
3554 		return sock_map_prog_detach(attr, ptype);
3555 	case BPF_PROG_TYPE_LIRC_MODE2:
3556 		return lirc_prog_detach(attr);
3557 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
3558 		return netns_bpf_prog_detach(attr, ptype);
3559 	case BPF_PROG_TYPE_CGROUP_DEVICE:
3560 	case BPF_PROG_TYPE_CGROUP_SKB:
3561 	case BPF_PROG_TYPE_CGROUP_SOCK:
3562 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3563 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3564 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
3565 	case BPF_PROG_TYPE_SOCK_OPS:
3566 	case BPF_PROG_TYPE_LSM:
3567 		return cgroup_bpf_prog_detach(attr, ptype);
3568 	default:
3569 		return -EINVAL;
3570 	}
3571 }
3572 
3573 #define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
3574 
bpf_prog_query(const union bpf_attr * attr,union bpf_attr __user * uattr)3575 static int bpf_prog_query(const union bpf_attr *attr,
3576 			  union bpf_attr __user *uattr)
3577 {
3578 	if (!capable(CAP_NET_ADMIN))
3579 		return -EPERM;
3580 	if (CHECK_ATTR(BPF_PROG_QUERY))
3581 		return -EINVAL;
3582 	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
3583 		return -EINVAL;
3584 
3585 	switch (attr->query.attach_type) {
3586 	case BPF_CGROUP_INET_INGRESS:
3587 	case BPF_CGROUP_INET_EGRESS:
3588 	case BPF_CGROUP_INET_SOCK_CREATE:
3589 	case BPF_CGROUP_INET_SOCK_RELEASE:
3590 	case BPF_CGROUP_INET4_BIND:
3591 	case BPF_CGROUP_INET6_BIND:
3592 	case BPF_CGROUP_INET4_POST_BIND:
3593 	case BPF_CGROUP_INET6_POST_BIND:
3594 	case BPF_CGROUP_INET4_CONNECT:
3595 	case BPF_CGROUP_INET6_CONNECT:
3596 	case BPF_CGROUP_INET4_GETPEERNAME:
3597 	case BPF_CGROUP_INET6_GETPEERNAME:
3598 	case BPF_CGROUP_INET4_GETSOCKNAME:
3599 	case BPF_CGROUP_INET6_GETSOCKNAME:
3600 	case BPF_CGROUP_UDP4_SENDMSG:
3601 	case BPF_CGROUP_UDP6_SENDMSG:
3602 	case BPF_CGROUP_UDP4_RECVMSG:
3603 	case BPF_CGROUP_UDP6_RECVMSG:
3604 	case BPF_CGROUP_SOCK_OPS:
3605 	case BPF_CGROUP_DEVICE:
3606 	case BPF_CGROUP_SYSCTL:
3607 	case BPF_CGROUP_GETSOCKOPT:
3608 	case BPF_CGROUP_SETSOCKOPT:
3609 	case BPF_LSM_CGROUP:
3610 		return cgroup_bpf_prog_query(attr, uattr);
3611 	case BPF_LIRC_MODE2:
3612 		return lirc_prog_query(attr, uattr);
3613 	case BPF_FLOW_DISSECTOR:
3614 	case BPF_SK_LOOKUP:
3615 		return netns_bpf_prog_query(attr, uattr);
3616 	case BPF_SK_SKB_STREAM_PARSER:
3617 	case BPF_SK_SKB_STREAM_VERDICT:
3618 	case BPF_SK_MSG_VERDICT:
3619 	case BPF_SK_SKB_VERDICT:
3620 		return sock_map_bpf_prog_query(attr, uattr);
3621 	default:
3622 		return -EINVAL;
3623 	}
3624 }
3625 
3626 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
3627 
bpf_prog_test_run(const union bpf_attr * attr,union bpf_attr __user * uattr)3628 static int bpf_prog_test_run(const union bpf_attr *attr,
3629 			     union bpf_attr __user *uattr)
3630 {
3631 	struct bpf_prog *prog;
3632 	int ret = -ENOTSUPP;
3633 
3634 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
3635 		return -EINVAL;
3636 
3637 	if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
3638 	    (!attr->test.ctx_size_in && attr->test.ctx_in))
3639 		return -EINVAL;
3640 
3641 	if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
3642 	    (!attr->test.ctx_size_out && attr->test.ctx_out))
3643 		return -EINVAL;
3644 
3645 	prog = bpf_prog_get(attr->test.prog_fd);
3646 	if (IS_ERR(prog))
3647 		return PTR_ERR(prog);
3648 
3649 	if (prog->aux->ops->test_run)
3650 		ret = prog->aux->ops->test_run(prog, attr, uattr);
3651 
3652 	bpf_prog_put(prog);
3653 	return ret;
3654 }
3655 
3656 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
3657 
bpf_obj_get_next_id(const union bpf_attr * attr,union bpf_attr __user * uattr,struct idr * idr,spinlock_t * lock)3658 static int bpf_obj_get_next_id(const union bpf_attr *attr,
3659 			       union bpf_attr __user *uattr,
3660 			       struct idr *idr,
3661 			       spinlock_t *lock)
3662 {
3663 	u32 next_id = attr->start_id;
3664 	int err = 0;
3665 
3666 	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
3667 		return -EINVAL;
3668 
3669 	if (!capable(CAP_SYS_ADMIN))
3670 		return -EPERM;
3671 
3672 	next_id++;
3673 	spin_lock_bh(lock);
3674 	if (!idr_get_next(idr, &next_id))
3675 		err = -ENOENT;
3676 	spin_unlock_bh(lock);
3677 
3678 	if (!err)
3679 		err = put_user(next_id, &uattr->next_id);
3680 
3681 	return err;
3682 }
3683 
/* Starting the search at *@id, return the first map in map_idr on which
 * __bpf_map_inc_not_zero() succeeds. Maps for which it fails are skipped
 * by advancing *@id and searching again. Returns NULL when the IDR has no
 * further entry. *@id is updated as the search proceeds.
 */
struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
{
	struct bpf_map *map;

	spin_lock_bh(&map_idr_lock);
	for (;;) {
		map = idr_get_next(&map_idr, id);
		if (!map)
			break;

		map = __bpf_map_inc_not_zero(map, false);
		if (!IS_ERR(map))
			break;

		/* Could not take a reference; try the next ID. */
		(*id)++;
	}
	spin_unlock_bh(&map_idr_lock);

	return map;
}
3702 
/* Starting the search at *@id, return the first program in prog_idr on
 * which bpf_prog_inc_not_zero() succeeds. Programs for which it fails are
 * skipped by advancing *@id and searching again. Returns NULL when the
 * IDR has no further entry. *@id is updated as the search proceeds.
 */
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
{
	struct bpf_prog *prog;

	spin_lock_bh(&prog_idr_lock);
	for (;;) {
		prog = idr_get_next(&prog_idr, id);
		if (!prog)
			break;

		prog = bpf_prog_inc_not_zero(prog);
		if (!IS_ERR(prog))
			break;

		/* Could not take a reference; try the next ID. */
		(*id)++;
	}
	spin_unlock_bh(&prog_idr_lock);

	return prog;
}
3721 
3722 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
3723 
bpf_prog_by_id(u32 id)3724 struct bpf_prog *bpf_prog_by_id(u32 id)
3725 {
3726 	struct bpf_prog *prog;
3727 
3728 	if (!id)
3729 		return ERR_PTR(-ENOENT);
3730 
3731 	spin_lock_bh(&prog_idr_lock);
3732 	prog = idr_find(&prog_idr, id);
3733 	if (prog)
3734 		prog = bpf_prog_inc_not_zero(prog);
3735 	else
3736 		prog = ERR_PTR(-ENOENT);
3737 	spin_unlock_bh(&prog_idr_lock);
3738 	return prog;
3739 }
3740 
bpf_prog_get_fd_by_id(const union bpf_attr * attr)3741 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
3742 {
3743 	struct bpf_prog *prog;
3744 	u32 id = attr->prog_id;
3745 	int fd;
3746 
3747 	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
3748 		return -EINVAL;
3749 
3750 	if (!capable(CAP_SYS_ADMIN))
3751 		return -EPERM;
3752 
3753 	prog = bpf_prog_by_id(id);
3754 	if (IS_ERR(prog))
3755 		return PTR_ERR(prog);
3756 
3757 	fd = bpf_prog_new_fd(prog);
3758 	if (fd < 0)
3759 		bpf_prog_put(prog);
3760 
3761 	return fd;
3762 }
3763 
3764 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
3765 
bpf_map_get_fd_by_id(const union bpf_attr * attr)3766 static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
3767 {
3768 	struct bpf_map *map;
3769 	u32 id = attr->map_id;
3770 	int f_flags;
3771 	int fd;
3772 
3773 	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
3774 	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
3775 		return -EINVAL;
3776 
3777 	if (!capable(CAP_SYS_ADMIN))
3778 		return -EPERM;
3779 
3780 	f_flags = bpf_get_file_flag(attr->open_flags);
3781 	if (f_flags < 0)
3782 		return f_flags;
3783 
3784 	spin_lock_bh(&map_idr_lock);
3785 	map = idr_find(&map_idr, id);
3786 	if (map)
3787 		map = __bpf_map_inc_not_zero(map, true);
3788 	else
3789 		map = ERR_PTR(-ENOENT);
3790 	spin_unlock_bh(&map_idr_lock);
3791 
3792 	if (IS_ERR(map))
3793 		return PTR_ERR(map);
3794 
3795 	fd = bpf_map_new_fd(map, f_flags);
3796 	if (fd < 0)
3797 		bpf_map_put_with_uref(map);
3798 
3799 	return fd;
3800 }
3801 
/* Find which of @prog's used maps the immediate @addr refers to.
 *
 * Under prog->aux->used_maps_mutex, each used map is checked in order:
 *  - if @addr is the map pointer itself, *@type is set to
 *    BPF_PSEUDO_MAP_FD;
 *  - otherwise, if the map provides ->map_direct_value_meta and it
 *    returns 0 for @addr, *@type is set to BPF_PSEUDO_MAP_VALUE (the
 *    callback is handed @off to fill in).
 *
 * *@off is reset to 0 before the search. Returns the matching map, or
 * NULL when none matches (in which case *@type is left untouched).
 */
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
					      unsigned long addr, u32 *off,
					      u32 *type)
{
	const struct bpf_map *found = NULL;
	int i;

	mutex_lock(&prog->aux->used_maps_mutex);
	*off = 0;
	for (i = 0; i < prog->aux->used_map_cnt; i++) {
		const struct bpf_map *map = prog->aux->used_maps[i];

		if (map == (void *)addr) {
			*type = BPF_PSEUDO_MAP_FD;
			found = map;
			break;
		}
		if (map->ops->map_direct_value_meta &&
		    !map->ops->map_direct_value_meta(map, addr, off)) {
			*type = BPF_PSEUDO_MAP_VALUE;
			found = map;
			break;
		}
	}
	mutex_unlock(&prog->aux->used_maps_mutex);

	return found;
}
3829 
bpf_insn_prepare_dump(const struct bpf_prog * prog,const struct cred * f_cred)3830 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
3831 					      const struct cred *f_cred)
3832 {
3833 	const struct bpf_map *map;
3834 	struct bpf_insn *insns;
3835 	u32 off, type;
3836 	u64 imm;
3837 	u8 code;
3838 	int i;
3839 
3840 	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
3841 			GFP_USER);
3842 	if (!insns)
3843 		return insns;
3844 
3845 	for (i = 0; i < prog->len; i++) {
3846 		code = insns[i].code;
3847 
3848 		if (code == (BPF_JMP | BPF_TAIL_CALL)) {
3849 			insns[i].code = BPF_JMP | BPF_CALL;
3850 			insns[i].imm = BPF_FUNC_tail_call;
3851 			/* fall-through */
3852 		}
3853 		if (code == (BPF_JMP | BPF_CALL) ||
3854 		    code == (BPF_JMP | BPF_CALL_ARGS)) {
3855 			if (code == (BPF_JMP | BPF_CALL_ARGS))
3856 				insns[i].code = BPF_JMP | BPF_CALL;
3857 			if (!bpf_dump_raw_ok(f_cred))
3858 				insns[i].imm = 0;
3859 			continue;
3860 		}
3861 		if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
3862 			insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
3863 			continue;
3864 		}
3865 
3866 		if (code != (BPF_LD | BPF_IMM | BPF_DW))
3867 			continue;
3868 
3869 		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
3870 		map = bpf_map_from_imm(prog, imm, &off, &type);
3871 		if (map) {
3872 			insns[i].src_reg = type;
3873 			insns[i].imm = map->id;
3874 			insns[i + 1].imm = off;
3875 			continue;
3876 		}
3877 	}
3878 
3879 	return insns;
3880 }
3881 
set_info_rec_size(struct bpf_prog_info * info)3882 static int set_info_rec_size(struct bpf_prog_info *info)
3883 {
3884 	/*
3885 	 * Ensure info.*_rec_size is the same as kernel expected size
3886 	 *
3887 	 * or
3888 	 *
3889 	 * Only allow zero *_rec_size if both _rec_size and _cnt are
3890 	 * zero.  In this case, the kernel will set the expected
3891 	 * _rec_size back to the info.
3892 	 */
3893 
3894 	if ((info->nr_func_info || info->func_info_rec_size) &&
3895 	    info->func_info_rec_size != sizeof(struct bpf_func_info))
3896 		return -EINVAL;
3897 
3898 	if ((info->nr_line_info || info->line_info_rec_size) &&
3899 	    info->line_info_rec_size != sizeof(struct bpf_line_info))
3900 		return -EINVAL;
3901 
3902 	if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
3903 	    info->jited_line_info_rec_size != sizeof(__u64))
3904 		return -EINVAL;
3905 
3906 	info->func_info_rec_size = sizeof(struct bpf_func_info);
3907 	info->line_info_rec_size = sizeof(struct bpf_line_info);
3908 	info->jited_line_info_rec_size = sizeof(__u64);
3909 
3910 	return 0;
3911 }
3912 
bpf_prog_get_info_by_fd(struct file * file,struct bpf_prog * prog,const union bpf_attr * attr,union bpf_attr __user * uattr)3913 static int bpf_prog_get_info_by_fd(struct file *file,
3914 				   struct bpf_prog *prog,
3915 				   const union bpf_attr *attr,
3916 				   union bpf_attr __user *uattr)
3917 {
3918 	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3919 	struct btf *attach_btf = bpf_prog_get_target_btf(prog);
3920 	struct bpf_prog_info info;
3921 	u32 info_len = attr->info.info_len;
3922 	struct bpf_prog_kstats stats;
3923 	char __user *uinsns;
3924 	u32 ulen;
3925 	int err;
3926 
3927 	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
3928 	if (err)
3929 		return err;
3930 	info_len = min_t(u32, sizeof(info), info_len);
3931 
3932 	memset(&info, 0, sizeof(info));
3933 	if (copy_from_user(&info, uinfo, info_len))
3934 		return -EFAULT;
3935 
3936 	info.type = prog->type;
3937 	info.id = prog->aux->id;
3938 	info.load_time = prog->aux->load_time;
3939 	info.created_by_uid = from_kuid_munged(current_user_ns(),
3940 					       prog->aux->user->uid);
3941 	info.gpl_compatible = prog->gpl_compatible;
3942 
3943 	memcpy(info.tag, prog->tag, sizeof(prog->tag));
3944 	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
3945 
3946 	mutex_lock(&prog->aux->used_maps_mutex);
3947 	ulen = info.nr_map_ids;
3948 	info.nr_map_ids = prog->aux->used_map_cnt;
3949 	ulen = min_t(u32, info.nr_map_ids, ulen);
3950 	if (ulen) {
3951 		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
3952 		u32 i;
3953 
3954 		for (i = 0; i < ulen; i++)
3955 			if (put_user(prog->aux->used_maps[i]->id,
3956 				     &user_map_ids[i])) {
3957 				mutex_unlock(&prog->aux->used_maps_mutex);
3958 				return -EFAULT;
3959 			}
3960 	}
3961 	mutex_unlock(&prog->aux->used_maps_mutex);
3962 
3963 	err = set_info_rec_size(&info);
3964 	if (err)
3965 		return err;
3966 
3967 	bpf_prog_get_stats(prog, &stats);
3968 	info.run_time_ns = stats.nsecs;
3969 	info.run_cnt = stats.cnt;
3970 	info.recursion_misses = stats.misses;
3971 
3972 	info.verified_insns = prog->aux->verified_insns;
3973 
3974 	if (!bpf_capable()) {
3975 		info.jited_prog_len = 0;
3976 		info.xlated_prog_len = 0;
3977 		info.nr_jited_ksyms = 0;
3978 		info.nr_jited_func_lens = 0;
3979 		info.nr_func_info = 0;
3980 		info.nr_line_info = 0;
3981 		info.nr_jited_line_info = 0;
3982 		goto done;
3983 	}
3984 
3985 	ulen = info.xlated_prog_len;
3986 	info.xlated_prog_len = bpf_prog_insn_size(prog);
3987 	if (info.xlated_prog_len && ulen) {
3988 		struct bpf_insn *insns_sanitized;
3989 		bool fault;
3990 
3991 		if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
3992 			info.xlated_prog_insns = 0;
3993 			goto done;
3994 		}
3995 		insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
3996 		if (!insns_sanitized)
3997 			return -ENOMEM;
3998 		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
3999 		ulen = min_t(u32, info.xlated_prog_len, ulen);
4000 		fault = copy_to_user(uinsns, insns_sanitized, ulen);
4001 		kfree(insns_sanitized);
4002 		if (fault)
4003 			return -EFAULT;
4004 	}
4005 
4006 	if (bpf_prog_is_dev_bound(prog->aux)) {
4007 		err = bpf_prog_offload_info_fill(&info, prog);
4008 		if (err)
4009 			return err;
4010 		goto done;
4011 	}
4012 
4013 	/* NOTE: the following code is supposed to be skipped for offload.
4014 	 * bpf_prog_offload_info_fill() is the place to fill similar fields
4015 	 * for offload.
4016 	 */
4017 	ulen = info.jited_prog_len;
4018 	if (prog->aux->func_cnt) {
4019 		u32 i;
4020 
4021 		info.jited_prog_len = 0;
4022 		for (i = 0; i < prog->aux->func_cnt; i++)
4023 			info.jited_prog_len += prog->aux->func[i]->jited_len;
4024 	} else {
4025 		info.jited_prog_len = prog->jited_len;
4026 	}
4027 
4028 	if (info.jited_prog_len && ulen) {
4029 		if (bpf_dump_raw_ok(file->f_cred)) {
4030 			uinsns = u64_to_user_ptr(info.jited_prog_insns);
4031 			ulen = min_t(u32, info.jited_prog_len, ulen);
4032 
4033 			/* for multi-function programs, copy the JITed
4034 			 * instructions for all the functions
4035 			 */
4036 			if (prog->aux->func_cnt) {
4037 				u32 len, free, i;
4038 				u8 *img;
4039 
4040 				free = ulen;
4041 				for (i = 0; i < prog->aux->func_cnt; i++) {
4042 					len = prog->aux->func[i]->jited_len;
4043 					len = min_t(u32, len, free);
4044 					img = (u8 *) prog->aux->func[i]->bpf_func;
4045 					if (copy_to_user(uinsns, img, len))
4046 						return -EFAULT;
4047 					uinsns += len;
4048 					free -= len;
4049 					if (!free)
4050 						break;
4051 				}
4052 			} else {
4053 				if (copy_to_user(uinsns, prog->bpf_func, ulen))
4054 					return -EFAULT;
4055 			}
4056 		} else {
4057 			info.jited_prog_insns = 0;
4058 		}
4059 	}
4060 
4061 	ulen = info.nr_jited_ksyms;
4062 	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
4063 	if (ulen) {
4064 		if (bpf_dump_raw_ok(file->f_cred)) {
4065 			unsigned long ksym_addr;
4066 			u64 __user *user_ksyms;
4067 			u32 i;
4068 
4069 			/* copy the address of the kernel symbol
4070 			 * corresponding to each function
4071 			 */
4072 			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
4073 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
4074 			if (prog->aux->func_cnt) {
4075 				for (i = 0; i < ulen; i++) {
4076 					ksym_addr = (unsigned long)
4077 						prog->aux->func[i]->bpf_func;
4078 					if (put_user((u64) ksym_addr,
4079 						     &user_ksyms[i]))
4080 						return -EFAULT;
4081 				}
4082 			} else {
4083 				ksym_addr = (unsigned long) prog->bpf_func;
4084 				if (put_user((u64) ksym_addr, &user_ksyms[0]))
4085 					return -EFAULT;
4086 			}
4087 		} else {
4088 			info.jited_ksyms = 0;
4089 		}
4090 	}
4091 
4092 	ulen = info.nr_jited_func_lens;
4093 	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
4094 	if (ulen) {
4095 		if (bpf_dump_raw_ok(file->f_cred)) {
4096 			u32 __user *user_lens;
4097 			u32 func_len, i;
4098 
4099 			/* copy the JITed image lengths for each function */
4100 			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
4101 			user_lens = u64_to_user_ptr(info.jited_func_lens);
4102 			if (prog->aux->func_cnt) {
4103 				for (i = 0; i < ulen; i++) {
4104 					func_len =
4105 						prog->aux->func[i]->jited_len;
4106 					if (put_user(func_len, &user_lens[i]))
4107 						return -EFAULT;
4108 				}
4109 			} else {
4110 				func_len = prog->jited_len;
4111 				if (put_user(func_len, &user_lens[0]))
4112 					return -EFAULT;
4113 			}
4114 		} else {
4115 			info.jited_func_lens = 0;
4116 		}
4117 	}
4118 
4119 	if (prog->aux->btf)
4120 		info.btf_id = btf_obj_id(prog->aux->btf);
4121 	info.attach_btf_id = prog->aux->attach_btf_id;
4122 	if (attach_btf)
4123 		info.attach_btf_obj_id = btf_obj_id(attach_btf);
4124 
4125 	ulen = info.nr_func_info;
4126 	info.nr_func_info = prog->aux->func_info_cnt;
4127 	if (info.nr_func_info && ulen) {
4128 		char __user *user_finfo;
4129 
4130 		user_finfo = u64_to_user_ptr(info.func_info);
4131 		ulen = min_t(u32, info.nr_func_info, ulen);
4132 		if (copy_to_user(user_finfo, prog->aux->func_info,
4133 				 info.func_info_rec_size * ulen))
4134 			return -EFAULT;
4135 	}
4136 
4137 	ulen = info.nr_line_info;
4138 	info.nr_line_info = prog->aux->nr_linfo;
4139 	if (info.nr_line_info && ulen) {
4140 		__u8 __user *user_linfo;
4141 
4142 		user_linfo = u64_to_user_ptr(info.line_info);
4143 		ulen = min_t(u32, info.nr_line_info, ulen);
4144 		if (copy_to_user(user_linfo, prog->aux->linfo,
4145 				 info.line_info_rec_size * ulen))
4146 			return -EFAULT;
4147 	}
4148 
4149 	ulen = info.nr_jited_line_info;
4150 	if (prog->aux->jited_linfo)
4151 		info.nr_jited_line_info = prog->aux->nr_linfo;
4152 	else
4153 		info.nr_jited_line_info = 0;
4154 	if (info.nr_jited_line_info && ulen) {
4155 		if (bpf_dump_raw_ok(file->f_cred)) {
4156 			unsigned long line_addr;
4157 			__u64 __user *user_linfo;
4158 			u32 i;
4159 
4160 			user_linfo = u64_to_user_ptr(info.jited_line_info);
4161 			ulen = min_t(u32, info.nr_jited_line_info, ulen);
4162 			for (i = 0; i < ulen; i++) {
4163 				line_addr = (unsigned long)prog->aux->jited_linfo[i];
4164 				if (put_user((__u64)line_addr, &user_linfo[i]))
4165 					return -EFAULT;
4166 			}
4167 		} else {
4168 			info.jited_line_info = 0;
4169 		}
4170 	}
4171 
4172 	ulen = info.nr_prog_tags;
4173 	info.nr_prog_tags = prog->aux->func_cnt ? : 1;
4174 	if (ulen) {
4175 		__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
4176 		u32 i;
4177 
4178 		user_prog_tags = u64_to_user_ptr(info.prog_tags);
4179 		ulen = min_t(u32, info.nr_prog_tags, ulen);
4180 		if (prog->aux->func_cnt) {
4181 			for (i = 0; i < ulen; i++) {
4182 				if (copy_to_user(user_prog_tags[i],
4183 						 prog->aux->func[i]->tag,
4184 						 BPF_TAG_SIZE))
4185 					return -EFAULT;
4186 			}
4187 		} else {
4188 			if (copy_to_user(user_prog_tags[0],
4189 					 prog->tag, BPF_TAG_SIZE))
4190 				return -EFAULT;
4191 		}
4192 	}
4193 
4194 done:
4195 	if (copy_to_user(uinfo, &info, info_len) ||
4196 	    put_user(info_len, &uattr->info.info_len))
4197 		return -EFAULT;
4198 
4199 	return 0;
4200 }
4201 
/* Build a struct bpf_map_info describing @map and copy it to the user
 * buffer referenced by attr->info.info.
 *
 * At most sizeof(struct bpf_map_info) bytes are written; the number of
 * bytes written is stored back into uattr->info.info_len.
 *
 * Returns 0 on success, the error reported by bpf_check_uarg_tail_zero()
 * or bpf_map_offload_info_fill() when one of them fails, or -EFAULT when
 * writing to user space fails.
 */
static int bpf_map_get_info_by_fd(struct file *file,
				  struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *user_info = u64_to_user_ptr(attr->info.info);
	u32 copy_len = attr->info.info_len;
	struct bpf_map_info kinfo;
	int ret;

	ret = bpf_check_uarg_tail_zero(USER_BPFPTR(user_info), sizeof(kinfo),
				       copy_len);
	if (ret)
		return ret;

	/* never copy out more than this kernel's definition of the struct */
	copy_len = min_t(u32, copy_len, sizeof(kinfo));

	/* start from all-zero bytes so every unset byte is copied out as 0 */
	memset(&kinfo, 0, sizeof(kinfo));

	memcpy(kinfo.name, map->name, sizeof(map->name));
	kinfo.id = map->id;
	kinfo.type = map->map_type;
	kinfo.key_size = map->key_size;
	kinfo.value_size = map->value_size;
	kinfo.max_entries = map->max_entries;
	kinfo.map_flags = map->map_flags;
	kinfo.map_extra = map->map_extra;

	/* BTF object and key/value type ids are only reported with BTF attached */
	kinfo.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
	if (map->btf) {
		kinfo.btf_id = btf_obj_id(map->btf);
		kinfo.btf_key_type_id = map->btf_key_type_id;
		kinfo.btf_value_type_id = map->btf_value_type_id;
	}

	if (bpf_map_is_dev_bound(map)) {
		ret = bpf_map_offload_info_fill(&kinfo, map);
		if (ret)
			return ret;
	}

	if (copy_to_user(user_info, &kinfo, copy_len))
		return -EFAULT;
	if (put_user(copy_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
4246 
/* BPF_OBJ_GET_INFO_BY_FD handler for BTF file descriptors.
 *
 * Runs bpf_check_uarg_tail_zero() on the user buffer against
 * sizeof(struct bpf_btf_info) and, when that succeeds, hands the request
 * to btf_get_info_by_fd().
 *
 * Returns the error from the check, otherwise whatever
 * btf_get_info_by_fd() returns.
 */
static int bpf_btf_get_info_by_fd(struct file *file,
				  struct btf *btf,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_btf_info __user *user_info;
	int ret;

	user_info = u64_to_user_ptr(attr->info.info);
	ret = bpf_check_uarg_tail_zero(USER_BPFPTR(user_info),
				       sizeof(struct bpf_btf_info),
				       attr->info.info_len);

	return ret ? ret : btf_get_info_by_fd(btf, attr, uattr);
}
4262 
/* Build a struct bpf_link_info for @link and copy it to the user buffer
 * referenced by attr->info.info.
 *
 * The user's current buffer contents are read in first, so the optional
 * link->ops->fill_link_info() callback sees whatever user space placed
 * there. type, id and prog_id are then overwritten from @link.
 *
 * Returns 0 on success, the error from bpf_check_uarg_tail_zero() or from
 * fill_link_info(), or -EFAULT when a user-space access fails. The number
 * of bytes written is stored into uattr->info.info_len.
 */
static int bpf_link_get_info_by_fd(struct file *file,
				  struct bpf_link *link,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_link_info __user *user_info = u64_to_user_ptr(attr->info.info);
	u32 copy_len = attr->info.info_len;
	struct bpf_link_info kinfo;
	int ret;

	ret = bpf_check_uarg_tail_zero(USER_BPFPTR(user_info), sizeof(kinfo),
				       copy_len);
	if (ret)
		return ret;

	/* clamp to this kernel's definition of the struct */
	copy_len = min_t(u32, copy_len, sizeof(kinfo));

	/* zero first: bytes beyond copy_len must not hold stack garbage */
	memset(&kinfo, 0, sizeof(kinfo));
	if (copy_from_user(&kinfo, user_info, copy_len))
		return -EFAULT;

	kinfo.id = link->id;
	kinfo.type = link->type;
	kinfo.prog_id = link->prog->aux->id;

	/* link-type specific details are optional */
	if (link->ops->fill_link_info) {
		ret = link->ops->fill_link_info(link, &kinfo);
		if (ret)
			return ret;
	}

	if (copy_to_user(user_info, &kinfo, copy_len))
		return -EFAULT;
	if (put_user(copy_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
4298 
4299 
4300 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
4301 
bpf_obj_get_info_by_fd(const union bpf_attr * attr,union bpf_attr __user * uattr)4302 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
4303 				  union bpf_attr __user *uattr)
4304 {
4305 	int ufd = attr->info.bpf_fd;
4306 	struct fd f;
4307 	int err;
4308 
4309 	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
4310 		return -EINVAL;
4311 
4312 	f = fdget(ufd);
4313 	if (!f.file)
4314 		return -EBADFD;
4315 
4316 	if (f.file->f_op == &bpf_prog_fops)
4317 		err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
4318 					      uattr);
4319 	else if (f.file->f_op == &bpf_map_fops)
4320 		err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
4321 					     uattr);
4322 	else if (f.file->f_op == &btf_fops)
4323 		err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
4324 	else if (f.file->f_op == &bpf_link_fops)
4325 		err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
4326 					      attr, uattr);
4327 	else
4328 		err = -EINVAL;
4329 
4330 	fdput(f);
4331 	return err;
4332 }
4333 
4334 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level
4335 
bpf_btf_load(const union bpf_attr * attr,bpfptr_t uattr)4336 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
4337 {
4338 	if (CHECK_ATTR(BPF_BTF_LOAD))
4339 		return -EINVAL;
4340 
4341 	if (!bpf_capable())
4342 		return -EPERM;
4343 
4344 	return btf_new_fd(attr, uattr);
4345 }
4346 
4347 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
4348 
bpf_btf_get_fd_by_id(const union bpf_attr * attr)4349 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
4350 {
4351 	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
4352 		return -EINVAL;
4353 
4354 	if (!capable(CAP_SYS_ADMIN))
4355 		return -EPERM;
4356 
4357 	return btf_get_fd_by_id(attr->btf_id);
4358 }
4359 
/* Copy the result of a BPF_TASK_FD_QUERY back to user space.
 *
 * uattr->task_fd_query.buf_len is always set to strlen(@buf) (0 when @buf
 * is NULL). When the caller supplied a non-NULL buffer with a non-zero
 * length, the string is copied into it and always NUL terminated; a
 * buffer too small for the full string receives a truncated copy.
 * prog_id, fd_type, probe_offset and probe_addr are then written out.
 *
 * Returns 0 on success, -ENOSPC when the string had to be truncated (the
 * remaining fields are still written), or -EFAULT when a user-space
 * access fails.
 */
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				    union bpf_attr __user *uattr,
				    u32 prog_id, u32 fd_type,
				    const char *buf, u64 probe_offset,
				    u64 probe_addr)
{
	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
	u32 user_cap = attr->task_fd_query.buf_len;
	u32 str_len = 0;
	char nul = '\0';
	int ret = 0;

	if (buf)
		str_len = strlen(buf);

	/* report the full string length regardless of what fits */
	if (put_user(str_len, &uattr->task_fd_query.buf_len))
		return -EFAULT;

	if (user_cap && ubuf) {
		if (!str_len) {
			/* empty string: only the terminator is written */
			if (put_user(nul, ubuf))
				return -EFAULT;
		} else if (user_cap >= str_len + 1) {
			/* whole string plus terminator fits */
			if (copy_to_user(ubuf, buf, str_len + 1))
				return -EFAULT;
		} else {
			/* too small: copy what fits, terminate the last byte */
			ret = -ENOSPC;
			if (copy_to_user(ubuf, buf, user_cap - 1))
				return -EFAULT;
			if (put_user(nul, ubuf + user_cap - 1))
				return -EFAULT;
		}
	}

	if (put_user(prog_id, &uattr->task_fd_query.prog_id))
		return -EFAULT;
	if (put_user(fd_type, &uattr->task_fd_query.fd_type))
		return -EFAULT;
	if (put_user(probe_offset, &uattr->task_fd_query.probe_offset))
		return -EFAULT;
	if (put_user(probe_addr, &uattr->task_fd_query.probe_addr))
		return -EFAULT;

	return ret;
}
4406 
4407 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
4408 
bpf_task_fd_query(const union bpf_attr * attr,union bpf_attr __user * uattr)4409 static int bpf_task_fd_query(const union bpf_attr *attr,
4410 			     union bpf_attr __user *uattr)
4411 {
4412 	pid_t pid = attr->task_fd_query.pid;
4413 	u32 fd = attr->task_fd_query.fd;
4414 	const struct perf_event *event;
4415 	struct task_struct *task;
4416 	struct file *file;
4417 	int err;
4418 
4419 	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
4420 		return -EINVAL;
4421 
4422 	if (!capable(CAP_SYS_ADMIN))
4423 		return -EPERM;
4424 
4425 	if (attr->task_fd_query.flags != 0)
4426 		return -EINVAL;
4427 
4428 	rcu_read_lock();
4429 	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
4430 	rcu_read_unlock();
4431 	if (!task)
4432 		return -ENOENT;
4433 
4434 	err = 0;
4435 	file = fget_task(task, fd);
4436 	put_task_struct(task);
4437 	if (!file)
4438 		return -EBADF;
4439 
4440 	if (file->f_op == &bpf_link_fops) {
4441 		struct bpf_link *link = file->private_data;
4442 
4443 		if (link->ops == &bpf_raw_tp_link_lops) {
4444 			struct bpf_raw_tp_link *raw_tp =
4445 				container_of(link, struct bpf_raw_tp_link, link);
4446 			struct bpf_raw_event_map *btp = raw_tp->btp;
4447 
4448 			err = bpf_task_fd_query_copy(attr, uattr,
4449 						     raw_tp->link.prog->aux->id,
4450 						     BPF_FD_TYPE_RAW_TRACEPOINT,
4451 						     btp->tp->name, 0, 0);
4452 			goto put_file;
4453 		}
4454 		goto out_not_supp;
4455 	}
4456 
4457 	event = perf_get_event(file);
4458 	if (!IS_ERR(event)) {
4459 		u64 probe_offset, probe_addr;
4460 		u32 prog_id, fd_type;
4461 		const char *buf;
4462 
4463 		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
4464 					      &buf, &probe_offset,
4465 					      &probe_addr);
4466 		if (!err)
4467 			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
4468 						     fd_type, buf,
4469 						     probe_offset,
4470 						     probe_addr);
4471 		goto put_file;
4472 	}
4473 
4474 out_not_supp:
4475 	err = -ENOTSUPP;
4476 put_file:
4477 	fput(file);
4478 	return err;
4479 }
4480 
4481 #define BPF_MAP_BATCH_LAST_FIELD batch.flags
4482 
4483 #define BPF_DO_BATCH(fn)			\
4484 	do {					\
4485 		if (!fn) {			\
4486 			err = -ENOTSUPP;	\
4487 			goto err_put;		\
4488 		}				\
4489 		err = fn(map, attr, uattr);	\
4490 	} while (0)
4491 
bpf_map_do_batch(const union bpf_attr * attr,union bpf_attr __user * uattr,int cmd)4492 static int bpf_map_do_batch(const union bpf_attr *attr,
4493 			    union bpf_attr __user *uattr,
4494 			    int cmd)
4495 {
4496 	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
4497 			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
4498 	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
4499 	struct bpf_map *map;
4500 	int err, ufd;
4501 	struct fd f;
4502 
4503 	if (CHECK_ATTR(BPF_MAP_BATCH))
4504 		return -EINVAL;
4505 
4506 	ufd = attr->batch.map_fd;
4507 	f = fdget(ufd);
4508 	map = __bpf_map_get(f);
4509 	if (IS_ERR(map))
4510 		return PTR_ERR(map);
4511 	if (has_write)
4512 		bpf_map_write_active_inc(map);
4513 	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
4514 		err = -EPERM;
4515 		goto err_put;
4516 	}
4517 	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
4518 		err = -EPERM;
4519 		goto err_put;
4520 	}
4521 
4522 	if (cmd == BPF_MAP_LOOKUP_BATCH)
4523 		BPF_DO_BATCH(map->ops->map_lookup_batch);
4524 	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
4525 		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
4526 	else if (cmd == BPF_MAP_UPDATE_BATCH)
4527 		BPF_DO_BATCH(map->ops->map_update_batch);
4528 	else
4529 		BPF_DO_BATCH(map->ops->map_delete_batch);
4530 err_put:
4531 	if (has_write)
4532 		bpf_map_write_active_dec(map);
4533 	fdput(f);
4534 	return err;
4535 }
4536 
4537 #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
link_create(union bpf_attr * attr,bpfptr_t uattr)4538 static int link_create(union bpf_attr *attr, bpfptr_t uattr)
4539 {
4540 	enum bpf_prog_type ptype;
4541 	struct bpf_prog *prog;
4542 	int ret;
4543 
4544 	if (CHECK_ATTR(BPF_LINK_CREATE))
4545 		return -EINVAL;
4546 
4547 	prog = bpf_prog_get(attr->link_create.prog_fd);
4548 	if (IS_ERR(prog))
4549 		return PTR_ERR(prog);
4550 
4551 	ret = bpf_prog_attach_check_attach_type(prog,
4552 						attr->link_create.attach_type);
4553 	if (ret)
4554 		goto out;
4555 
4556 	switch (prog->type) {
4557 	case BPF_PROG_TYPE_EXT:
4558 		break;
4559 	case BPF_PROG_TYPE_PERF_EVENT:
4560 	case BPF_PROG_TYPE_TRACEPOINT:
4561 		if (attr->link_create.attach_type != BPF_PERF_EVENT) {
4562 			ret = -EINVAL;
4563 			goto out;
4564 		}
4565 		break;
4566 	case BPF_PROG_TYPE_KPROBE:
4567 		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
4568 		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
4569 			ret = -EINVAL;
4570 			goto out;
4571 		}
4572 		break;
4573 	default:
4574 		ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4575 		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4576 			ret = -EINVAL;
4577 			goto out;
4578 		}
4579 		break;
4580 	}
4581 
4582 	switch (prog->type) {
4583 	case BPF_PROG_TYPE_CGROUP_SKB:
4584 	case BPF_PROG_TYPE_CGROUP_SOCK:
4585 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4586 	case BPF_PROG_TYPE_SOCK_OPS:
4587 	case BPF_PROG_TYPE_CGROUP_DEVICE:
4588 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
4589 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4590 		ret = cgroup_bpf_link_attach(attr, prog);
4591 		break;
4592 	case BPF_PROG_TYPE_EXT:
4593 		ret = bpf_tracing_prog_attach(prog,
4594 					      attr->link_create.target_fd,
4595 					      attr->link_create.target_btf_id,
4596 					      attr->link_create.tracing.cookie);
4597 		break;
4598 	case BPF_PROG_TYPE_LSM:
4599 	case BPF_PROG_TYPE_TRACING:
4600 		if (attr->link_create.attach_type != prog->expected_attach_type) {
4601 			ret = -EINVAL;
4602 			goto out;
4603 		}
4604 		if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
4605 			ret = bpf_raw_tp_link_attach(prog, NULL);
4606 		else if (prog->expected_attach_type == BPF_TRACE_ITER)
4607 			ret = bpf_iter_link_attach(attr, uattr, prog);
4608 		else if (prog->expected_attach_type == BPF_LSM_CGROUP)
4609 			ret = cgroup_bpf_link_attach(attr, prog);
4610 		else
4611 			ret = bpf_tracing_prog_attach(prog,
4612 						      attr->link_create.target_fd,
4613 						      attr->link_create.target_btf_id,
4614 						      attr->link_create.tracing.cookie);
4615 		break;
4616 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4617 	case BPF_PROG_TYPE_SK_LOOKUP:
4618 		ret = netns_bpf_link_create(attr, prog);
4619 		break;
4620 #ifdef CONFIG_NET
4621 	case BPF_PROG_TYPE_XDP:
4622 		ret = bpf_xdp_link_attach(attr, prog);
4623 		break;
4624 #endif
4625 	case BPF_PROG_TYPE_PERF_EVENT:
4626 	case BPF_PROG_TYPE_TRACEPOINT:
4627 		ret = bpf_perf_link_attach(attr, prog);
4628 		break;
4629 	case BPF_PROG_TYPE_KPROBE:
4630 		if (attr->link_create.attach_type == BPF_PERF_EVENT)
4631 			ret = bpf_perf_link_attach(attr, prog);
4632 		else
4633 			ret = bpf_kprobe_multi_link_attach(attr, prog);
4634 		break;
4635 	default:
4636 		ret = -EINVAL;
4637 	}
4638 
4639 out:
4640 	if (ret < 0)
4641 		bpf_prog_put(prog);
4642 	return ret;
4643 }
4644 
4645 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
4646 
link_update(union bpf_attr * attr)4647 static int link_update(union bpf_attr *attr)
4648 {
4649 	struct bpf_prog *old_prog = NULL, *new_prog;
4650 	struct bpf_link *link;
4651 	u32 flags;
4652 	int ret;
4653 
4654 	if (CHECK_ATTR(BPF_LINK_UPDATE))
4655 		return -EINVAL;
4656 
4657 	flags = attr->link_update.flags;
4658 	if (flags & ~BPF_F_REPLACE)
4659 		return -EINVAL;
4660 
4661 	link = bpf_link_get_from_fd(attr->link_update.link_fd);
4662 	if (IS_ERR(link))
4663 		return PTR_ERR(link);
4664 
4665 	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4666 	if (IS_ERR(new_prog)) {
4667 		ret = PTR_ERR(new_prog);
4668 		goto out_put_link;
4669 	}
4670 
4671 	if (flags & BPF_F_REPLACE) {
4672 		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
4673 		if (IS_ERR(old_prog)) {
4674 			ret = PTR_ERR(old_prog);
4675 			old_prog = NULL;
4676 			goto out_put_progs;
4677 		}
4678 	} else if (attr->link_update.old_prog_fd) {
4679 		ret = -EINVAL;
4680 		goto out_put_progs;
4681 	}
4682 
4683 	if (link->ops->update_prog)
4684 		ret = link->ops->update_prog(link, new_prog, old_prog);
4685 	else
4686 		ret = -EINVAL;
4687 
4688 out_put_progs:
4689 	if (old_prog)
4690 		bpf_prog_put(old_prog);
4691 	if (ret)
4692 		bpf_prog_put(new_prog);
4693 out_put_link:
4694 	bpf_link_put(link);
4695 	return ret;
4696 }
4697 
4698 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
4699 
link_detach(union bpf_attr * attr)4700 static int link_detach(union bpf_attr *attr)
4701 {
4702 	struct bpf_link *link;
4703 	int ret;
4704 
4705 	if (CHECK_ATTR(BPF_LINK_DETACH))
4706 		return -EINVAL;
4707 
4708 	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
4709 	if (IS_ERR(link))
4710 		return PTR_ERR(link);
4711 
4712 	if (link->ops->detach)
4713 		ret = link->ops->detach(link);
4714 	else
4715 		ret = -EOPNOTSUPP;
4716 
4717 	bpf_link_put(link);
4718 	return ret;
4719 }
4720 
bpf_link_inc_not_zero(struct bpf_link * link)4721 static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
4722 {
4723 	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
4724 }
4725 
/* Look up a link by @id in link_idr and take a reference on it.
 *
 * Returns the link with its refcount raised, ERR_PTR(-ENOENT) when @id is
 * 0, unknown, or the link's refcount already dropped to 0, and
 * ERR_PTR(-EAGAIN) when the link is in the IDR but its own id field is
 * still 0.
 */
struct bpf_link *bpf_link_by_id(u32 id)
{
	struct bpf_link *link;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&link_idr_lock);
	link = idr_find(&link_idr, id);
	if (!link)
		link = ERR_PTR(-ENOENT);
	else if (!link->id)
		/* not "settled" yet: ID is still 0, pretend it doesn't exist */
		link = ERR_PTR(-EAGAIN);
	else
		link = bpf_link_inc_not_zero(link);
	spin_unlock_bh(&link_idr_lock);

	return link;
}
4747 
/* Find the first link at or after *@id in link_idr on which a reference
 * can be taken.
 *
 * *@id is passed to idr_get_next(); each time a found link cannot be
 * referenced (refcount already 0) *@id is incremented and the search
 * repeats.
 *
 * Returns the referenced link, or NULL when idr_get_next() finds nothing
 * more.
 */
struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
{
	struct bpf_link *link;

	spin_lock_bh(&link_idr_lock);
	for (;;) {
		link = idr_get_next(&link_idr, id);
		if (!link)
			break;

		link = bpf_link_inc_not_zero(link);
		if (!IS_ERR(link))
			break;

		/* this one is going away; continue with the next id */
		(*id)++;
	}
	spin_unlock_bh(&link_idr_lock);

	return link;
}
4766 
4767 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
4768 
bpf_link_get_fd_by_id(const union bpf_attr * attr)4769 static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
4770 {
4771 	struct bpf_link *link;
4772 	u32 id = attr->link_id;
4773 	int fd;
4774 
4775 	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
4776 		return -EINVAL;
4777 
4778 	if (!capable(CAP_SYS_ADMIN))
4779 		return -EPERM;
4780 
4781 	link = bpf_link_by_id(id);
4782 	if (IS_ERR(link))
4783 		return PTR_ERR(link);
4784 
4785 	fd = bpf_link_new_fd(link);
4786 	if (fd < 0)
4787 		bpf_link_put(link);
4788 
4789 	return fd;
4790 }
4791 
4792 DEFINE_MUTEX(bpf_stats_enabled_mutex);
4793 
bpf_stats_release(struct inode * inode,struct file * file)4794 static int bpf_stats_release(struct inode *inode, struct file *file)
4795 {
4796 	mutex_lock(&bpf_stats_enabled_mutex);
4797 	static_key_slow_dec(&bpf_stats_enabled_key.key);
4798 	mutex_unlock(&bpf_stats_enabled_mutex);
4799 	return 0;
4800 }
4801 
4802 static const struct file_operations bpf_stats_fops = {
4803 	.release = bpf_stats_release,
4804 };
4805 
bpf_enable_runtime_stats(void)4806 static int bpf_enable_runtime_stats(void)
4807 {
4808 	int fd;
4809 
4810 	mutex_lock(&bpf_stats_enabled_mutex);
4811 
4812 	/* Set a very high limit to avoid overflow */
4813 	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
4814 		mutex_unlock(&bpf_stats_enabled_mutex);
4815 		return -EBUSY;
4816 	}
4817 
4818 	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
4819 	if (fd >= 0)
4820 		static_key_slow_inc(&bpf_stats_enabled_key.key);
4821 
4822 	mutex_unlock(&bpf_stats_enabled_mutex);
4823 	return fd;
4824 }
4825 
4826 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
4827 
bpf_enable_stats(union bpf_attr * attr)4828 static int bpf_enable_stats(union bpf_attr *attr)
4829 {
4830 
4831 	if (CHECK_ATTR(BPF_ENABLE_STATS))
4832 		return -EINVAL;
4833 
4834 	if (!capable(CAP_SYS_ADMIN))
4835 		return -EPERM;
4836 
4837 	switch (attr->enable_stats.type) {
4838 	case BPF_STATS_RUN_TIME:
4839 		return bpf_enable_runtime_stats();
4840 	default:
4841 		break;
4842 	}
4843 	return -EINVAL;
4844 }
4845 
4846 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
4847 
bpf_iter_create(union bpf_attr * attr)4848 static int bpf_iter_create(union bpf_attr *attr)
4849 {
4850 	struct bpf_link *link;
4851 	int err;
4852 
4853 	if (CHECK_ATTR(BPF_ITER_CREATE))
4854 		return -EINVAL;
4855 
4856 	if (attr->iter_create.flags)
4857 		return -EINVAL;
4858 
4859 	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
4860 	if (IS_ERR(link))
4861 		return PTR_ERR(link);
4862 
4863 	err = bpf_iter_new_fd(link);
4864 	bpf_link_put(link);
4865 
4866 	return err;
4867 }
4868 
4869 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
4870 
bpf_prog_bind_map(union bpf_attr * attr)4871 static int bpf_prog_bind_map(union bpf_attr *attr)
4872 {
4873 	struct bpf_prog *prog;
4874 	struct bpf_map *map;
4875 	struct bpf_map **used_maps_old, **used_maps_new;
4876 	int i, ret = 0;
4877 
4878 	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
4879 		return -EINVAL;
4880 
4881 	if (attr->prog_bind_map.flags)
4882 		return -EINVAL;
4883 
4884 	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
4885 	if (IS_ERR(prog))
4886 		return PTR_ERR(prog);
4887 
4888 	map = bpf_map_get(attr->prog_bind_map.map_fd);
4889 	if (IS_ERR(map)) {
4890 		ret = PTR_ERR(map);
4891 		goto out_prog_put;
4892 	}
4893 
4894 	mutex_lock(&prog->aux->used_maps_mutex);
4895 
4896 	used_maps_old = prog->aux->used_maps;
4897 
4898 	for (i = 0; i < prog->aux->used_map_cnt; i++)
4899 		if (used_maps_old[i] == map) {
4900 			bpf_map_put(map);
4901 			goto out_unlock;
4902 		}
4903 
4904 	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
4905 				      sizeof(used_maps_new[0]),
4906 				      GFP_KERNEL);
4907 	if (!used_maps_new) {
4908 		ret = -ENOMEM;
4909 		goto out_unlock;
4910 	}
4911 
4912 	memcpy(used_maps_new, used_maps_old,
4913 	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
4914 	used_maps_new[prog->aux->used_map_cnt] = map;
4915 
4916 	prog->aux->used_map_cnt++;
4917 	prog->aux->used_maps = used_maps_new;
4918 
4919 	kfree(used_maps_old);
4920 
4921 out_unlock:
4922 	mutex_unlock(&prog->aux->used_maps_mutex);
4923 
4924 	if (ret)
4925 		bpf_map_put(map);
4926 out_prog_put:
4927 	bpf_prog_put(prog);
4928 	return ret;
4929 }
4930 
/* Common implementation behind the bpf() system call and the in-kernel
 * callers in this file.
 *
 * @cmd:   BPF_* command to execute
 * @uattr: user or kernel pointer to the caller's union bpf_attr
 * @size:  number of bytes the caller provided at @uattr
 *
 * Up to sizeof(union bpf_attr) bytes are copied into a zeroed local
 * attribute block, the Android vendor hook and security_bpf() are
 * invoked, and the command is dispatched to its handler.
 *
 * Returns the handler's result; -EPERM for BPF_MAP_CREATE/BPF_PROG_LOAD
 * when the caller is not bpf_capable() and unprivileged BPF is disabled;
 * the error from bpf_check_uarg_tail_zero(); -EFAULT when the attributes
 * cannot be copied in; a negative security_bpf() result; or -EINVAL for
 * an unknown @cmd.
 */
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	bool allowed;
	int err;

	allowed = bpf_capable() || !sysctl_unprivileged_bpf_disabled;

	/* Intent here is for unprivileged_bpf_disabled to block key object
	 * creation commands for unprivileged users; other actions depend
	 * on fd availability and access to bpffs, so are dependent on
	 * object creation success.  Capabilities are later verified for
	 * operations such as load and map create, so even with unprivileged
	 * BPF disabled, capability checks are still carried out for these
	 * and other operations.
	 */
	if (!allowed && (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* the caller may supply fewer bytes than sizeof(attr); the rest
	 * of the local copy stays zero
	 */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	trace_android_vh_check_bpf_syscall(cmd, &attr, size);

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	/* map element and lifetime operations */
	case BPF_MAP_CREATE:
		return map_create(&attr);
	case BPF_MAP_LOOKUP_ELEM:
		return map_lookup_elem(&attr);
	case BPF_MAP_UPDATE_ELEM:
		return map_update_elem(&attr, uattr);
	case BPF_MAP_DELETE_ELEM:
		return map_delete_elem(&attr, uattr);
	case BPF_MAP_GET_NEXT_KEY:
		return map_get_next_key(&attr);
	case BPF_MAP_FREEZE:
		return map_freeze(&attr);

	/* program load, pinning, attach/detach, query and test run */
	case BPF_PROG_LOAD:
		return bpf_prog_load(&attr, uattr);
	case BPF_OBJ_PIN:
		return bpf_obj_pin(&attr);
	case BPF_OBJ_GET:
		return bpf_obj_get(&attr);
	case BPF_PROG_ATTACH:
		return bpf_prog_attach(&attr);
	case BPF_PROG_DETACH:
		return bpf_prog_detach(&attr);
	case BPF_PROG_QUERY:
		return bpf_prog_query(&attr, uattr.user);
	case BPF_PROG_TEST_RUN:
		return bpf_prog_test_run(&attr, uattr.user);

	/* id iteration and id -> fd conversion */
	case BPF_PROG_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr.user,
					   &prog_idr, &prog_idr_lock);
	case BPF_MAP_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr.user,
					   &map_idr, &map_idr_lock);
	case BPF_BTF_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr.user,
					   &btf_idr, &btf_idr_lock);
	case BPF_PROG_GET_FD_BY_ID:
		return bpf_prog_get_fd_by_id(&attr);
	case BPF_MAP_GET_FD_BY_ID:
		return bpf_map_get_fd_by_id(&attr);

	case BPF_OBJ_GET_INFO_BY_FD:
		return bpf_obj_get_info_by_fd(&attr, uattr.user);
	case BPF_RAW_TRACEPOINT_OPEN:
		return bpf_raw_tracepoint_open(&attr);
	case BPF_BTF_LOAD:
		return bpf_btf_load(&attr, uattr);
	case BPF_BTF_GET_FD_BY_ID:
		return bpf_btf_get_fd_by_id(&attr);
	case BPF_TASK_FD_QUERY:
		return bpf_task_fd_query(&attr, uattr.user);
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		return map_lookup_and_delete_elem(&attr);

	/* all batch commands share one handler, told apart by cmd */
	case BPF_MAP_LOOKUP_BATCH:
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
	case BPF_MAP_UPDATE_BATCH:
	case BPF_MAP_DELETE_BATCH:
		return bpf_map_do_batch(&attr, uattr.user, cmd);

	/* link operations */
	case BPF_LINK_CREATE:
		return link_create(&attr, uattr);
	case BPF_LINK_UPDATE:
		return link_update(&attr);
	case BPF_LINK_GET_FD_BY_ID:
		return bpf_link_get_fd_by_id(&attr);
	case BPF_LINK_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr.user,
					   &link_idr, &link_idr_lock);

	case BPF_ENABLE_STATS:
		return bpf_enable_stats(&attr);
	case BPF_ITER_CREATE:
		return bpf_iter_create(&attr);
	case BPF_LINK_DETACH:
		return link_detach(&attr);
	case BPF_PROG_BIND_MAP:
		return bpf_prog_bind_map(&attr);
	default:
		return -EINVAL;
	}
}
5088 
/* bpf() system call entry point: wraps the user-space attribute pointer
 * with USER_BPFPTR() and forwards everything to __sys_bpf().
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	bpfptr_t user_attr = USER_BPFPTR(uattr);

	return __sys_bpf(cmd, user_attr, size);
}
5093 
syscall_prog_is_valid_access(int off,int size,enum bpf_access_type type,const struct bpf_prog * prog,struct bpf_insn_access_aux * info)5094 static bool syscall_prog_is_valid_access(int off, int size,
5095 					 enum bpf_access_type type,
5096 					 const struct bpf_prog *prog,
5097 					 struct bpf_insn_access_aux *info)
5098 {
5099 	if (off < 0 || off >= U16_MAX)
5100 		return false;
5101 	if (off % size != 0)
5102 		return false;
5103 	return true;
5104 }
5105 
/* bpf_sys_bpf() helper: run a bpf() command from a BPF program.
 *
 * Only the commands listed below are forwarded to __sys_bpf(), with @attr
 * wrapped as a kernel pointer; every other command yields -EINVAL.
 */
BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
	default:
		return -EINVAL;
	}
}
5124 
5125 
5126 /* To shut up -Wmissing-prototypes.
5127  * This function is used by the kernel light skeleton
5128  * to load bpf programs when modules are loaded or during kernel boot.
5129  * See tools/lib/bpf/skel_internal.h
5130  */
5131 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
5132 
kern_sys_bpf(int cmd,union bpf_attr * attr,unsigned int size)5133 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
5134 {
5135 	struct bpf_prog * __maybe_unused prog;
5136 	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
5137 
5138 	switch (cmd) {
5139 #ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
5140 	case BPF_PROG_TEST_RUN:
5141 		if (attr->test.data_in || attr->test.data_out ||
5142 		    attr->test.ctx_out || attr->test.duration ||
5143 		    attr->test.repeat || attr->test.flags)
5144 			return -EINVAL;
5145 
5146 		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
5147 		if (IS_ERR(prog))
5148 			return PTR_ERR(prog);
5149 
5150 		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
5151 		    attr->test.ctx_size_in > U16_MAX) {
5152 			bpf_prog_put(prog);
5153 			return -EINVAL;
5154 		}
5155 
5156 		run_ctx.bpf_cookie = 0;
5157 		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
5158 			/* recursion detected */
5159 			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
5160 			bpf_prog_put(prog);
5161 			return -EBUSY;
5162 		}
5163 		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
5164 		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
5165 						&run_ctx);
5166 		bpf_prog_put(prog);
5167 		return 0;
5168 #endif
5169 	default:
5170 		return ____bpf_sys_bpf(cmd, attr, size);
5171 	}
5172 }
5173 EXPORT_SYMBOL(kern_sys_bpf);
5174 
5175 static const struct bpf_func_proto bpf_sys_bpf_proto = {
5176 	.func		= bpf_sys_bpf,
5177 	.gpl_only	= false,
5178 	.ret_type	= RET_INTEGER,
5179 	.arg1_type	= ARG_ANYTHING,
5180 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5181 	.arg3_type	= ARG_CONST_SIZE,
5182 };
5183 
5184 const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)5185 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5186 {
5187 	return bpf_base_func_proto(func_id);
5188 }
5189 
/* bpf_sys_close() helper body: closes @fd via close_fd() and returns its result. */
BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* A bpf program calling this helper must not be inside an fdget()
	 * whose matching fdput() has not completed yet.
	 * The only callchain in which this helper is allowed is:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}
5199 
5200 static const struct bpf_func_proto bpf_sys_close_proto = {
5201 	.func		= bpf_sys_close,
5202 	.gpl_only	= false,
5203 	.ret_type	= RET_INTEGER,
5204 	.arg1_type	= ARG_ANYTHING,
5205 };
5206 
/*
 * bpf_kallsyms_lookup_name() helper body.
 *
 * Looks @name up with kallsyms_lookup_name() and stores the result in @res.
 *
 * Return: 0 when a non-zero address was found; -EINVAL if @flags is set,
 * @name_sz is <= 1 or the last byte of @name is not NUL; -EPERM if
 * bpf_dump_raw_ok() refuses the current credentials; -ENOENT if the lookup
 * yields 0 (in which case 0 has been written to @res).
 */
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	u64 addr;

	if (flags)
		return -EINVAL;

	/* Need at least one character and a NUL in the final byte. */
	if (name_sz <= 1 || name[name_sz - 1] != '\0')
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	addr = kallsyms_lookup_name(name);
	*res = addr;
	if (!addr)
		return -ENOENT;

	return 0;
}
5221 
5222 static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
5223 	.func		= bpf_kallsyms_lookup_name,
5224 	.gpl_only	= false,
5225 	.ret_type	= RET_INTEGER,
5226 	.arg1_type	= ARG_PTR_TO_MEM,
5227 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
5228 	.arg3_type	= ARG_ANYTHING,
5229 	.arg4_type	= ARG_PTR_TO_LONG,
5230 };
5231 
5232 static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)5233 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5234 {
5235 	switch (func_id) {
5236 	case BPF_FUNC_sys_bpf:
5237 		return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
5238 	case BPF_FUNC_btf_find_by_name_kind:
5239 		return &bpf_btf_find_by_name_kind_proto;
5240 	case BPF_FUNC_sys_close:
5241 		return &bpf_sys_close_proto;
5242 	case BPF_FUNC_kallsyms_lookup_name:
5243 		return &bpf_kallsyms_lookup_name_proto;
5244 	default:
5245 		return tracing_prog_func_proto(func_id, prog);
5246 	}
5247 }
5248 
5249 const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
5250 	.get_func_proto  = syscall_prog_func_proto,
5251 	.is_valid_access = syscall_prog_is_valid_access,
5252 };
5253 
5254 const struct bpf_prog_ops bpf_syscall_prog_ops = {
5255 	.test_run = bpf_prog_test_run_syscall,
5256 };
5257 
5258 #ifdef CONFIG_SYSCTL
/*
 * sysctl handler for the "bpf_stats_enabled" entry.
 *
 * table->data points at a struct static_key.  The value exposed to user
 * space is kept in the function-local static @saved_val and parsed or
 * formatted through a temporary ctl_table bounded to [0, 1].  When a
 * successful write changes the value, the static key is incremented
 * (new value non-zero) or decremented (new value zero).
 *
 * Writes require CAP_SYS_ADMIN.  The whole read/update sequence runs
 * under bpf_stats_enabled_mutex.
 *
 * Return: -EPERM for a write without CAP_SYS_ADMIN, otherwise the result
 * of proc_dointvec_minmax().
 */
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *stats_key = (struct static_key *)table->data;
	static int saved_val;
	int new_val, err;
	struct ctl_table shadow = {
		.data   = &new_val,
		.maxlen = sizeof(new_val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	new_val = saved_val;
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);

	/* Reads, failed writes and no-op writes leave the key untouched. */
	if (!write || err || new_val == saved_val)
		goto unlock;

	if (new_val)
		static_key_slow_inc(stats_key);
	else
		static_key_slow_dec(stats_key);
	saved_val = new_val;

unlock:
	mutex_unlock(&bpf_stats_enabled_mutex);
	return err;
}
5289 
unpriv_ebpf_notify(int new_state)5290 void __weak unpriv_ebpf_notify(int new_state)
5291 {
5292 }
5293 
/*
 * sysctl handler for the "unprivileged_bpf_disabled" entry.
 *
 * Works on a stack copy of the integer behind table->data, using a copy of
 * @table (so the entry's bounds apply) for proc_dointvec_minmax().  A
 * successful write is committed back to table->data, except that once the
 * value is 1 it is locked: trying to write anything other than 1 fails
 * with -EPERM and nothing is stored.
 *
 * On every write that gets past that check, including one where
 * proc_dointvec_minmax() failed, unpriv_ebpf_notify() is called with the
 * local value.
 *
 * Return: -EPERM for a write without CAP_SYS_ADMIN or an attempt to leave
 * the locked state, otherwise the result of proc_dointvec_minmax().
 */
static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int *knob = table->data;
	int cur = *knob;
	const bool was_locked = (cur == 1);
	struct ctl_table shadow = *table;
	int err;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	shadow.data = &cur;
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (!write)
		return err;

	if (!err) {
		/* Value 1 is sticky: it cannot be changed to anything else. */
		if (was_locked && cur != 1)
			return -EPERM;
		*knob = cur;
	}

	unpriv_ebpf_notify(cur);

	return err;
}
5317 
5318 static struct ctl_table bpf_syscall_table[] = {
5319 	{
5320 		.procname	= "unprivileged_bpf_disabled",
5321 		.data		= &sysctl_unprivileged_bpf_disabled,
5322 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
5323 		.mode		= 0644,
5324 		.proc_handler	= bpf_unpriv_handler,
5325 		.extra1		= SYSCTL_ZERO,
5326 		.extra2		= SYSCTL_TWO,
5327 	},
5328 	{
5329 		.procname	= "bpf_stats_enabled",
5330 		.data		= &bpf_stats_enabled_key.key,
5331 		.maxlen		= sizeof(bpf_stats_enabled_key),
5332 		.mode		= 0644,
5333 		.proc_handler	= bpf_stats_handler,
5334 	},
5335 	{ }
5336 };
5337 
bpf_syscall_sysctl_init(void)5338 static int __init bpf_syscall_sysctl_init(void)
5339 {
5340 	register_sysctl_init("kernel", bpf_syscall_table);
5341 	return 0;
5342 }
5343 late_initcall(bpf_syscall_sysctl_init);
5344 #endif /* CONFIG_SYSCTL */
5345