/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

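/* bpf_prog_active is bumped around map update/delete below so that a BPF
 * program attached to a kprobe cannot recurse into those map operations
 * on the same CPU; see the comment in map_update_elem().
 */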
DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

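/* Walk the registered map types and let the matching implementation
 * allocate the map; fails with -EINVAL if attr->map_type is unknown.
 */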
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

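/* Map memory is accounted against the owning user's locked_vm and capped
 * by RLIMIT_MEMLOCK: precharge only checks that the headroom exists,
 * while charge/uncharge below actually move the counter.
 */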
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

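/* usercnt counts references held on behalf of user space (map fds);
 * when the last one is dropped, prog-array maps are cleared. The map
 * itself is freed only once refcnt reaches zero, see bpf_map_put().
 */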
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags);
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

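/* For example, with BPF_MAP_CREATE_LAST_FIELD defined as map_flags below,
 * CHECK_ATTR(BPF_MAP_CREATE) evaluates to true (i.e. the attr is rejected)
 * if any byte of the union past map_flags is non-zero, which keeps those
 * bytes available for future extensions.
 */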
#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
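
/* Illustrative sketch (not part of this file) of the matching user-space
 * call; it assumes the uapi <linux/bpf.h> for union bpf_attr, plus
 * <unistd.h> and <sys/syscall.h> for syscall(), and a registered map type
 * such as BPF_MAP_TYPE_ARRAY:
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 64,
 *	};
 *	int fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * On success the returned fd keeps the map alive until it is closed (or
 * pinned elsewhere); the fields above correspond one-to-one to what
 * bpf_map_show_fdinfo() prints.
 */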

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

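/* Per-cpu maps copy out one value per possible CPU, each slot rounded up
 * to 8 bytes, so the kernel-side buffer (and the user buffer it is copied
 * to) holds round_up(value_size, 8) * num_possible_cpus() bytes; all
 * other map types use plain value_size.
 */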
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

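/* Program types are registered at boot time just like map types; resolve
 * the requested type against that list and hook up the per-type ops.
 */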
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

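/* The final teardown is deferred through call_rcu(): a program that is
 * still executing under rcu_read_lock() on another CPU must not have its
 * instructions or maps freed out from under it.
 */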
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

const struct file_operations bpf_prog_fops = {
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define	BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}
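
/* Illustrative sketch (not part of this file) of a minimal user-space
 * BPF_PROG_LOAD call: two instructions, "r0 = 0; exit", loaded as a
 * socket filter. It assumes the uapi <linux/bpf.h> definitions plus
 * syscall() from <unistd.h>/<sys/syscall.h>.
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *		  .dst_reg = BPF_REG_0, .imm = 0 },	// r0 = 0
 *		{ .code = BPF_JMP | BPF_EXIT },		// return r0
 *	};
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = 2,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * kern_version only needs to match LINUX_VERSION_CODE for
 * BPF_PROG_TYPE_KPROBE programs, as checked above.
 */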

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

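/* Attach/detach wire a BPF_PROG_TYPE_CGROUP_SKB program to a cgroup
 * (given as target_fd) for inet ingress or egress; both operations
 * require CAP_NET_ADMIN.
 */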
static int bpf_prog_attach(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_CGROUP_SKB);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp)) {
			bpf_prog_put(prog);
			return PTR_ERR(cgrp);
		}

		ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
					attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
		if (ret)
			bpf_prog_put(prog);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}