/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

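/* Check whether charging @pages additional pages against the current
 * user's RLIMIT_MEMLOCK would exceed the limit, without actually
 * charging anything.
 */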
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

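/* Charge the map's pages against the current user's RLIMIT_MEMLOCK and
 * remember the user so the charge can be undone when the map is freed.
 */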
static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (the underlying map implementation's ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

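/* file release handler: runs when the last reference to the map fd is
 * dropped; lets the map implementation clean up, then drops the fd's
 * user reference on the map
 */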
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags);
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

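/* ask the LSM for permission, then install the map behind a new
 * anonymous-inode file descriptor
 */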
int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

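/* translate BPF_F_RDONLY/BPF_F_WRONLY into the matching O_* access mode;
 * both flags at once is invalid, neither means read-write
 */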
int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

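/* called via syscall: look up the value stored for the given key and
 * copy it back to user space
 */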
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

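/* called via syscall: create or update the element for the given key */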
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

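/* called via syscall: delete the element for the given key */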
static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

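/* called via syscall: copy the key that follows the given key to user
 * space; used to iterate over all elements of a map
 */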
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

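/* find the registered program type and bind its ops to the program */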
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

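/* charge the program's pages against the current user's RLIMIT_MEMLOCK,
 * mirroring bpf_map_charge_memlock() above
 */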
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

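/* RCU callback: drop references on the maps used by the program and
 * free it once an RCU grace period has passed
 */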
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

const struct file_operations bpf_prog_fops = {
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

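/* ask the LSM for permission, then install the program behind a new
 * anonymous-inode file descriptor
 */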
int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

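/* take i additional references on the program, failing with -EBUSY if
 * that would push the refcount past BPF_MAX_REFCNT
 */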
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}

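/* resolve a program fd to its bpf_prog, optionally checking that it has
 * the expected type, and take a reference on success
 */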
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_CGROUP_SKB);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp)) {
			bpf_prog_put(prog);
			return PTR_ERR(cgrp);
		}

		ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
					attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
		if (ret)
			bpf_prog_put(prog);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

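/* bpf(2) syscall entry point: validate and copy the attribute union from
 * user space, then dispatch on the requested command
 */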
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}