• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Minimal file system backend for holding eBPF maps and programs,
3   * used by bpf(2) object pinning.
4   *
5   * Authors:
6   *
7   *	Daniel Borkmann <daniel@iogearbox.net>
8   *
9   * This program is free software; you can redistribute it and/or
10   * modify it under the terms of the GNU General Public License
11   * version 2 as published by the Free Software Foundation.
12   */
13  
14  #include <linux/init.h>
15  #include <linux/magic.h>
16  #include <linux/major.h>
17  #include <linux/mount.h>
18  #include <linux/namei.h>
19  #include <linux/fs.h>
20  #include <linux/kdev_t.h>
21  #include <linux/filter.h>
22  #include <linux/bpf.h>
23  
/*
 * Kind of object a pinned bpffs inode refers to.  The value is also
 * carried as the minor number of the dev_t handed to mknod while pinning.
 */
enum bpf_type {
	BPF_TYPE_UNSPEC = 0,	/* inode is neither a program nor a map */
	BPF_TYPE_PROG   = 1,	/* inode uses bpf_prog_iops */
	BPF_TYPE_MAP    = 2,	/* inode uses bpf_map_iops */
};
29  
bpf_any_get(void * raw,enum bpf_type type)30  static void *bpf_any_get(void *raw, enum bpf_type type)
31  {
32  	switch (type) {
33  	case BPF_TYPE_PROG:
34  		raw = bpf_prog_inc(raw);
35  		break;
36  	case BPF_TYPE_MAP:
37  		raw = bpf_map_inc(raw, true);
38  		break;
39  	default:
40  		WARN_ON_ONCE(1);
41  		break;
42  	}
43  
44  	return raw;
45  }
46  
bpf_any_put(void * raw,enum bpf_type type)47  static void bpf_any_put(void *raw, enum bpf_type type)
48  {
49  	switch (type) {
50  	case BPF_TYPE_PROG:
51  		bpf_prog_put(raw);
52  		break;
53  	case BPF_TYPE_MAP:
54  		bpf_map_put_with_uref(raw);
55  		break;
56  	default:
57  		WARN_ON_ONCE(1);
58  		break;
59  	}
60  }
61  
/*
 * Work out what kind of eBPF object the descriptor @ufd refers to.
 *
 * The descriptor is first tried as a map; if that lookup yields an error
 * pointer it is retried as a program.  *@type is set to the kind that was
 * tried last, and the result of that last lookup (object or ERR_PTR) is
 * returned.
 */
static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{
	void *obj = bpf_map_get_with_uref(ufd);

	if (!IS_ERR(obj)) {
		*type = BPF_TYPE_MAP;
		return obj;
	}

	/* Not a map: fall back to treating it as a program fd. */
	*type = BPF_TYPE_PROG;
	return bpf_prog_get(ufd);
}
75  
76  static const struct inode_operations bpf_dir_iops;
77  
78  static const struct inode_operations bpf_prog_iops = { };
79  static const struct inode_operations bpf_map_iops  = { };
80  
/*
 * Allocate and initialise a fresh inode on @sb.
 *
 * Only directories and regular files are accepted; any other file type in
 * @mode yields ERR_PTR(-EINVAL).  A failed allocation is reported as
 * ERR_PTR(-ENOSPC).  On success the inode has a new inode number, all
 * three timestamps set to the same current time, and its ownership
 * initialised from @dir and @mode via inode_init_owner().
 */
static struct inode *bpf_get_inode(struct super_block *sb,
				   const struct inode *dir,
				   umode_t mode)
{
	struct inode *inode;

	if (!S_ISDIR(mode) && !S_ISREG(mode))
		return ERR_PTR(-EINVAL);

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOSPC);

	inode->i_ino = get_next_ino();

	/* One clock read, shared by atime, mtime and ctime. */
	inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);

	inode_init_owner(inode, dir, mode);

	return inode;
}
108  
bpf_inode_type(const struct inode * inode,enum bpf_type * type)109  static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
110  {
111  	*type = BPF_TYPE_UNSPEC;
112  	if (inode->i_op == &bpf_prog_iops)
113  		*type = BPF_TYPE_PROG;
114  	else if (inode->i_op == &bpf_map_iops)
115  		*type = BPF_TYPE_MAP;
116  	else
117  		return -EACCES;
118  
119  	return 0;
120  }
121  
/*
 * ->mkdir() for bpffs directories.
 *
 * Creates a directory inode that again uses bpf_dir_iops, bumps the link
 * count of both the new inode and its parent @dir, binds the inode to
 * @dentry and takes an extra reference on @dentry.
 *
 * Returns 0 on success or the error from bpf_get_inode().
 */
static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *child = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);

	if (IS_ERR(child))
		return PTR_ERR(child);

	child->i_fop = &simple_dir_operations;
	child->i_op = &bpf_dir_iops;

	inc_nlink(child);
	inc_nlink(dir);

	d_instantiate(dentry, child);
	dget(dentry);

	return 0;
}
141  
/*
 * Create the regular-file inode that represents a pinned object.
 *
 * @iops tags the inode as program or map, and the object pointer stashed
 * in dentry->d_fsdata becomes the inode's i_private.  The inode is then
 * bound to @dentry, on which an extra reference is taken.
 *
 * Returns 0 on success or the error from bpf_get_inode().
 */
static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry,
			 umode_t mode, const struct inode_operations *iops)
{
	struct inode *obj_inode;

	obj_inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG);
	if (IS_ERR(obj_inode))
		return PTR_ERR(obj_inode);

	obj_inode->i_private = dentry->d_fsdata;
	obj_inode->i_op = iops;

	d_instantiate(dentry, obj_inode);
	dget(dentry);

	return 0;
}
159  
/*
 * ->mknod() for bpffs directories.
 *
 * The request is only honoured when the device number has major
 * UNNAMED_MAJOR, @mode describes a regular file and an object pointer
 * has been stashed in dentry->d_fsdata.  The minor number selects the
 * object kind (enum bpf_type).  Everything else is refused with -EPERM.
 */
static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t devt)
{
	const struct inode_operations *iops;

	if (MAJOR(devt) != UNNAMED_MAJOR)
		return -EPERM;
	if (!S_ISREG(mode))
		return -EPERM;
	if (!dentry->d_fsdata)
		return -EPERM;

	switch ((enum bpf_type)MINOR(devt)) {
	case BPF_TYPE_PROG:
		iops = &bpf_prog_iops;
		break;
	case BPF_TYPE_MAP:
		iops = &bpf_map_iops;
		break;
	default:
		return -EPERM;
	}

	return bpf_mkobj_ops(dir, dentry, mode, iops);
}
178  
179  static struct dentry *
bpf_lookup(struct inode * dir,struct dentry * dentry,unsigned flags)180  bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
181  {
182  	if (strchr(dentry->d_name.name, '.'))
183  		return ERR_PTR(-EPERM);
184  	return simple_lookup(dir, dentry, flags);
185  }
186  
187  static const struct inode_operations bpf_dir_iops = {
188  	.lookup		= bpf_lookup,
189  	.mknod		= bpf_mkobj,
190  	.mkdir		= bpf_mkdir,
191  	.rmdir		= simple_rmdir,
192  	.rename		= simple_rename,
193  	.link		= simple_link,
194  	.unlink		= simple_unlink,
195  };
196  
bpf_obj_do_pin(const struct filename * pathname,void * raw,enum bpf_type type)197  static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
198  			  enum bpf_type type)
199  {
200  	struct dentry *dentry;
201  	struct inode *dir;
202  	struct path path;
203  	umode_t mode;
204  	dev_t devt;
205  	int ret;
206  
207  	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
208  	if (IS_ERR(dentry))
209  		return PTR_ERR(dentry);
210  
211  	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
212  	devt = MKDEV(UNNAMED_MAJOR, type);
213  
214  	ret = security_path_mknod(&path, dentry, mode, devt);
215  	if (ret)
216  		goto out;
217  
218  	dir = d_inode(path.dentry);
219  	if (dir->i_op != &bpf_dir_iops) {
220  		ret = -EPERM;
221  		goto out;
222  	}
223  
224  	dentry->d_fsdata = raw;
225  	ret = vfs_mknod(dir, dentry, mode, devt);
226  	dentry->d_fsdata = NULL;
227  out:
228  	done_path_create(&path, dentry);
229  	return ret;
230  }
231  
/*
 * Pin the eBPF object behind descriptor @ufd at the user-supplied
 * @pathname.
 *
 * The object obtained from bpf_fd_probe_obj() is released again via
 * bpf_any_put() if pinning fails; on success it is left with the newly
 * created node.
 *
 * Returns 0 on success or a negative errno.
 */
int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
{
	struct filename *pname = getname(pathname);
	enum bpf_type type;
	void *raw;
	int ret;

	if (IS_ERR(pname))
		return PTR_ERR(pname);

	raw = bpf_fd_probe_obj(ufd, &type);
	if (IS_ERR(raw)) {
		ret = PTR_ERR(raw);
	} else {
		ret = bpf_obj_do_pin(pname, raw, type);
		if (ret != 0)
			bpf_any_put(raw, type);
	}

	putname(pname);
	return ret;
}
256  
bpf_obj_do_get(const struct filename * pathname,enum bpf_type * type,int flags)257  static void *bpf_obj_do_get(const struct filename *pathname,
258  			    enum bpf_type *type, int flags)
259  {
260  	struct inode *inode;
261  	struct path path;
262  	void *raw;
263  	int ret;
264  
265  	ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
266  	if (ret)
267  		return ERR_PTR(ret);
268  
269  	inode = d_backing_inode(path.dentry);
270  	ret = inode_permission(inode, ACC_MODE(flags));
271  	if (ret)
272  		goto out;
273  
274  	ret = bpf_inode_type(inode, type);
275  	if (ret)
276  		goto out;
277  
278  	raw = bpf_any_get(inode->i_private, *type);
279  	if (!IS_ERR(raw))
280  		touch_atime(&path);
281  
282  	path_put(&path);
283  	return raw;
284  out:
285  	path_put(&path);
286  	return ERR_PTR(ret);
287  }
288  
/*
 * Open the object pinned at the user-supplied @pathname and return a new
 * file descriptor for it.
 *
 * @flags is translated by bpf_get_file_flag(); a negative translation
 * result is returned as-is.  If creating the descriptor fails, the object
 * obtained from bpf_obj_do_get() is released again.
 *
 * Returns the new descriptor (>= 0) or a negative errno.
 */
int bpf_obj_get_user(const char __user *pathname, int flags)
{
	enum bpf_type type = BPF_TYPE_UNSPEC;
	struct filename *pname;
	void *raw;
	int f_flags;
	int ret;

	f_flags = bpf_get_file_flag(flags);
	if (f_flags < 0)
		return f_flags;

	pname = getname(pathname);
	if (IS_ERR(pname))
		return PTR_ERR(pname);

	raw = bpf_obj_do_get(pname, &type, f_flags);
	if (IS_ERR(raw)) {
		ret = PTR_ERR(raw);
		goto out;
	}

	switch (type) {
	case BPF_TYPE_PROG:
		ret = bpf_prog_new_fd(raw);
		break;
	case BPF_TYPE_MAP:
		ret = bpf_map_new_fd(raw, f_flags);
		break;
	default:
		/* Neither program nor map: report "no such entry". */
		ret = -ENOENT;
		goto out;
	}

	if (ret < 0)
		bpf_any_put(raw, type);
out:
	putname(pname);
	return ret;
}
324  
__get_prog_inode(struct inode * inode,enum bpf_prog_type type)325  static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
326  {
327  	struct bpf_prog *prog;
328  	int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
329  	if (ret)
330  		return ERR_PTR(ret);
331  
332  	if (inode->i_op == &bpf_map_iops)
333  		return ERR_PTR(-EINVAL);
334  	if (inode->i_op != &bpf_prog_iops)
335  		return ERR_PTR(-EACCES);
336  
337  	prog = inode->i_private;
338  
339  	ret = security_bpf_prog(prog);
340  	if (ret < 0)
341  		return ERR_PTR(ret);
342  
343  	return bpf_prog_inc(prog);
344  }
345  
bpf_prog_get_type_path(const char * name,enum bpf_prog_type type)346  struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
347  {
348  	struct bpf_prog *prog;
349  	struct path path;
350  	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
351  	if (ret)
352  		return ERR_PTR(ret);
353  	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
354  	if (!IS_ERR(prog))
355  		touch_atime(&path);
356  	path_put(&path);
357  	return prog;
358  }
359  EXPORT_SYMBOL(bpf_prog_get_type_path);
360  
bpf_evict_inode(struct inode * inode)361  static void bpf_evict_inode(struct inode *inode)
362  {
363  	enum bpf_type type;
364  
365  	truncate_inode_pages_final(&inode->i_data);
366  	clear_inode(inode);
367  
368  	if (!bpf_inode_type(inode, &type))
369  		bpf_any_put(inode->i_private, type);
370  }
371  
372  static const struct super_operations bpf_super_ops = {
373  	.statfs		= simple_statfs,
374  	.drop_inode	= generic_delete_inode,
375  	.evict_inode	= bpf_evict_inode,
376  };
377  
bpf_fill_super(struct super_block * sb,void * data,int silent)378  static int bpf_fill_super(struct super_block *sb, void *data, int silent)
379  {
380  	static struct tree_descr bpf_rfiles[] = { { "" } };
381  	struct inode *inode;
382  	int ret;
383  
384  	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
385  	if (ret)
386  		return ret;
387  
388  	sb->s_op = &bpf_super_ops;
389  
390  	inode = sb->s_root->d_inode;
391  	inode->i_op = &bpf_dir_iops;
392  	inode->i_mode &= ~S_IALLUGO;
393  	inode->i_mode |= S_ISVTX | S_IRWXUGO;
394  
395  	return 0;
396  }
397  
bpf_mount(struct file_system_type * type,int flags,const char * dev_name,void * data)398  static struct dentry *bpf_mount(struct file_system_type *type, int flags,
399  				const char *dev_name, void *data)
400  {
401  	return mount_nodev(type, flags, data, bpf_fill_super);
402  }
403  
404  static struct file_system_type bpf_fs_type = {
405  	.owner		= THIS_MODULE,
406  	.name		= "bpf",
407  	.mount		= bpf_mount,
408  	.kill_sb	= kill_litter_super,
409  };
410  
bpf_init(void)411  static int __init bpf_init(void)
412  {
413  	int ret;
414  
415  	ret = sysfs_create_mount_point(fs_kobj, "bpf");
416  	if (ret)
417  		return ret;
418  
419  	ret = register_filesystem(&bpf_fs_type);
420  	if (ret)
421  		sysfs_remove_mount_point(fs_kobj, "bpf");
422  
423  	return ret;
424  }
425  fs_initcall(bpf_init);
426