• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include <linux/ceph/ceph_debug.h>
3 
4 #include <linux/backing-dev.h>
5 #include <linux/ctype.h>
6 #include <linux/fs.h>
7 #include <linux/inet.h>
8 #include <linux/in6.h>
9 #include <linux/module.h>
10 #include <linux/mount.h>
11 #include <linux/parser.h>
12 #include <linux/sched.h>
13 #include <linux/seq_file.h>
14 #include <linux/slab.h>
15 #include <linux/statfs.h>
16 #include <linux/string.h>
17 
18 #include "super.h"
19 #include "mds_client.h"
20 
21 #include <linux/ceph/decode.h>
22 #include <linux/ceph/mon_client.h>
23 #include <linux/ceph/auth.h>
24 #include <linux/ceph/debugfs.h>
25 
26 /*
27  * Ceph superblock operations
28  *
29  * Handle the basics of mounting, unmounting.
30  */
31 
32 /*
33  * super ops
34  */
ceph_put_super(struct super_block * s)35 static void ceph_put_super(struct super_block *s)
36 {
37 	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
38 
39 	dout("put_super\n");
40 	ceph_mdsc_close_sessions(fsc->mdsc);
41 
42 	/*
43 	 * ensure we release the bdi before put_anon_super releases
44 	 * the device name.
45 	 */
46 	if (s->s_bdi == &fsc->backing_dev_info) {
47 		bdi_unregister(&fsc->backing_dev_info);
48 		s->s_bdi = NULL;
49 	}
50 
51 	return;
52 }
53 
ceph_statfs(struct dentry * dentry,struct kstatfs * buf)54 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
55 {
56 	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
57 	struct ceph_monmap *monmap = fsc->client->monc.monmap;
58 	struct ceph_statfs st;
59 	u64 fsid;
60 	int err;
61 
62 	dout("statfs\n");
63 	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
64 	if (err < 0)
65 		return err;
66 
67 	/* fill in kstatfs */
68 	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
69 
70 	/*
71 	 * express utilization in terms of large blocks to avoid
72 	 * overflow on 32-bit machines.
73 	 *
74 	 * NOTE: for the time being, we make bsize == frsize to humor
75 	 * not-yet-ancient versions of glibc that are broken.
76 	 * Someday, we will probably want to report a real block
77 	 * size...  whatever that may mean for a network file system!
78 	 */
79 	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
80 	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
81 	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
82 	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
83 	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
84 
85 	buf->f_files = le64_to_cpu(st.num_objects);
86 	buf->f_ffree = -1;
87 	buf->f_namelen = NAME_MAX;
88 
89 	/* leave fsid little-endian, regardless of host endianness */
90 	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
91 	buf->f_fsid.val[0] = fsid & 0xffffffff;
92 	buf->f_fsid.val[1] = fsid >> 32;
93 
94 	return 0;
95 }
96 
97 
ceph_sync_fs(struct super_block * sb,int wait)98 static int ceph_sync_fs(struct super_block *sb, int wait)
99 {
100 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
101 
102 	if (!wait) {
103 		dout("sync_fs (non-blocking)\n");
104 		ceph_flush_dirty_caps(fsc->mdsc);
105 		dout("sync_fs (non-blocking) done\n");
106 		return 0;
107 	}
108 
109 	dout("sync_fs (blocking)\n");
110 	ceph_osdc_sync(&fsc->client->osdc);
111 	ceph_mdsc_sync(fsc->mdsc);
112 	dout("sync_fs (blocking) done\n");
113 	return 0;
114 }
115 
116 /*
117  * mount options
118  */
/*
 * Token ids for the filesystem mount options.  The ordering is
 * significant: parse_fsopt_token() treats every id below Opt_last_int
 * as taking an integer argument and every id between Opt_last_int and
 * Opt_last_string as taking a string argument.
 */
enum {
	/* options with an integer argument */
	Opt_wsize = 0,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_cap_release_safety,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,
	Opt_last_int,		/* marker: int args above */

	/* options with a string argument */
	Opt_snapdirname,
	Opt_last_string,	/* marker: string args above */

	/* boolean flags, in on/off pairs */
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_asyncreaddir,
	Opt_noasyncreaddir,
	Opt_dcache,
	Opt_nodcache,
	Opt_ino32,
	Opt_noino32,
};
145 
146 static match_table_t fsopt_tokens = {
147 	{Opt_wsize, "wsize=%d"},
148 	{Opt_rsize, "rsize=%d"},
149 	{Opt_rasize, "rasize=%d"},
150 	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
151 	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
152 	{Opt_cap_release_safety, "cap_release_safety=%d"},
153 	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
154 	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
155 	{Opt_congestion_kb, "write_congestion_kb=%d"},
156 	/* int args above */
157 	{Opt_snapdirname, "snapdirname=%s"},
158 	/* string args above */
159 	{Opt_dirstat, "dirstat"},
160 	{Opt_nodirstat, "nodirstat"},
161 	{Opt_rbytes, "rbytes"},
162 	{Opt_norbytes, "norbytes"},
163 	{Opt_asyncreaddir, "asyncreaddir"},
164 	{Opt_noasyncreaddir, "noasyncreaddir"},
165 	{Opt_dcache, "dcache"},
166 	{Opt_nodcache, "nodcache"},
167 	{Opt_ino32, "ino32"},
168 	{Opt_noino32, "noino32"},
169 	{-1, NULL}
170 };
171 
parse_fsopt_token(char * c,void * private)172 static int parse_fsopt_token(char *c, void *private)
173 {
174 	struct ceph_mount_options *fsopt = private;
175 	substring_t argstr[MAX_OPT_ARGS];
176 	int token, intval, ret;
177 
178 	token = match_token((char *)c, fsopt_tokens, argstr);
179 	if (token < 0)
180 		return -EINVAL;
181 
182 	if (token < Opt_last_int) {
183 		ret = match_int(&argstr[0], &intval);
184 		if (ret < 0) {
185 			pr_err("bad mount option arg (not int) "
186 			       "at '%s'\n", c);
187 			return ret;
188 		}
189 		dout("got int token %d val %d\n", token, intval);
190 	} else if (token > Opt_last_int && token < Opt_last_string) {
191 		dout("got string token %d val %s\n", token,
192 		     argstr[0].from);
193 	} else {
194 		dout("got token %d\n", token);
195 	}
196 
197 	switch (token) {
198 	case Opt_snapdirname:
199 		kfree(fsopt->snapdir_name);
200 		fsopt->snapdir_name = kstrndup(argstr[0].from,
201 					       argstr[0].to-argstr[0].from,
202 					       GFP_KERNEL);
203 		if (!fsopt->snapdir_name)
204 			return -ENOMEM;
205 		break;
206 
207 		/* misc */
208 	case Opt_wsize:
209 		fsopt->wsize = intval;
210 		break;
211 	case Opt_rsize:
212 		fsopt->rsize = intval;
213 		break;
214 	case Opt_rasize:
215 		fsopt->rasize = intval;
216 		break;
217 	case Opt_caps_wanted_delay_min:
218 		fsopt->caps_wanted_delay_min = intval;
219 		break;
220 	case Opt_caps_wanted_delay_max:
221 		fsopt->caps_wanted_delay_max = intval;
222 		break;
223 	case Opt_readdir_max_entries:
224 		fsopt->max_readdir = intval;
225 		break;
226 	case Opt_readdir_max_bytes:
227 		fsopt->max_readdir_bytes = intval;
228 		break;
229 	case Opt_congestion_kb:
230 		fsopt->congestion_kb = intval;
231 		break;
232 	case Opt_dirstat:
233 		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
234 		break;
235 	case Opt_nodirstat:
236 		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
237 		break;
238 	case Opt_rbytes:
239 		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
240 		break;
241 	case Opt_norbytes:
242 		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
243 		break;
244 	case Opt_asyncreaddir:
245 		fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
246 		break;
247 	case Opt_noasyncreaddir:
248 		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
249 		break;
250 	case Opt_dcache:
251 		fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
252 		break;
253 	case Opt_nodcache:
254 		fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
255 		break;
256 	case Opt_ino32:
257 		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
258 		break;
259 	case Opt_noino32:
260 		fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
261 		break;
262 	default:
263 		BUG_ON(token);
264 	}
265 	return 0;
266 }
267 
destroy_mount_options(struct ceph_mount_options * args)268 static void destroy_mount_options(struct ceph_mount_options *args)
269 {
270 	dout("destroy_mount_options %p\n", args);
271 	kfree(args->snapdir_name);
272 	kfree(args);
273 }
274 
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal (0).  If only @s2 is NULL the result is -1,
 * if only @s1 is NULL the result is 1.  Otherwise the result is
 * strcmp(@s1, @s2).
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
285 
compare_mount_options(struct ceph_mount_options * new_fsopt,struct ceph_options * new_opt,struct ceph_fs_client * fsc)286 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
287 				 struct ceph_options *new_opt,
288 				 struct ceph_fs_client *fsc)
289 {
290 	struct ceph_mount_options *fsopt1 = new_fsopt;
291 	struct ceph_mount_options *fsopt2 = fsc->mount_options;
292 	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
293 	int ret;
294 
295 	ret = memcmp(fsopt1, fsopt2, ofs);
296 	if (ret)
297 		return ret;
298 
299 	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
300 	if (ret)
301 		return ret;
302 
303 	return ceph_compare_options(new_opt, fsc->client);
304 }
305 
parse_mount_options(struct ceph_mount_options ** pfsopt,struct ceph_options ** popt,int flags,char * options,const char * dev_name,const char ** path)306 static int parse_mount_options(struct ceph_mount_options **pfsopt,
307 			       struct ceph_options **popt,
308 			       int flags, char *options,
309 			       const char *dev_name,
310 			       const char **path)
311 {
312 	struct ceph_mount_options *fsopt;
313 	const char *dev_name_end;
314 	int err = -ENOMEM;
315 
316 	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
317 	if (!fsopt)
318 		return -ENOMEM;
319 
320 	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
321 
322 	fsopt->sb_flags = flags;
323 	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
324 
325 	fsopt->rsize = CEPH_RSIZE_DEFAULT;
326 	fsopt->rasize = CEPH_RASIZE_DEFAULT;
327 	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
328 	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
329 	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
330 	fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
331 	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
332 	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
333 	fsopt->congestion_kb = default_congestion_kb();
334 
335 	/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
336 	err = -EINVAL;
337 	if (!dev_name)
338 		goto out;
339 	*path = strstr(dev_name, ":/");
340 	if (*path == NULL) {
341 		pr_err("device name is missing path (no :/ in %s)\n",
342 				dev_name);
343 		goto out;
344 	}
345 	dev_name_end = *path;
346 	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
347 
348 	/* path on server */
349 	*path += 2;
350 	dout("server path '%s'\n", *path);
351 
352 	*popt = ceph_parse_options(options, dev_name, dev_name_end,
353 				 parse_fsopt_token, (void *)fsopt);
354 	if (IS_ERR(*popt)) {
355 		err = PTR_ERR(*popt);
356 		goto out;
357 	}
358 
359 	/* success */
360 	*pfsopt = fsopt;
361 	return 0;
362 
363 out:
364 	destroy_mount_options(fsopt);
365 	return err;
366 }
367 
368 /**
369  * ceph_show_options - Show mount options in /proc/mounts
370  * @m: seq_file to write to
371  * @root: root of that (sub)tree
372  */
ceph_show_options(struct seq_file * m,struct dentry * root)373 static int ceph_show_options(struct seq_file *m, struct dentry *root)
374 {
375 	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
376 	struct ceph_mount_options *fsopt = fsc->mount_options;
377 	struct ceph_options *opt = fsc->client->options;
378 
379 	if (opt->flags & CEPH_OPT_FSID)
380 		seq_printf(m, ",fsid=%pU", &opt->fsid);
381 	if (opt->flags & CEPH_OPT_NOSHARE)
382 		seq_puts(m, ",noshare");
383 	if (opt->flags & CEPH_OPT_NOCRC)
384 		seq_puts(m, ",nocrc");
385 
386 	if (opt->name)
387 		seq_printf(m, ",name=%s", opt->name);
388 	if (opt->key)
389 		seq_puts(m, ",secret=<hidden>");
390 
391 	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
392 		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
393 	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
394 		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
395 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
396 		seq_printf(m, ",osdkeepalivetimeout=%d",
397 			   opt->osd_keepalive_timeout);
398 
399 	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
400 		seq_puts(m, ",dirstat");
401 	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
402 		seq_puts(m, ",norbytes");
403 	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
404 		seq_puts(m, ",noasyncreaddir");
405 	if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
406 		seq_puts(m, ",dcache");
407 	else
408 		seq_puts(m, ",nodcache");
409 
410 	if (fsopt->wsize)
411 		seq_printf(m, ",wsize=%d", fsopt->wsize);
412 	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
413 		seq_printf(m, ",rsize=%d", fsopt->rsize);
414 	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
415 		seq_printf(m, ",rasize=%d", fsopt->rasize);
416 	if (fsopt->congestion_kb != default_congestion_kb())
417 		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
418 	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
419 		seq_printf(m, ",caps_wanted_delay_min=%d",
420 			 fsopt->caps_wanted_delay_min);
421 	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
422 		seq_printf(m, ",caps_wanted_delay_max=%d",
423 			   fsopt->caps_wanted_delay_max);
424 	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
425 		seq_printf(m, ",cap_release_safety=%d",
426 			   fsopt->cap_release_safety);
427 	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
428 		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
429 	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
430 		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
431 	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
432 		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
433 	return 0;
434 }
435 
436 /*
437  * handle any mon messages the standard library doesn't understand.
438  * return error if we don't either.
439  */
extra_mon_dispatch(struct ceph_client * client,struct ceph_msg * msg)440 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
441 {
442 	struct ceph_fs_client *fsc = client->private;
443 	int type = le16_to_cpu(msg->hdr.type);
444 
445 	switch (type) {
446 	case CEPH_MSG_MDS_MAP:
447 		ceph_mdsc_handle_map(fsc->mdsc, msg);
448 		return 0;
449 
450 	default:
451 		return -1;
452 	}
453 }
454 
455 /*
456  * create a new fs client
457  */
create_fs_client(struct ceph_mount_options * fsopt,struct ceph_options * opt)458 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
459 					struct ceph_options *opt)
460 {
461 	struct ceph_fs_client *fsc;
462 	const unsigned supported_features =
463 		CEPH_FEATURE_FLOCK |
464 		CEPH_FEATURE_DIRLAYOUTHASH;
465 	const unsigned required_features = 0;
466 	int err = -ENOMEM;
467 
468 	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
469 	if (!fsc)
470 		return ERR_PTR(-ENOMEM);
471 
472 	fsc->client = ceph_create_client(opt, fsc, supported_features,
473 					 required_features);
474 	if (IS_ERR(fsc->client)) {
475 		err = PTR_ERR(fsc->client);
476 		goto fail;
477 	}
478 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
479 	fsc->client->monc.want_mdsmap = 1;
480 
481 	fsc->mount_options = fsopt;
482 
483 	fsc->sb = NULL;
484 	fsc->mount_state = CEPH_MOUNT_MOUNTING;
485 
486 	atomic_long_set(&fsc->writeback_count, 0);
487 
488 	err = bdi_init(&fsc->backing_dev_info);
489 	if (err < 0)
490 		goto fail_client;
491 
492 	err = -ENOMEM;
493 	/*
494 	 * The number of concurrent works can be high but they don't need
495 	 * to be processed in parallel, limit concurrency.
496 	 */
497 	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
498 	if (fsc->wb_wq == NULL)
499 		goto fail_bdi;
500 	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
501 	if (fsc->pg_inv_wq == NULL)
502 		goto fail_wb_wq;
503 	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
504 	if (fsc->trunc_wq == NULL)
505 		goto fail_pg_inv_wq;
506 
507 	/* set up mempools */
508 	err = -ENOMEM;
509 	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
510 			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
511 	if (!fsc->wb_pagevec_pool)
512 		goto fail_trunc_wq;
513 
514 	/* caps */
515 	fsc->min_caps = fsopt->max_readdir;
516 
517 	return fsc;
518 
519 fail_trunc_wq:
520 	destroy_workqueue(fsc->trunc_wq);
521 fail_pg_inv_wq:
522 	destroy_workqueue(fsc->pg_inv_wq);
523 fail_wb_wq:
524 	destroy_workqueue(fsc->wb_wq);
525 fail_bdi:
526 	bdi_destroy(&fsc->backing_dev_info);
527 fail_client:
528 	ceph_destroy_client(fsc->client);
529 fail:
530 	kfree(fsc);
531 	return ERR_PTR(err);
532 }
533 
destroy_fs_client(struct ceph_fs_client * fsc)534 static void destroy_fs_client(struct ceph_fs_client *fsc)
535 {
536 	dout("destroy_fs_client %p\n", fsc);
537 
538 	destroy_workqueue(fsc->wb_wq);
539 	destroy_workqueue(fsc->pg_inv_wq);
540 	destroy_workqueue(fsc->trunc_wq);
541 
542 	bdi_destroy(&fsc->backing_dev_info);
543 
544 	mempool_destroy(fsc->wb_pagevec_pool);
545 
546 	destroy_mount_options(fsc->mount_options);
547 
548 	ceph_fs_debugfs_cleanup(fsc);
549 
550 	ceph_destroy_client(fsc->client);
551 
552 	kfree(fsc);
553 	dout("destroy_fs_client %p done\n", fsc);
554 }
555 
556 /*
557  * caches
558  */
559 struct kmem_cache *ceph_inode_cachep;
560 struct kmem_cache *ceph_cap_cachep;
561 struct kmem_cache *ceph_dentry_cachep;
562 struct kmem_cache *ceph_file_cachep;
563 
ceph_inode_init_once(void * foo)564 static void ceph_inode_init_once(void *foo)
565 {
566 	struct ceph_inode_info *ci = foo;
567 	inode_init_once(&ci->vfs_inode);
568 }
569 
init_caches(void)570 static int __init init_caches(void)
571 {
572 	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
573 				      sizeof(struct ceph_inode_info),
574 				      __alignof__(struct ceph_inode_info),
575 				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
576 				      ceph_inode_init_once);
577 	if (ceph_inode_cachep == NULL)
578 		return -ENOMEM;
579 
580 	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
581 				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
582 	if (ceph_cap_cachep == NULL)
583 		goto bad_cap;
584 
585 	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
586 					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
587 	if (ceph_dentry_cachep == NULL)
588 		goto bad_dentry;
589 
590 	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
591 				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
592 	if (ceph_file_cachep == NULL)
593 		goto bad_file;
594 
595 	return 0;
596 
597 bad_file:
598 	kmem_cache_destroy(ceph_dentry_cachep);
599 bad_dentry:
600 	kmem_cache_destroy(ceph_cap_cachep);
601 bad_cap:
602 	kmem_cache_destroy(ceph_inode_cachep);
603 	return -ENOMEM;
604 }
605 
destroy_caches(void)606 static void destroy_caches(void)
607 {
608 	kmem_cache_destroy(ceph_inode_cachep);
609 	kmem_cache_destroy(ceph_cap_cachep);
610 	kmem_cache_destroy(ceph_dentry_cachep);
611 	kmem_cache_destroy(ceph_file_cachep);
612 }
613 
614 
615 /*
616  * ceph_umount_begin - initiate forced umount.  Tear down down the
617  * mount, skipping steps that may hang while waiting for server(s).
618  */
ceph_umount_begin(struct super_block * sb)619 static void ceph_umount_begin(struct super_block *sb)
620 {
621 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
622 
623 	dout("ceph_umount_begin - starting forced umount\n");
624 	if (!fsc)
625 		return;
626 	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
627 	return;
628 }
629 
630 static const struct super_operations ceph_super_ops = {
631 	.alloc_inode	= ceph_alloc_inode,
632 	.destroy_inode	= ceph_destroy_inode,
633 	.write_inode    = ceph_write_inode,
634 	.sync_fs        = ceph_sync_fs,
635 	.put_super	= ceph_put_super,
636 	.show_options   = ceph_show_options,
637 	.statfs		= ceph_statfs,
638 	.umount_begin   = ceph_umount_begin,
639 };
640 
641 /*
642  * Bootstrap mount by opening the root directory.  Note the mount
643  * @started time from caller, and time out if this takes too long.
644  */
open_root_dentry(struct ceph_fs_client * fsc,const char * path,unsigned long started)645 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
646 				       const char *path,
647 				       unsigned long started)
648 {
649 	struct ceph_mds_client *mdsc = fsc->mdsc;
650 	struct ceph_mds_request *req = NULL;
651 	int err;
652 	struct dentry *root;
653 
654 	/* open dir */
655 	dout("open_root_inode opening '%s'\n", path);
656 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
657 	if (IS_ERR(req))
658 		return ERR_CAST(req);
659 	req->r_path1 = kstrdup(path, GFP_NOFS);
660 	req->r_ino1.ino = CEPH_INO_ROOT;
661 	req->r_ino1.snap = CEPH_NOSNAP;
662 	req->r_started = started;
663 	req->r_timeout = fsc->client->options->mount_timeout * HZ;
664 	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
665 	req->r_num_caps = 2;
666 	err = ceph_mdsc_do_request(mdsc, NULL, req);
667 	if (err == 0) {
668 		struct inode *inode = req->r_target_inode;
669 		req->r_target_inode = NULL;
670 		dout("open_root_inode success\n");
671 		if (ceph_ino(inode) == CEPH_INO_ROOT &&
672 		    fsc->sb->s_root == NULL) {
673 			root = d_make_root(inode);
674 			if (!root) {
675 				root = ERR_PTR(-ENOMEM);
676 				goto out;
677 			}
678 		} else {
679 			root = d_obtain_alias(inode);
680 		}
681 		ceph_init_dentry(root);
682 		dout("open_root_inode success, root dentry is %p\n", root);
683 	} else {
684 		root = ERR_PTR(err);
685 	}
686 out:
687 	ceph_mdsc_put_request(req);
688 	return root;
689 }
690 
691 
692 
693 
694 /*
695  * mount: join the ceph cluster, and open root directory.
696  */
ceph_real_mount(struct ceph_fs_client * fsc,const char * path)697 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
698 		      const char *path)
699 {
700 	int err;
701 	unsigned long started = jiffies;  /* note the start time */
702 	struct dentry *root;
703 	int first = 0;   /* first vfsmount for this super_block */
704 
705 	dout("mount start\n");
706 	mutex_lock(&fsc->client->mount_mutex);
707 
708 	err = __ceph_open_session(fsc->client, started);
709 	if (err < 0)
710 		goto out;
711 
712 	dout("mount opening root\n");
713 	root = open_root_dentry(fsc, "", started);
714 	if (IS_ERR(root)) {
715 		err = PTR_ERR(root);
716 		goto out;
717 	}
718 	if (fsc->sb->s_root) {
719 		dput(root);
720 	} else {
721 		fsc->sb->s_root = root;
722 		first = 1;
723 
724 		err = ceph_fs_debugfs_init(fsc);
725 		if (err < 0)
726 			goto fail;
727 	}
728 
729 	if (path[0] == 0) {
730 		dget(root);
731 	} else {
732 		dout("mount opening base mountpoint\n");
733 		root = open_root_dentry(fsc, path, started);
734 		if (IS_ERR(root)) {
735 			err = PTR_ERR(root);
736 			goto fail;
737 		}
738 	}
739 
740 	fsc->mount_state = CEPH_MOUNT_MOUNTED;
741 	dout("mount success\n");
742 	mutex_unlock(&fsc->client->mount_mutex);
743 	return root;
744 
745 out:
746 	mutex_unlock(&fsc->client->mount_mutex);
747 	return ERR_PTR(err);
748 
749 fail:
750 	if (first) {
751 		dput(fsc->sb->s_root);
752 		fsc->sb->s_root = NULL;
753 	}
754 	goto out;
755 }
756 
ceph_set_super(struct super_block * s,void * data)757 static int ceph_set_super(struct super_block *s, void *data)
758 {
759 	struct ceph_fs_client *fsc = data;
760 	int ret;
761 
762 	dout("set_super %p data %p\n", s, data);
763 
764 	s->s_flags = fsc->mount_options->sb_flags;
765 	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
766 
767 	s->s_fs_info = fsc;
768 	fsc->sb = s;
769 
770 	s->s_op = &ceph_super_ops;
771 	s->s_export_op = &ceph_export_ops;
772 
773 	s->s_time_gran = 1000;  /* 1000 ns == 1 us */
774 
775 	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
776 	if (ret != 0)
777 		goto fail;
778 
779 	return ret;
780 
781 fail:
782 	s->s_fs_info = NULL;
783 	fsc->sb = NULL;
784 	return ret;
785 }
786 
787 /*
788  * share superblock if same fs AND options
789  */
ceph_compare_super(struct super_block * sb,void * data)790 static int ceph_compare_super(struct super_block *sb, void *data)
791 {
792 	struct ceph_fs_client *new = data;
793 	struct ceph_mount_options *fsopt = new->mount_options;
794 	struct ceph_options *opt = new->client->options;
795 	struct ceph_fs_client *other = ceph_sb_to_client(sb);
796 
797 	dout("ceph_compare_super %p\n", sb);
798 
799 	if (compare_mount_options(fsopt, opt, other)) {
800 		dout("monitor(s)/mount options don't match\n");
801 		return 0;
802 	}
803 	if ((opt->flags & CEPH_OPT_FSID) &&
804 	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
805 		dout("fsid doesn't match\n");
806 		return 0;
807 	}
808 	if (fsopt->sb_flags != other->mount_options->sb_flags) {
809 		dout("flags differ\n");
810 		return 0;
811 	}
812 	return 1;
813 }
814 
815 /*
816  * construct our own bdi so we can control readahead, etc.
817  */
818 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
819 
ceph_register_bdi(struct super_block * sb,struct ceph_fs_client * fsc)820 static int ceph_register_bdi(struct super_block *sb,
821 			     struct ceph_fs_client *fsc)
822 {
823 	int err;
824 
825 	/* set ra_pages based on rasize mount option? */
826 	if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
827 		fsc->backing_dev_info.ra_pages =
828 			(fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
829 			>> PAGE_SHIFT;
830 	else
831 		fsc->backing_dev_info.ra_pages =
832 			default_backing_dev_info.ra_pages;
833 
834 	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
835 			   atomic_long_inc_return(&bdi_seq));
836 	if (!err)
837 		sb->s_bdi = &fsc->backing_dev_info;
838 	return err;
839 }
840 
ceph_mount(struct file_system_type * fs_type,int flags,const char * dev_name,void * data)841 static struct dentry *ceph_mount(struct file_system_type *fs_type,
842 		       int flags, const char *dev_name, void *data)
843 {
844 	struct super_block *sb;
845 	struct ceph_fs_client *fsc;
846 	struct dentry *res;
847 	int err;
848 	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
849 	const char *path = NULL;
850 	struct ceph_mount_options *fsopt = NULL;
851 	struct ceph_options *opt = NULL;
852 
853 	dout("ceph_mount\n");
854 	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
855 	if (err < 0) {
856 		res = ERR_PTR(err);
857 		goto out_final;
858 	}
859 
860 	/* create client (which we may/may not use) */
861 	fsc = create_fs_client(fsopt, opt);
862 	if (IS_ERR(fsc)) {
863 		res = ERR_CAST(fsc);
864 		destroy_mount_options(fsopt);
865 		ceph_destroy_options(opt);
866 		goto out_final;
867 	}
868 
869 	err = ceph_mdsc_init(fsc);
870 	if (err < 0) {
871 		res = ERR_PTR(err);
872 		goto out;
873 	}
874 
875 	if (ceph_test_opt(fsc->client, NOSHARE))
876 		compare_super = NULL;
877 	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
878 	if (IS_ERR(sb)) {
879 		res = ERR_CAST(sb);
880 		goto out;
881 	}
882 
883 	if (ceph_sb_to_client(sb) != fsc) {
884 		ceph_mdsc_destroy(fsc);
885 		destroy_fs_client(fsc);
886 		fsc = ceph_sb_to_client(sb);
887 		dout("get_sb got existing client %p\n", fsc);
888 	} else {
889 		dout("get_sb using new client %p\n", fsc);
890 		err = ceph_register_bdi(sb, fsc);
891 		if (err < 0) {
892 			res = ERR_PTR(err);
893 			goto out_splat;
894 		}
895 	}
896 
897 	res = ceph_real_mount(fsc, path);
898 	if (IS_ERR(res))
899 		goto out_splat;
900 	dout("root %p inode %p ino %llx.%llx\n", res,
901 	     res->d_inode, ceph_vinop(res->d_inode));
902 	return res;
903 
904 out_splat:
905 	ceph_mdsc_close_sessions(fsc->mdsc);
906 	deactivate_locked_super(sb);
907 	goto out_final;
908 
909 out:
910 	ceph_mdsc_destroy(fsc);
911 	destroy_fs_client(fsc);
912 out_final:
913 	dout("ceph_mount fail %ld\n", PTR_ERR(res));
914 	return res;
915 }
916 
ceph_kill_sb(struct super_block * s)917 static void ceph_kill_sb(struct super_block *s)
918 {
919 	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
920 	dout("kill_sb %p\n", s);
921 	ceph_mdsc_pre_umount(fsc->mdsc);
922 	kill_anon_super(s);    /* will call put_super after sb is r/o */
923 	ceph_mdsc_destroy(fsc);
924 	destroy_fs_client(fsc);
925 }
926 
927 static struct file_system_type ceph_fs_type = {
928 	.owner		= THIS_MODULE,
929 	.name		= "ceph",
930 	.mount		= ceph_mount,
931 	.kill_sb	= ceph_kill_sb,
932 	.fs_flags	= FS_RENAME_DOES_D_MOVE,
933 };
934 
/*
 * STRINGIFY(x) turns the macro-expanded value of x into a string
 * literal.  The extra level through _STRINGIFY is what lets x be
 * expanded before the # operator is applied.
 */
#define _STRINGIFY(token) #token
#define STRINGIFY(token) _STRINGIFY(token)
937 
init_ceph(void)938 static int __init init_ceph(void)
939 {
940 	int ret = init_caches();
941 	if (ret)
942 		goto out;
943 
944 	ceph_xattr_init();
945 	ret = register_filesystem(&ceph_fs_type);
946 	if (ret)
947 		goto out_icache;
948 
949 	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
950 
951 	return 0;
952 
953 out_icache:
954 	ceph_xattr_exit();
955 	destroy_caches();
956 out:
957 	return ret;
958 }
959 
exit_ceph(void)960 static void __exit exit_ceph(void)
961 {
962 	dout("exit_ceph\n");
963 	unregister_filesystem(&ceph_fs_type);
964 	ceph_xattr_exit();
965 	destroy_caches();
966 }
967 
968 module_init(init_ceph);
969 module_exit(exit_ceph);
970 
971 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
972 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
973 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
974 MODULE_DESCRIPTION("Ceph filesystem for Linux");
975 MODULE_LICENSE("GPL");
976