// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */


#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size over %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* the VFS iterates this array until it reaches a NULL entry */
};

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* After all accesses are completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto rmdir_out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto rmdir_out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE &&
		    con->version > USERSPACE_MAX_VER) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

rmdir_out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	// merge_view, merge_view/xxx and device_view report local_src's statfs info
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs cannot run concurrently in hmdfs_sync_fs, because we have
	 * to wait until all remote syncfs calls return or time out, and
	 * during that wait @sbi->remote_syncfs_count and
	 * @sbi->remote_syncfs_ret must be protected from concurrent access.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we put @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * inside the spinlock-protected area to avoid the following
	 * scenario caused by out-of-order execution:
	 *
	 *            syncfs                                 syncfs_cb
	 *  sbi->hsi.remote_ret = 0;
	 *  atomic_set(&sbi->hsi.wait_count, 0);
	 *                                               lock
	 *                                               version == old_version
	 *                                 sbi->hsi.remote_ret = resp->ret_code
	 *                                 atomic_dec(&sbi->hsi.wait_count);
	 *                                               unlock
	 *         lock
	 *         version = old_version + 1
	 *         unlock
	 *
	 * Without the lock, @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * could be assigned before the spinlock is taken and race with
	 * syncfs_cb(); protecting both assignments with the spinlock fixes
	 * this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that go offline normally. It's okay to drop
		 * them.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writing and leave a wrong
		 * count in @sb_dirty_count. Dirty data produced during
		 * that gap won't be synced in the next syncfs operation.
		 * To avoid this, we have to invoke sync_inodes_sb() again
		 * after reading @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background makes sure @sbi->remote_syncfs_count
	 * eventually drops to zero, whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon syncfs processes in pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: Return syncfs err back to the syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

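/*
 * Mocklisp-style rolling hash: res = res * 31 + c. Note that @len is not
 * used; hashing stops at the terminating NUL. When @case_sense is false,
 * characters are lowercased first so that case-insensitive lookups of the
 * same path produce the same hash.
 */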
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;
	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static void init_share_table(struct hmdfs_sb_info *sbi)
{
	spin_lock_init(&sbi->share_table.item_list_lock);
	INIT_LIST_HEAD(&sbi->share_table.item_list_head);
	sbi->share_table.item_cnt = 0;
	sbi->share_table.max_cnt = HMDFS_SHARE_ITEMS_MAX;
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	init_share_table(sbi);
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1,
		 "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random |   boot time(ms) |  0x00 |
 * |--------|-----------------|-------|
 *     16            33          15    (bits)
 *
 * This keeps the boot cookie unique over a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}
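
/*
 * Worked example (illustrative only): with rand = 0x1234 and a boot time
 * of 10000 ms, the cookie body is (0x1234ULL << 33) | 10000. The final
 * shift by HMDFS_FID_VER_BOOT_COOKIE_SHIFT (presumably 15, matching the
 * 15 zero bits in the layout above) leaves the low bits of the fid
 * version field clear.
 */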

static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	// add ctrl sysfs node
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		err = -ENOMEM;
		hmdfs_err("get local_src failed!");
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	d_rehash(sb->s_root);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}


static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* cancel work items that are not running yet */

	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* wake up async readdir requests that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* wait for all async readdir to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * async readdir holds a reference on the dentry, not on the
	 * vfsmount. Thus shrink_dcache_for_umount() will warn about
	 * dentries still in use if async readdir has not finished.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};
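
/*
 * Mount usage sketch (illustrative only; the lower-directory path is
 * hypothetical and option spellings are inferred from hmdfs_show_options(),
 * so the accepted forms may differ):
 *
 *   mount -t hmdfs /path/to/lower/dir /mnt/hmdfs -o merge_enable,sensitive
 *
 * The device argument is resolved via kern_path() in hmdfs_fill_super(),
 * and the raw option string is passed through struct hmdfs_mount_priv to
 * hmdfs_parse_options().
 */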

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;
out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");