// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

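/*
 * Per-superblock sequence numbers: ida_simple_get() below never hands out
 * a value smaller than HMDFS_SB_SEQ_FROM (1), so a seq of 0 can safely
 * mean "not allocated" in hmdfs_free_sb_seq().
 */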
static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

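/*
 * Values larger than HMDFS_XATTR_SIZE_MAX are never transferred: the
 * request size is clamped, and an -ERANGE caused by that clamping is
 * reported to the caller as -E2BIG.
 */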
static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value sizes over %d are not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* xattr handler arrays must be NULL-terminated */
};

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

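/*
 * Undo everything hmdfs_fill_super() set up; this also drops the lower
 * super block's s_active reference taken at mount time.
 */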
void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfree(sbi->cloud_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* Free the seq only after all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);

	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

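/*
 * Broadcast the statfs request to every online peer; @buf ends up
 * holding the reply from the last peer that answered, and the last
 * non-zero return code (if any) is propagated to the caller.
 */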
static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	/* merge_view, merge_view/xxx and device_view are assigned src_inode info */
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);
	if (sbi->cloud_dir)
		seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

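/*
 * Syncfs calls are serialized: only one caller executes at a time and
 * latecomers queue a syncfs_item on hsi.wait_list. On completion the
 * executor aborts the items parked on hsi.pending_list (the run that
 * just finished already synced their data), promotes the newest waiter
 * to be the next executor and parks the remaining waiters on
 * hsi.pending_list.
 */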
static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs must not run concurrently within hmdfs_sync_fs, because
	 * we have to wait until every remote syncfs call returns or times
	 * out, and during that wait @sbi->hsi.wait_count and
	 * @sbi->hsi.remote_ret must be protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we reset @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * inside the spinlock-protected area to avoid the following
	 * scenario caused by out-of-order execution:
	 *
	 *            syncfs                                 syncfs_cb
	 *  sbi->hsi.remote_ret = 0;
	 *  atomic_set(&sbi->hsi.wait_count, 0);
	 *                                               lock
	 *                                               version == old_version
	 *                                 sbi->hsi.remote_ret = resp->ret_code
	 *                                 atomic_dec(&sbi->hsi.wait_count);
	 *                                               unlock
	 *         lock
	 *         version = old_version + 1
	 *         unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
	 * before taking the spinlock, racing with syncfs_cb(); keeping
	 * both assignments under the spinlock fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that went offline normally. It's okay to drop
		 * it.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writes, leaving a wrong
		 * count in @sb_dirty_count. The dirty data produced
		 * during the gap period would not be synced in the next
		 * syncfs operation. To avoid this, we have to invoke
		 * sync_inodes_sb() again after reading
		 * @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background makes sure @sbi->hsi.wait_count
	 * eventually drops to zero, whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon the syncfs processes parked on pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list as the next executor */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: return the syncfs error back to the syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

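/*
 * Mocklisp-style string hash: for each character c (folded to lower case
 * if @case_sense is false), res = 31 * res + c, computed here as
 * (res << 5) - res + c. Note that @len is unused; the string is walked
 * until its NUL terminator.
 */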
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}
751 
hmdfs_client_resp_statis(struct hmdfs_sb_info * sbi,u8 cmd,enum hmdfs_resp_type type,unsigned long start,unsigned long end)752 void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
753 			      enum hmdfs_resp_type type, unsigned long start,
754 			      unsigned long end)
755 {
756 	unsigned long duration;
757 
758 	switch (type) {
759 	case HMDFS_RESP_DELAY:
760 		sbi->s_client_statis[cmd].delay_resp_cnt++;
761 		break;
762 	case HMDFS_RESP_TIMEOUT:
763 		sbi->s_client_statis[cmd].timeout_cnt++;
764 		break;
765 	case HMDFS_RESP_NORMAL:
766 		duration = end - start;
767 		sbi->s_client_statis[cmd].total += duration;
768 		sbi->s_client_statis[cmd].resp_cnt++;
769 		if (sbi->s_client_statis[cmd].max < duration)
770 			sbi->s_client_statis[cmd].max = duration;
771 		break;
772 	default:
773 		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
774 	}
775 }
776 
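/*
 * Keep the user-supplied destination in @real_dst and rebuild
 * @local_dst by appending UPDATE_LOCAL_DST to it.
 */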
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random | boot time (ms) | 0x00 |
 * |--------|----------------|------|
 *     16          33           15    (bits)
 *
 * This keeps the boot cookie unique over a period of
 * 2^33 / 1000 / 3600 / 24 ≈ 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

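/*
 * Mount-time setup: allocate and initialize hmdfs_sb_info, parse the
 * mount options, register the sysfs control node (named after a hash of
 * local_dst), resolve @dev_name as the lower path and stack on top of
 * its super block, then build the root inode and dentry.
 */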
static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	if (!raw_data)
		return -EINVAL;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	/* add the ctrl sysfs node */
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;

out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->cloud_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

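/*
 * Called on umount: cancel async readdir works that have not started,
 * wake up the ones waiting for a remote response with -EINTR, and wait
 * (up to one second) for the rest to finish.
 */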
static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* cancel the works that are not running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* wake up the async readdirs that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* wait for all async readdirs to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the
	 * vfsmount. Thus shrink_dcache_for_umount() would warn about
	 * dentries still in use if async readdir were not done.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;
out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");