// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/version.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

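/*
 * Superblock sequence numbers are handed out starting from
 * HMDFS_SB_SEQ_FROM (1), so a seq of 0 always means "never allocated";
 * hmdfs_free_sb_seq() relies on this to make freeing seq 0 a no-op.
 */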
static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size exceeding %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* the VFS iterates sb->s_xattr until it hits NULL */
};
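
/*
 * Example usage from userspace (illustrative; hmdfs only accepts the
 * "user." namespace and values up to HMDFS_XATTR_SIZE_MAX bytes):
 *
 *   setfattr -n user.foo -v bar /mnt/hmdfs/file
 *   getfattr -n user.foo /mnt/hmdfs/file
 */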

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* Free the seq only after all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE &&
		    con->version > USERSPACE_MAX_VER) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

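/*
 * statfs is answered by whichever backend actually holds the data:
 * merge-view and second-level remote inodes report the local source
 * branch, inodes that carry a lower inode report the lower filesystem,
 * and everything else is queried from online remote peers.
 */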
static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	/* merge_view, merge_view/xxx and device_view carry src_inode info */
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

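/*
 * Serialize syncfs: only one caller executes at a time; later callers
 * queue on @hsi.wait_list. On completion, the newest waiter is promoted
 * to run the next syncfs, while older waiters are parked on
 * @hsi.pending_list and completed with need_abort set once that next
 * syncfs finishes, since its sync already covers their dirty data;
 * aborted callers simply return success.
 */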
static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs must not run concurrently in hmdfs_sync_fs: we wait until
	 * all remote syncfs calls return or time out, and during that wait
	 * @sbi->hsi.wait_count and @sbi->hsi.remote_ret must be protected
	 * from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we put the assignments to @sbi->hsi.remote_ret and
	 * @sbi->hsi.wait_count inside the spinlock-protected area to avoid
	 * the following scenario caused by out-of-order execution:
	 *
	 *            syncfs                                 syncfs_cb
	 *  sbi->hsi.remote_ret = 0;
	 *  atomic_set(&sbi->hsi.wait_count, 0);
	 *                                               lock
	 *                                               version == old_version
	 *                                 sbi->hsi.remote_ret = resp->ret_code
	 *                                 atomic_dec(&sbi->hsi.wait_count);
	 *                                               unlock
	 *         lock
	 *         version = old_version + 1
	 *         unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
	 * before the spinlock is taken, racing with syncfs_cb(); doing
	 * these two assignments under the spinlock fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that went offline normally; it's okay to drop
		 * it.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writes, leading to a wrong
		 * @sb_dirty_count: dirty data produced during the gap
		 * would not be synced by the next syncfs. To avoid this,
		 * we invoke sync_inodes_sb() again after reading
		 * @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background makes sure @sbi->hsi.wait_count
	 * finally drops to zero whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abort syncfs processes parked on pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list as the next executor */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: return syncfs err back to the syscall */

	return err;
}

561 
562 struct super_operations hmdfs_sops = {
563 	.alloc_inode = hmdfs_alloc_inode,
564 	.destroy_inode = hmdfs_destroy_inode,
565 	.evict_inode = hmdfs_evict_inode,
566 	.put_super = hmdfs_put_super,
567 	.statfs = hmdfs_statfs,
568 	.show_options = hmdfs_show_options,
569 	.sync_fs = hmdfs_sync_fs,
570 };
571 
init_once(void * obj)572 static void init_once(void *obj)
573 {
574 	struct hmdfs_inode_info *i = obj;
575 
576 	inode_init_once(&i->vfs_inode);
577 }
578 
hmdfs_init_caches(void)579 static int __init hmdfs_init_caches(void)
580 {
581 	int err = -ENOMEM;
582 
583 	hmdfs_inode_cachep =
584 		kmem_cache_create("hmdfs_inode_cache",
585 				  sizeof(struct hmdfs_inode_info), 0,
586 				  SLAB_RECLAIM_ACCOUNT, init_once);
587 	if (unlikely(!hmdfs_inode_cachep))
588 		goto out;
589 	hmdfs_dentry_cachep =
590 		kmem_cache_create("hmdfs_dentry_cache",
591 				  sizeof(struct hmdfs_dentry_info), 0,
592 				  SLAB_RECLAIM_ACCOUNT, NULL);
593 	if (unlikely(!hmdfs_dentry_cachep))
594 		goto out_des_ino;
595 	hmdfs_dentry_merge_cachep =
596 		kmem_cache_create("hmdfs_dentry_merge_cache",
597 				  sizeof(struct hmdfs_dentry_info_merge), 0,
598 				  SLAB_RECLAIM_ACCOUNT, NULL);
599 	if (unlikely(!hmdfs_dentry_merge_cachep))
600 		goto out_des_dc;
601 	return 0;
602 
603 out_des_dc:
604 	kmem_cache_destroy(hmdfs_dentry_cachep);
605 out_des_ino:
606 	kmem_cache_destroy(hmdfs_inode_cachep);
607 out:
608 	return err;
609 }
610 
hmdfs_destroy_caches(void)611 static void hmdfs_destroy_caches(void)
612 {
613 	rcu_barrier();
614 	kmem_cache_destroy(hmdfs_inode_cachep);
615 	hmdfs_inode_cachep = NULL;
616 	kmem_cache_destroy(hmdfs_dentry_cachep);
617 	hmdfs_dentry_cachep = NULL;
618 	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
619 	hmdfs_dentry_merge_cachep = NULL;
620 }
621 
path_hash(const char * path,int len,bool case_sense)622 uint64_t path_hash(const char *path, int len, bool case_sense)
623 {
624 	uint64_t res = 0;
625 	const char *kp = path;
626 	char c;
627 	/* Mocklisp hash function. */
628 	while (*kp) {
629 		c = *kp;
630 		if (!case_sense)
631 			c = tolower(c);
632 		res = (res << 5) - res + (uint64_t)(c);
633 		kp++;
634 	}
635 	return res;
636 }
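
/*
 * Note: path_hash() walks the NUL-terminated string and ignores @len;
 * each step computes res = 31 * res + c, since (res << 5) - res == 31 * res.
 */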

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kcalloc(F_SIZE, sizeof(*sbi->s_server_statis), GFP_KERNEL);
	sbi->s_client_statis =
		kcalloc(F_SIZE, sizeof(*sbi->s_client_statis), GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

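/*
 * Rebuild @sbi->local_dst from the user-supplied destination: the
 * original string is kept in @sbi->real_dst, and local_dst becomes
 * real_dst with the fixed UPDATE_LOCAL_DST suffix (defined elsewhere)
 * appended, i.e. "<real_dst><UPDATE_LOCAL_DST>".
 */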
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie with the following format:
 *
 * | random | boot time(ms) | 0x00 |
 * |--------|---------------|------|
 *     16          33          15    (bits)
 *
 * This makes sure the boot cookie is unique within a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}
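
/*
 * Worked example (values illustrative): with a boot time of 100000 ms
 * (0x186A0) and rand = 0xABCD, the cookie before the final shift is
 * (0xABCDULL << 33) | 0x186A0. The result is then shifted left by
 * HMDFS_FID_VER_BOOT_COOKIE_SHIFT (15 per the layout above; the constant
 * is defined elsewhere), which presumably reserves the low bits of the
 * fid version for other data.
 */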

static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	/* add the ctrl sysfs node */
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}
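
/*
 * Illustrative mount invocation (the accepted option names come from
 * hmdfs_parse_options(), which lives elsewhere; paths and options here
 * are examples only, loosely mirroring what hmdfs_show_options() prints):
 *
 *   mount -t hmdfs -o merge_enable,sensitive /data/lower /mnt/hmdfs
 *
 * dev_name ("/data/lower" above) names the lower directory and must be
 * non-empty, since hmdfs stacks on the lower filesystem's superblock.
 */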

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* cancel work items that have not started running */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* wake up async readdir requests that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* wait for all async readdir work to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a ref on the dentry, not on the vfsmount.
	 * Thus shrink_dcache_for_umount() will warn about dentries still
	 * in use if async readdir has not finished.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;
out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");
1102