// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};
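
/*
 * Concurrent callers of hmdfs_sync_fs() are coalesced through syncfs_item:
 * the first caller runs the remote syncfs itself, while later callers queue
 * on @sbi->hsi.wait_list and sleep on @done. When the running call finishes
 * it wakes the most recent waiter to run the next syncfs and aborts the
 * older entries (which have been moved to @sbi->hsi.pending_list), since a
 * syncfs started after they queued already covers their dirty data. See
 * hmdfs_sync_fs() below.
 */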

static DEFINE_IDA(hmdfs_sb_seq);

static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("no support xattr value size over than: %d",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("no support too long xattr value: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* sb->s_xattr handler arrays must be NULL-terminated */
};
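
/*
 * Only "user.*" attributes are passed through (see the XATTR_USER_PREFIX
 * checks above), and values longer than HMDFS_XATTR_SIZE_MAX are rejected
 * with -E2BIG. A minimal usage sketch from userspace, assuming hmdfs is
 * mounted at the illustrative path /mnt/hmdfs:
 *
 *	setfattr -n user.tag -v demo /mnt/hmdfs/file
 *	getfattr -n user.tag /mnt/hmdfs/file
 */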

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* Free the seq only after all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto rmdir_out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto rmdir_out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE &&
		    con->version > USERSPACE_MAX_VER) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

rmdir_out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	// merge_view & merge_view/xxx & device_view assigned src_inode info
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs must not run concurrently inside hmdfs_sync_fs: we have to
	 * wait until all remote syncfs calls return or time out, and during
	 * that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret must be
	 * protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we reset @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * inside the spinlock-protected region to avoid the following
	 * scenario caused by out-of-order execution:
	 *
	 *    syncfs                                syncfs_cb
	 *    sbi->hsi.remote_ret = 0;
	 *    atomic_set(&sbi->hsi.wait_count, 0);
	 *                                          lock
	 *                                          version == old_version
	 *                                          sbi->hsi.remote_ret = resp->ret_code
	 *                                          atomic_dec(&sbi->hsi.wait_count);
	 *                                          unlock
	 *    lock
	 *    version = old_version + 1
	 *    unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
	 * before the spinlock is taken, racing with syncfs_cb(); keeping
	 * both assignments inside the spinlock-protected region fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that go offline normally. It's okay to drop
		 * it.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writes, leading to a wrong
		 * @sb_dirty_count. Dirty data produced during that gap
		 * would not be synced by the next syncfs operation.
		 * To avoid this, we have to invoke sync_inodes_sb() again
		 * after sampling @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * The async work in the background makes sure @sbi->hsi.wait_count
	 * finally drops to zero, whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon syncfs processes on the pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process on the wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: return syncfs err back to the syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

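
/*
 * Hash a path with the Mocklisp-style recurrence hash = hash * 31 + c,
 * written below as (hash << 5) - hash + c, lower-casing each character when
 * the mount is case-insensitive. For example, "ab" hashes to
 * 'a' * 31 + 'b' = 97 * 31 + 98 = 3105. The string is walked up to its NUL
 * terminator; @len is currently unused.
 */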
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static void init_share_table(struct hmdfs_sb_info *sbi)
{
	spin_lock_init(&sbi->share_table.item_list_lock);
	INIT_LIST_HEAD(&sbi->share_table.item_list_head);
	sbi->share_table.item_cnt = 0;
	sbi->share_table.max_cnt = HMDFS_SHARE_ITEMS_MAX;
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	init_share_table(sbi);
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

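
/*
 * Record client-side response statistics for command @cmd: delayed and
 * timed-out responses are counted, and for normal responses the
 * @start..@end duration is accumulated into ->total and ->max along with
 * ->resp_cnt, so per-command average and worst-case latency can be derived.
 */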
void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

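
/*
 * Keep the user-supplied destination in @sbi->real_dst and rebuild
 * @sbi->local_dst as real_dst + UPDATE_LOCAL_DST. As an illustrative sketch
 * only (UPDATE_LOCAL_DST is defined elsewhere, so the suffix below is an
 * assumption): with real_dst "/mnt/hmdfs/100/account" and UPDATE_LOCAL_DST
 * "/device_view/local", local_dst would become
 * "/mnt/hmdfs/100/account/device_view/local".
 */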
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	/* Avoid a dangling pointer (and double free) on the error paths */
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1,
		 "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random | boot time (ms) | 0x00  |
 * |--------|----------------|-------|
 *     16           33          15     (bits)
 *
 * This makes the boot cookie unique within a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
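/*
 * Equivalently, with boot time t in ms and a 16-bit random value r, the
 * cookie computed below is
 *
 *	((r << 33) | (t & ((1ULL << 33) - 1))) << HMDFS_FID_VER_BOOT_COOKIE_SHIFT
 *
 * leaving the low bits free for the fid version field, as the shift's name
 * suggests.
 */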
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	// add ctrl sysfs node
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	d_rehash(sb->s_root);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* Cancel work items that have not started running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* Wake up async readdir requests that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* Wait for all async readdir work to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the vfsmount.
	 * Thus shrink_dcache_for_umount() will warn about dentries still in
	 * use if async readdir has not finished yet.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;

out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");