// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

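/*
 * Each concurrent syncfs caller parks one of these on @hsi.wait_list and
 * sleeps on @done. When a waiter is woken with @need_abort set, it returns
 * without running its own syncfs, presumably because the run that just
 * finished already covers its dirty data (see hmdfs_sync_fs()).
 */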
struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

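/*
 * Dispatch getxattr: only user.* attributes are supported. The request goes
 * to the lower fs for local inodes and over the network otherwise, with the
 * value size clamped to HMDFS_XATTR_SIZE_MAX (-ERANGE on a clamped buffer
 * is turned into -E2BIG).
 */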
static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size over %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

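/*
 * Dispatch setxattr by inode type: local inodes go straight to the lower fs,
 * merge-view inodes are redirected to their local lower dentry, and
 * everything else is sent to the remote peer. A NULL @value means
 * removexattr (see hmdfs_xattr_local_set()).
 */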
static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE ||
		 info->inode_type == HMDFS_LAYER_OTHER_MERGE_CLOUD)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* sb->s_xattr arrays are iterated until a NULL entry */
};

#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

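/*
 * Only the layers that pin a lower inode (HMDFS_LAYER_ZERO,
 * HMDFS_LAYER_OTHER_LOCAL and HMDFS_LAYER_SECOND_LOCAL) drop that
 * reference here; device and remote layers have nothing extra to release.
 */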
static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

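/*
 * Tear down in roughly the reverse order of hmdfs_fill_super(): stop
 * connections and writeback helpers first, then release creds, cached
 * paths and statistics; the sb sequence number is freed only once all
 * other access to the sbi has completed.
 */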
void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfree(sbi->cloud_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* After all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

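/*
 * Note that only the private fields in front of the embedded vfs_inode are
 * zeroed here; the vfs_inode itself is initialized once per slab object by
 * init_once() through the cache constructor.
 */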
static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);

	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

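/*
 * Fan statfs out to every online peer. node_lock is dropped around the
 * sleeping network call and re-taken afterwards, relying on the held peer
 * reference to keep the list node valid; the error of the last failing
 * peer, if any, is returned.
 */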
static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	// merge_view, merge_view/xxx and device_view are assigned src_inode info
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);
	if (sbi->cloud_dir)
		seq_printf(m, ",cloud_dir=%s", sbi->cloud_dir);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}

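/*
 * Serialize syncfs: the first caller becomes the executor while later
 * callers park a syncfs_item on @hsi.wait_list. A finishing executor first
 * completes everything on @hsi.pending_list with need_abort set (the run
 * that just ended should already cover their dirty data), then promotes the
 * newest waiter to be the next executor and splices the remaining waiters
 * onto @hsi.pending_list for the next round.
 */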
static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs cannot run concurrently in hmdfs_sync_fs: we have to wait
	 * until all remote syncfs calls return or time out, and during that
	 * wait we must protect @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: we put the updates of @sbi->hsi.remote_ret and
	 * @sbi->hsi.wait_count inside the spinlock-protected region to avoid
	 * the following scenario caused by out-of-order execution:
	 *
	 *   syncfs                                syncfs_cb
	 *   sbi->hsi.remote_ret = 0;
	 *   atomic_set(&sbi->hsi.wait_count, 0);
	 *                                         lock
	 *                                         version == old_version
	 *                                         sbi->hsi.remote_ret = resp->ret_code
	 *                                         atomic_dec(&sbi->hsi.wait_count);
	 *                                         unlock
	 *   lock
	 *   version = old_version + 1
	 *   unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
	 * before the spinlock is taken, racing with syncfs_cb(); putting
	 * both assignments under the spinlock fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that went offline normally. It's okay to drop
		 * it.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There is a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writes, leading to an incorrect
		 * @sb_dirty_count. Dirty data produced during that gap would
		 * not be synced by the next syncfs operation. To avoid this,
		 * we have to invoke sync_inodes_sb() again after reading
		 * @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background will make sure @sbi->hsi.wait_count
	 * finally drops to zero, whether the remote syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abandon syncfs processes in pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
					list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: return the syncfs error back to the syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

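/*
 * Mocklisp-style string hash over @path. Note that @len is currently
 * unused: the loop walks to the terminating NUL instead. When @case_sense
 * is false the path is folded to lower case before hashing.
 */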
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL);
	sbi->s_client_statis =
		kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

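/*
 * Keep the original mount destination in @real_dst and rebuild @local_dst
 * as "<real_dst><UPDATE_LOCAL_DST>", failing with -EINVAL if the combined
 * path would exceed PATH_MAX.
 */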
static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie in the following format:
 *
 * | random | boot time(ms) | 0x00 |
 * |--------|---------------|------|
 *     16          33          15    (bits)
 *
 * This makes sure the boot cookie is unique over a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

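/*
 * Mount-time setup: parse options, initialize the writeback and stash
 * helpers, register the per-mount sysfs node (named after a hash of
 * local_dst), resolve @dev_name as the lower root, then stack the hmdfs
 * root inode and dentry on top of it. Any failure unwinds through the
 * out_* labels below.
 */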
static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	if (!raw_data)
		return -EINVAL;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	// add ctrl sysfs node
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->cloud_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

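/*
 * Umount helper: cancel the queued readdir works that have not started and
 * release their dentry/peer references, forbid new async readdir and wake
 * waiters with -EINTR, then give in-flight works up to HZ jiffies (one
 * second) to drain.
 */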
static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* cancel the works that are not running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* wake up the async readdir requests that are waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* wait for all async readdir works to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds references on dentries but not on the
	 * vfsmount, so shrink_dcache_for_umount() will warn about dentries
	 * still in use if async readdir has not finished.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;
out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");