// SPDX-License-Identifier: GPL-2.0
/*
 * fs/hmdfs/main.c
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#include "hmdfs.h"

#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/xattr.h>
#include <linux/idr.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(5, 9, 0)
#include <linux/prandom.h>
#else
#include <linux/random.h>
#endif

#include "authority/authentication.h"
#include "hmdfs_server.h"
#include "comm/device_node.h"
#include "comm/message_verify.h"
#include "comm/protocol.h"
#include "comm/socket_adapter.h"
#include "hmdfs_merge_view.h"
#include "server_writeback.h"
#include "hmdfs_share.h"

#include "comm/node_cb.h"
#include "stash.h"

#define CREATE_TRACE_POINTS
#include "hmdfs_trace.h"

#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33

#define HMDFS_SB_SEQ_FROM 1

struct hmdfs_mount_priv {
	const char *dev_name;
	const char *raw_data;
};

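/*
 * Used by hmdfs_sync_fs() to serialize concurrent syncfs() callers.
 * Callers arriving while a flush is running queue an item on
 * @hsi.wait_list and sleep on @done; when the flush finishes, the newest
 * waiter performs the next flush, and the remaining waiters are completed
 * with @need_abort set once that flush is done, since it already covers
 * their dirty data.
 */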
struct syncfs_item {
	struct list_head list;
	struct completion done;
	bool need_abort;
};

static DEFINE_IDA(hmdfs_sb_seq);

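/*
 * Each superblock gets a unique sequence number. Allocation starts from
 * HMDFS_SB_SEQ_FROM (1) so that 0 can safely denote "unallocated";
 * hmdfs_free_sb_seq() treats 0 as a no-op.
 */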
static inline int hmdfs_alloc_sb_seq(void)
{
	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
}

static inline void hmdfs_free_sb_seq(unsigned int seq)
{
	if (!seq)
		return;
	ida_simple_remove(&hmdfs_sb_seq, seq);
}

static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
				 void *value, size_t size)
{
	struct path lower_path;
	ssize_t res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	res = vfs_getxattr(lower_path.dentry, name, value, size);
	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	ssize_t res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
	kfree(send_buf);
	return res;
}

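/*
 * Only "user.*" attributes are served, and the value size is capped at
 * HMDFS_XATTR_SIZE_MAX: a larger caller buffer is silently clamped, and
 * an -ERANGE result for a clamped request is turned into -E2BIG to tell
 * the caller the value can never be fetched through hmdfs.
 */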
static int hmdfs_xattr_get(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	int res = 0;
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	size_t r_size = size;

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX)
		r_size = HMDFS_XATTR_SIZE_MAX;

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
	else
		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);

	if (res == -ERANGE && r_size != size) {
		hmdfs_info("xattr value size larger than %d is not supported",
			   HMDFS_XATTR_SIZE_MAX);
		res = -E2BIG;
	}

	return res;
}

static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	struct path lower_path;
	int res = 0;

	hmdfs_get_lower_path(dentry, &lower_path);
	if (value) {
		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		res = vfs_removexattr(lower_path.dentry, name);
	}

	hmdfs_put_lower_path(&lower_path);
	return res;
}

static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	struct inode *inode = d_inode(dentry);
	struct hmdfs_inode_info *info = hmdfs_i(inode);
	struct hmdfs_peer *conn = info->conn;
	char *send_buf = NULL;
	int res = 0;

	send_buf = hmdfs_get_dentry_relative_path(dentry);
	if (!send_buf)
		return -ENOMEM;

	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
	kfree(send_buf);
	return res;
}

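/*
 * In the merge view an xattr is written only to the local device's lower
 * dentry (HMDFS_DEVID_LOCAL); remote copies are left untouched.
 */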
static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int err = 0;
	struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL);

	if (!lower_dentry) {
		err = -EOPNOTSUPP;
		goto out;
	}
	err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags);
out:
	dput(lower_dentry);
	return err;
}

static int hmdfs_xattr_set(const struct xattr_handler *handler,
			   struct dentry *dentry, struct inode *inode,
			   const char *name, const void *value,
			   size_t size, int flags)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	if (!hmdfs_support_xattr(dentry))
		return -EOPNOTSUPP;

	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EOPNOTSUPP;

	if (size > HMDFS_XATTR_SIZE_MAX) {
		hmdfs_info("xattr value too long: %zu", size);
		return -E2BIG;
	}

	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
	else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE)
		return hmdfs_xattr_merge_set(dentry, name, value, size, flags);

	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
}

const struct xattr_handler hmdfs_xattr_handler = {
	.prefix = "", /* catch all */
	.get = hmdfs_xattr_get,
	.set = hmdfs_xattr_set,
};

static const struct xattr_handler *hmdfs_xattr_handlers[] = {
	&hmdfs_xattr_handler,
	NULL, /* sentinel: the VFS walks this array until it hits NULL */
};

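/*
 * So from userspace only the user namespace works, e.g. (illustrative):
 *   setfattr -n user.tag -v demo <file>     -> forwarded to local/remote
 *   setfattr -n trusted.tag -v x <file>     -> EOPNOTSUPP
 */
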
#define HMDFS_NODE_EVT_CB_DELAY 2

struct kmem_cache *hmdfs_inode_cachep;
struct kmem_cache *hmdfs_dentry_cachep;

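/*
 * Inodes are freed through an RCU grace period so that lock-free path
 * walkers still dereferencing the inode never see freed memory.
 */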
static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(hmdfs_inode_cachep,
			container_of(inode, struct hmdfs_inode_info,
				     vfs_inode));
}

static void hmdfs_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}

static void hmdfs_evict_inode(struct inode *inode)
{
	struct hmdfs_inode_info *info = hmdfs_i(inode);

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
		return;
	if (info->inode_type == HMDFS_LAYER_ZERO ||
	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
		iput(info->lower_inode);
		info->lower_inode = NULL;
	}
}

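/*
 * Tear-down order matters here: stop connections and background workers
 * before dropping the lower super block reference and freeing @sbi, and
 * release the sb sequence number only after every sysfs/statistics user
 * is gone.
 */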
void hmdfs_put_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	struct super_block *lower_sb = sbi->lower_sb;

	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
		   sbi->local_src);

	hmdfs_cfn_destroy(sbi);
	hmdfs_unregister_sysfs(sbi);
	hmdfs_connections_stop(sbi);
	hmdfs_clear_share_table(sbi);
	hmdfs_destroy_server_writeback(sbi);
	hmdfs_exit_stash(sbi);
	atomic_dec(&lower_sb->s_active);
	put_cred(sbi->cred);
	if (sbi->system_cred)
		put_cred(sbi->system_cred);
	hmdfs_destroy_writeback(sbi);
	kfree(sbi->local_src);
	kfree(sbi->local_dst);
	kfree(sbi->real_dst);
	kfree(sbi->cache_dir);
	kfifo_free(&sbi->notify_fifo);
	sb->s_fs_info = NULL;
	sbi->lower_sb = NULL;
	hmdfs_release_sysfs(sbi);
	/* After all accesses have completed */
	hmdfs_free_sb_seq(sbi->seq);
	kfree(sbi->s_server_statis);
	kfree(sbi->s_client_statis);
	kfree(sbi);
}

static struct inode *hmdfs_alloc_inode(struct super_block *sb)
{
	struct hmdfs_inode_info *gi =
		kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);

	if (!gi)
		return NULL;
	memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
	INIT_LIST_HEAD(&gi->wb_list);
	init_rwsem(&gi->wpage_sem);
	gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
	atomic64_set(&gi->write_counter, 0);
	gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
	spin_lock_init(&gi->fid_lock);
	INIT_LIST_HEAD(&gi->wr_opened_node);
	atomic_set(&gi->wr_opened_cnt, 0);
	init_waitqueue_head(&gi->fid_wq);
	INIT_LIST_HEAD(&gi->stash_node);
	spin_lock_init(&gi->stash_lock);
	return &gi->vfs_inode;
}

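/*
 * statfs for a purely remote object: broadcast F_STATFS to every online
 * peer and return the last failure, if any. @buf ends up holding the
 * reply of the last peer that answered successfully.
 */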
static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int error = 0;
	int ret = 0;
	char *dir_path = NULL;
	char *name_path = NULL;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);

	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
	if (!dir_path) {
		error = -EACCES;
		goto out;
	}

	name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
	if (!name_path) {
		error = -EACCES;
		goto out;
	}
	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		if (con->status == NODE_STAT_ONLINE &&
		    con->version > USERSPACE_MAX_VER) {
			peer_get(con);
			mutex_unlock(&sbi->connections.node_lock);
			hmdfs_debug("send MSG to remote devID %llu",
				    con->device_id);
			ret = hmdfs_send_statfs(con, name_path, buf);
			if (ret != 0)
				error = ret;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
		}
	}
	mutex_unlock(&sbi->connections.node_lock);

out:
	kfree(dir_path);
	kfree(name_path);
	return error;
}

static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	int err = 0;
	struct path lower_path;
	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
	struct super_block *sb = d_inode(dentry)->i_sb;
	struct hmdfs_sb_info *sbi = sb->s_fs_info;

	trace_hmdfs_statfs(dentry, info->inode_type);
	/* merge_view, merge_view/xxx and device_view report local_src's statfs */
	if (hmdfs_i_merge(info) ||
	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
		err = kern_path(sbi->local_src, 0, &lower_path);
		if (err)
			goto out;
		err = vfs_statfs(&lower_path, buf);
		path_put(&lower_path);
	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
		hmdfs_get_lower_path(dentry, &lower_path);
		err = vfs_statfs(&lower_path, buf);
		hmdfs_put_lower_path(&lower_path);
	} else {
		err = hmdfs_remote_statfs(dentry, buf);
	}

	buf->f_type = HMDFS_SUPER_MAGIC;
out:
	return err;
}

static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);

	if (sbi->s_case_sensitive)
		seq_puts(m, ",sensitive");
	else
		seq_puts(m, ",insensitive");

	if (sbi->s_merge_switch)
		seq_puts(m, ",merge_enable");
	else
		seq_puts(m, ",merge_disable");

	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
	seq_printf(m, ",user_id=%u", sbi->user_id);

	if (sbi->cache_dir)
		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
	if (sbi->real_dst)
		seq_printf(m, ",real_dst=%s", sbi->real_dst);

	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");

	return 0;
}
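
/*
 * A mounted instance then shows up in /proc/mounts roughly as
 * (illustrative values):
 *   none /mnt/hmdfs hmdfs rw,relatime,sensitive,merge_enable,ra_pages=128,
 *   user_id=100,cache_dir=/data/hmdfs_cache,offline_stash,dentry_cache 0 0
 */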

static int hmdfs_sync_fs(struct super_block *sb, int wait)
{
	int time_left;
	int err = 0;
	struct hmdfs_peer *con = NULL;
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
	struct syncfs_item item, *entry = NULL, *tmp = NULL;

	if (!wait)
		return 0;

	trace_hmdfs_syncfs_enter(sbi);

	spin_lock(&sbi->hsi.list_lock);
	if (!sbi->hsi.is_executing) {
		sbi->hsi.is_executing = true;
		item.need_abort = false;
		spin_unlock(&sbi->hsi.list_lock);
	} else {
		init_completion(&item.done);
		list_add_tail(&item.list, &sbi->hsi.wait_list);
		spin_unlock(&sbi->hsi.list_lock);
		wait_for_completion(&item.done);
	}

	if (item.need_abort)
		goto out;

	/*
	 * Syncfs must not run concurrently within hmdfs_sync_fs: we have to
	 * wait until all remote syncfs calls return or time out, and during
	 * that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret must be
	 * protected from concurrent updates.
	 */

	spin_lock(&sbi->hsi.v_lock);
	sbi->hsi.version++;
	/*
	 * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count
	 * into the spinlock protection area to avoid the following scenario
	 * caused by out-of-order execution:
	 *
	 *	syncfs					syncfs_cb
	 *	sbi->hsi.remote_ret = 0;
	 *	atomic_set(&sbi->hsi.wait_count, 0);
	 *						lock
	 *						version == old_version
	 *						sbi->hsi.remote_ret = resp->ret_code
	 *						atomic_dec(&sbi->hsi.wait_count);
	 *						unlock
	 *	lock
	 *	version = old_version + 1
	 *	unlock
	 *
	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
	 * before the spinlock is taken, racing with syncfs_cb(); making
	 * these two assignments under the spinlock fixes this.
	 */
	sbi->hsi.remote_ret = 0;
	atomic_set(&sbi->hsi.wait_count, 0);
	spin_unlock(&sbi->hsi.v_lock);

	mutex_lock(&sbi->connections.node_lock);
	list_for_each_entry(con, &sbi->connections.node_list, list) {
		/*
		 * Dirty data does not need to be synchronized to remote
		 * devices that go offline normally. It's okay to drop
		 * them.
		 */
		if (con->status != NODE_STAT_ONLINE)
			continue;

		peer_get(con);
		mutex_unlock(&sbi->connections.node_lock);

		/*
		 * There exists a gap between sync_inodes_sb() and sync_fs()
		 * which may race with remote writing, leading to an
		 * inaccurate @sb_dirty_count. The dirty data produced during
		 * the gap period won't be synced in the next syncfs
		 * operation. To avoid this, we have to invoke
		 * sync_inodes_sb() again after getting @con->sb_dirty_count.
		 */
		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
		sync_inodes_sb(sb);

		if (!con->old_sb_dirty_count) {
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		err = hmdfs_send_syncfs(con, syncfs_timeout);
		if (err) {
			hmdfs_warning("send syncfs failed with %d on node %llu",
				      err, con->device_id);
			sbi->hsi.remote_ret = err;
			peer_put(con);
			mutex_lock(&sbi->connections.node_lock);
			continue;
		}

		atomic_inc(&sbi->hsi.wait_count);

		peer_put(con);
		mutex_lock(&sbi->connections.node_lock);
	}
	mutex_unlock(&sbi->connections.node_lock);

	/*
	 * Async work in the background will make sure @sbi->hsi.wait_count
	 * finally drops to zero, whether syncfs succeeds or fails.
	 */
	time_left = wait_event_interruptible(
		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
	if (time_left < 0) {
		hmdfs_warning("syncfs is interrupted by external signal");
		err = -EINTR;
	}

	if (!err && sbi->hsi.remote_ret)
		err = sbi->hsi.remote_ret;

	/* Abort the syncfs waiters parked on pending_list */
	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
		entry->need_abort = true;
		complete(&entry->done);
	}
	INIT_LIST_HEAD(&sbi->hsi.pending_list);

	/* Pick the last syncfs process in wait_list */
	spin_lock(&sbi->hsi.list_lock);
	if (list_empty(&sbi->hsi.wait_list)) {
		sbi->hsi.is_executing = false;
	} else {
		entry = list_last_entry(&sbi->hsi.wait_list,
					struct syncfs_item, list);
		list_del_init(&entry->list);
		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
		entry->need_abort = false;
		complete(&entry->done);
	}
	spin_unlock(&sbi->hsi.list_lock);

out:
	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
				get_cmd_timeout(sbi, F_SYNCFS), err);

	/* TODO: Return syncfs err back to syscall */

	return err;
}

struct super_operations hmdfs_sops = {
	.alloc_inode = hmdfs_alloc_inode,
	.destroy_inode = hmdfs_destroy_inode,
	.evict_inode = hmdfs_evict_inode,
	.put_super = hmdfs_put_super,
	.statfs = hmdfs_statfs,
	.show_options = hmdfs_show_options,
	.sync_fs = hmdfs_sync_fs,
};

static void init_once(void *obj)
{
	struct hmdfs_inode_info *i = obj;

	inode_init_once(&i->vfs_inode);
}

static int __init hmdfs_init_caches(void)
{
	int err = -ENOMEM;

	hmdfs_inode_cachep =
		kmem_cache_create("hmdfs_inode_cache",
				  sizeof(struct hmdfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);
	if (unlikely(!hmdfs_inode_cachep))
		goto out;
	hmdfs_dentry_cachep =
		kmem_cache_create("hmdfs_dentry_cache",
				  sizeof(struct hmdfs_dentry_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_cachep))
		goto out_des_ino;
	hmdfs_dentry_merge_cachep =
		kmem_cache_create("hmdfs_dentry_merge_cache",
				  sizeof(struct hmdfs_dentry_info_merge), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (unlikely(!hmdfs_dentry_merge_cachep))
		goto out_des_dc;
	return 0;

out_des_dc:
	kmem_cache_destroy(hmdfs_dentry_cachep);
out_des_ino:
	kmem_cache_destroy(hmdfs_inode_cachep);
out:
	return err;
}

static void hmdfs_destroy_caches(void)
{
	rcu_barrier();
	kmem_cache_destroy(hmdfs_inode_cachep);
	hmdfs_inode_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_cachep);
	hmdfs_dentry_cachep = NULL;
	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
	hmdfs_dentry_merge_cachep = NULL;
}

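/*
 * Hash a path string. Note that @len is currently unused: hashing stops
 * at the terminating NUL. @case_sense selects whether characters are
 * folded to lower case before hashing.
 */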
uint64_t path_hash(const char *path, int len, bool case_sense)
{
	uint64_t res = 0;
	const char *kp = path;
	char c;

	/* Mocklisp hash function. */
	while (*kp) {
		c = *kp;
		if (!case_sense)
			c = tolower(c);
		res = (res << 5) - res + (uint64_t)(c);
		kp++;
	}
	return res;
}

static char *get_full_path(struct path *path)
{
	char *buf, *tmp;
	char *ret = NULL;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	tmp = d_path(path, buf, PATH_MAX);
	if (IS_ERR(tmp))
		goto out;

	ret = kstrdup(tmp, GFP_KERNEL);
out:
	kfree(buf);
	return ret;
}

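/*
 * Fill every command slot with 0xff first ("no explicit timeout"), then
 * give the known commands their deadlines; see the get_cmd_timeout()
 * callers for how these values bound remote requests.
 */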
static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
{
	memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));

	set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
	set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
	set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
	set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
	set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
}

static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
{
	int ret;

	ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We have to use dynamic memory since struct server/client_statistic
	 * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
	 */
	sbi->s_server_statis =
		kcalloc(F_SIZE, sizeof(*sbi->s_server_statis), GFP_KERNEL);
	sbi->s_client_statis =
		kcalloc(F_SIZE, sizeof(*sbi->s_client_statis), GFP_KERNEL);
	if (!sbi->s_server_statis || !sbi->s_client_statis) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hmdfs_alloc_sb_seq();
	if (ret < 0) {
		hmdfs_err("no sb seq available err %d", ret);
		goto out;
	}
	sbi->seq = ret;
	ret = 0;

	spin_lock_init(&sbi->notify_fifo_lock);
	mutex_init(&sbi->cmd_handler_mutex);
	sbi->s_case_sensitive = false;
	sbi->s_features = HMDFS_FEATURE_READPAGES |
			  HMDFS_FEATURE_READPAGES_OPEN |
			  HMDFS_ATOMIC_OPEN;
	sbi->s_merge_switch = false;
	sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
	sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
	sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
	sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
	hmdfs_init_cmd_timeout(sbi);
	sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
	sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
	sbi->s_offline_stash = true;
	sbi->s_dentry_cache = true;
	sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
	/* Initialize before hmdfs_register_sysfs() */
	atomic_set(&sbi->connections.conn_seq, 0);
	mutex_init(&sbi->connections.node_lock);
	INIT_LIST_HEAD(&sbi->connections.node_list);

	ret = hmdfs_init_share_table(sbi);
	if (ret)
		goto out;
	init_waitqueue_head(&sbi->async_readdir_wq);
	INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
	INIT_LIST_HEAD(&sbi->async_readdir_work_list);
	spin_lock_init(&sbi->async_readdir_msg_lock);
	spin_lock_init(&sbi->async_readdir_work_lock);

	return 0;

out:
	return ret;
}

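/*
 * Per-command client response statistics: HMDFS_RESP_NORMAL accumulates
 * latency (total, max, count), while HMDFS_RESP_DELAY and
 * HMDFS_RESP_TIMEOUT simply count delayed and timed-out responses.
 */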
void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
			      enum hmdfs_resp_type type, unsigned long start,
			      unsigned long end)
{
	unsigned long duration;

	switch (type) {
	case HMDFS_RESP_DELAY:
		sbi->s_client_statis[cmd].delay_resp_cnt++;
		break;
	case HMDFS_RESP_TIMEOUT:
		sbi->s_client_statis[cmd].timeout_cnt++;
		break;
	case HMDFS_RESP_NORMAL:
		duration = end - start;
		sbi->s_client_statis[cmd].total += duration;
		sbi->s_client_statis[cmd].resp_cnt++;
		if (sbi->s_client_statis[cmd].max < duration)
			sbi->s_client_statis[cmd].max = duration;
		break;
	default:
		hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
	}
}

static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
{
	int err = 0;
	const char *path_local = UPDATE_LOCAL_DST;
	int len = 0;

	sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
	if (!sbi->real_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	kfree(sbi->local_dst);
	sbi->local_dst = NULL;

	len = strlen(sbi->real_dst) + strlen(path_local) + 1;
	if (len > PATH_MAX) {
		err = -EINVAL;
		goto out_err;
	}
	sbi->local_dst = kmalloc(len, GFP_KERNEL);
	if (!sbi->local_dst) {
		err = -ENOMEM;
		goto out_err;
	}
	snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
out_err:
	return err;
}

/*
 * Generate a boot cookie with the following layout:
 *
 * | random  | boot time (ms) | zeros |
 * |---------|----------------|-------|
 *     16           33           15     (bits)
 *
 * This keeps the boot cookie unique over a period of
 * 2^33 / 1000 / 3600 / 24 = 99.4 days.
 */
uint64_t hmdfs_gen_boot_cookie(void)
{
	uint64_t now;
	uint16_t rand;

	now = ktime_to_ms(ktime_get());
	prandom_bytes(&rand, sizeof(rand));

	now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
	now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);

	return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
}

static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
	const char *dev_name = priv->dev_name;
	const char *raw_data = priv->raw_data;
	struct hmdfs_sb_info *sbi;
	int err = 0;
	struct inode *root_inode;
	struct path lower_path;
	struct super_block *lower_sb;
	struct dentry *root_dentry;
	char ctrl_path[CTRL_PATH_MAX_LEN];
	uint64_t ctrl_hash;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi) {
		err = -ENOMEM;
		goto out_err;
	}
	err = hmdfs_init_sbi(sbi);
	if (err)
		goto out_freesbi;
	sbi->sb = sb;
	err = hmdfs_parse_options(sbi, raw_data);
	if (err)
		goto out_freesbi;

	sb->s_fs_info = sbi;
	sb->s_magic = HMDFS_SUPER_MAGIC;
	sb->s_xattr = hmdfs_xattr_handlers;
	sb->s_op = &hmdfs_sops;

	sbi->boot_cookie = hmdfs_gen_boot_cookie();

	err = hmdfs_init_writeback(sbi);
	if (err)
		goto out_freesbi;
	err = hmdfs_init_server_writeback(sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_init_stash(sbi);
	if (err)
		goto out_freesbi;

	/* Add the ctrl sysfs node */
	ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
	scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
	hmdfs_debug("hash %llu", ctrl_hash);
	err = hmdfs_register_sysfs(ctrl_path, sbi);
	if (err)
		goto out_freesbi;

	err = hmdfs_update_dst(sbi);
	if (err)
		goto out_unreg_sysfs;

	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
			&lower_path);
	if (err) {
		hmdfs_err("open dev failed, errno = %d", err);
		goto out_unreg_sysfs;
	}

	lower_sb = lower_path.dentry->d_sb;
	atomic_inc(&lower_sb->s_active);
	sbi->lower_sb = lower_sb;
	sbi->local_src = get_full_path(&lower_path);
	if (!sbi->local_src) {
		hmdfs_err("get local_src failed!");
		err = -ENOMEM;
		goto out_sput;
	}

	sb->s_time_gran = lower_sb->s_time_gran;
	sb->s_maxbytes = lower_sb->s_maxbytes;
	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		hmdfs_err("maximum fs stacking depth exceeded");
		err = -EINVAL;
		goto out_sput;
	}
	root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
	if (IS_ERR(root_inode)) {
		err = PTR_ERR(root_inode);
		goto out_sput;
	}
	hmdfs_root_inode_perm_init(root_inode);
	sb->s_root = root_dentry = d_make_root(root_inode);
	if (!root_dentry) {
		err = -ENOMEM;
		goto out_sput;
	}

	err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
	if (err)
		goto out_freeroot;
	hmdfs_set_lower_path(root_dentry, &lower_path);
	sbi->cred = get_cred(current_cred());
	INIT_LIST_HEAD(&sbi->client_cache);
	INIT_LIST_HEAD(&sbi->server_cache);
	INIT_LIST_HEAD(&sbi->to_delete);
	mutex_init(&sbi->cache_list_lock);
	hmdfs_cfn_load(sbi);

	/* Initialize syncfs info */
	spin_lock_init(&sbi->hsi.v_lock);
	init_waitqueue_head(&sbi->hsi.wq);
	sbi->hsi.version = 0;
	sbi->hsi.is_executing = false;
	INIT_LIST_HEAD(&sbi->hsi.wait_list);
	INIT_LIST_HEAD(&sbi->hsi.pending_list);
	spin_lock_init(&sbi->hsi.list_lock);

	return err;
out_freeroot:
	dput(sb->s_root);
	sb->s_root = NULL;
out_sput:
	atomic_dec(&lower_sb->s_active);
	path_put(&lower_path);
out_unreg_sysfs:
	hmdfs_unregister_sysfs(sbi);
	hmdfs_release_sysfs(sbi);
out_freesbi:
	if (sbi) {
		sb->s_fs_info = NULL;
		hmdfs_exit_stash(sbi);
		hmdfs_destroy_writeback(sbi);
		hmdfs_destroy_server_writeback(sbi);
		kfifo_free(&sbi->notify_fifo);
		hmdfs_free_sb_seq(sbi->seq);
		kfree(sbi->local_src);
		kfree(sbi->local_dst);
		kfree(sbi->real_dst);
		kfree(sbi->cache_dir);
		kfree(sbi->s_server_statis);
		kfree(sbi->s_client_statis);
		kfree(sbi);
	}
out_err:
	return err;
}

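/*
 * The mount source names the lower directory tree, e.g. (illustrative):
 *   mount -t hmdfs /data/lower /mnt/hmdfs
 * kern_path() in hmdfs_fill_super() resolves it to find the lower sb,
 * while the option string is handed to hmdfs_parse_options().
 */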
static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *raw_data)
{
	struct hmdfs_mount_priv priv = {
		.dev_name = dev_name,
		.raw_data = raw_data,
	};

	/* hmdfs needs a valid dev_name to get the lower_sb's metadata */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
}

static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
{
	struct sendmsg_wait_queue *msg_wq = NULL;
	struct hmdfs_readdir_work *rw = NULL;
	struct hmdfs_readdir_work *tmp = NULL;
	struct list_head del_work;

	/* Cancel the works that have not started running yet */
	INIT_LIST_HEAD(&del_work);
	spin_lock(&sbi->async_readdir_work_lock);
	list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
		if (cancel_delayed_work(&rw->dwork))
			list_move(&rw->head, &del_work);
	}
	spin_unlock(&sbi->async_readdir_work_lock);

	list_for_each_entry_safe(rw, tmp, &del_work, head) {
		dput(rw->dentry);
		peer_put(rw->con);
		kfree(rw);
	}

	/* Wake up the async readdir requests still waiting for the remote */
	spin_lock(&sbi->async_readdir_msg_lock);
	sbi->async_readdir_prohibit = true;
	list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
		hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
	spin_unlock(&sbi->async_readdir_msg_lock);

	/* Wait for all async readdir work to finish */
	if (!list_empty(&sbi->async_readdir_work_list))
		wait_event_interruptible_timeout(sbi->async_readdir_wq,
			(list_empty(&sbi->async_readdir_work_list)), HZ);

	WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
}

static void hmdfs_kill_super(struct super_block *sb)
{
	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);

	/*
	 * Async readdir holds a reference on the dentry, not on the
	 * vfsmount. Thus shrink_dcache_for_umount() will warn about
	 * dentries still in use if async readdir has not finished.
	 */
	if (sbi)
		hmdfs_cancel_async_readdir(sbi);
	kill_anon_super(sb);
}

static struct file_system_type hmdfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "hmdfs",
	.mount = hmdfs_mount,
	.kill_sb = hmdfs_kill_super,
};

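/*
 * Module init order: slab caches first, then the node event callbacks
 * (stash/client/server), and only then the filesystem type, configfs and
 * sysfs entries that make hmdfs visible to userspace.
 */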
static int __init hmdfs_init(void)
{
	int err = 0;

	err = hmdfs_init_caches();
	if (err)
		goto out_err;

	hmdfs_node_evt_cb_init();

	hmdfs_stash_add_node_evt_cb();
	hmdfs_client_add_node_evt_cb();
	hmdfs_server_add_node_evt_cb();

	err = register_filesystem(&hmdfs_fs_type);
	if (err) {
		hmdfs_err("hmdfs register failed!");
		goto out_err;
	}

	err = hmdfs_init_configfs();
	if (err)
		goto out_err;

	err = hmdfs_sysfs_init();
	if (err)
		goto out_err;

	hmdfs_message_verify_init();
	return 0;
out_err:
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	hmdfs_destroy_caches();
	hmdfs_err("hmdfs init failed!");
	return err;
}

static void __exit hmdfs_exit(void)
{
	hmdfs_sysfs_exit();
	hmdfs_exit_configfs();
	unregister_filesystem(&hmdfs_fs_type);
	ida_destroy(&hmdfs_sb_seq);
	hmdfs_destroy_caches();
	hmdfs_info("hmdfs exited!");
}

module_init(hmdfs_init);
module_exit(hmdfs_exit);

EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
MODULE_DESCRIPTION("Harmony distributed file system");