/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/hmdfs/inode.h
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#ifndef INODE_H
#define INODE_H

#include "hmdfs.h"

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
#include <linux/iversion.h>
#endif

enum {
        HMDFS_REMOTE_INODE_NONE = 0,
        HMDFS_REMOTE_INODE_STASHING,
        HMDFS_REMOTE_INODE_RESTORING,
};

/*****************************************************************************
 * fid
 *****************************************************************************/

/* Bits for fid_flags */
enum {
        HMDFS_FID_NEED_OPEN = 0,
        HMDFS_FID_OPENING,
};

struct hmdfs_fid {
        __u64 ver;
        __u32 id;
};

/*
 * The cache file is laid out in the following format:
 *  ________________________________________________________________
 * | meta file info |  remote file(s) path  |      file content     |
 * |      head      |          path         |          data         |
 *                  ↑                       ↑
 *              path_offs               data_offs
 */
struct hmdfs_cache_info {
        /* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
        __u32 path_offs;
        __u32 path_len;
        __u32 path_cnt;
        char *path_buf;
        /* Paths of the sticky remote file and its hardlinks, split by '\0' */
        char *path;
        /* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
        __u32 data_offs;
        /* # of pages that need to be written to the remote file during offline */
        atomic64_t to_write_pgs;
        /* # of pages written to the remote file during offline */
        atomic64_t written_pgs;
        /* Stash file handle */
        struct file *cache_file;
};
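
/*
 * For illustration only: a sketch of how the two offsets above relate,
 * assuming byte offsets and a power-of-two HMDFS_STASH_BLK_SIZE (the
 * real values are computed by the stash code elsewhere in hmdfs):
 *
 *   path_offs = round_up(<size of meta file head>, HMDFS_STASH_BLK_SIZE);
 *   data_offs = round_up(path_offs + path_len, HMDFS_STASH_BLK_SIZE);
 *
 * Under those assumptions, page `idx` of the stashed file content would
 * live at cache-file offset data_offs + (idx << PAGE_SHIFT).
 */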

/*****************************************************************************
 * inode info and its inline helpers
 *****************************************************************************/

struct hmdfs_inode_info {
        struct inode *lower_inode; // for local/merge inode
        struct hmdfs_peer *conn;   // for remote inode
        struct kref ref;
        spinlock_t fid_lock;
        struct hmdfs_fid fid;
        unsigned long fid_flags;
        wait_queue_head_t fid_wq;
        __u8 inode_type; // deprecated: use ino system instead

        /* writeback list */
        struct list_head wb_list;

#ifdef CONFIG_HMDFS_FS_PERMISSION
        __u16 perm;
#endif
        /*
         * Looking up a remote file generates a local inode; this stores
         * the combination of the remote inode number and generation so
         * that the uniqueness of the local inode can be determined.
         */
        __u64 remote_ino;
        /*
         * If this value is not ULLONG_MAX, a remote getattr syscall
         * should return it as the inode size.
         */
        __u64 getattr_isize;
        /*
         * Stores the remote ctime, captured when the remote file is
         * opened.
         */
        struct hmdfs_time_t remote_ctime;
        /*
         * Stores the last time, aligned to dcache_precision, that the
         * remote file was modified. Note that this value won't be
         * effective if writecache_expire is set.
         */
        struct hmdfs_time_t stable_ctime;
        /*
         * If this value is nonzero, the pagecache should be truncated
         * when the file is opened after this expiry time. Furthermore,
         * stable_ctime won't be effective.
         */
        unsigned long writecache_expire;
        /*
         * Records how many times the file has been written while it is
         * open. 'writecache_expire' will be set on close if this value
         * is nonzero.
         */
        atomic64_t write_counter;
        /*
         * Linked into hmdfs_peer::wr_opened_inode_list if the remote
         * inode is opened for writing; wr_opened_cnt tracks possibly
         * multiple writable opens.
         */
        struct list_head wr_opened_node;
        atomic_t wr_opened_cnt;
        spinlock_t stash_lock;
        unsigned int stash_status;
        struct hmdfs_cache_info *cache;
        /* Linked into hmdfs_peer::stashed_inode_list when stashing completes */
        struct list_head stash_node;
        /*
         * The flush/fsync threads hold the write lock while threads
         * calling writepage hold the read lock. We use the rwsem to
         * eliminate cases where a flush/fsync completes while re-dirtied
         * pages are still dirty.
         *
         * Here is the explanation in detail:
         *
         * During `writepage()`, the state of a re-dirtied page switches
         * through the following states in sequence:
         * s1: page dirty + tree dirty
         * s2: page dirty + tree dirty <tag_pages_for_writeback>
         * s3: page clean + tree dirty <clear_page_dirty_for_io>
         * s4: page clean + tree clean + write back <set_page_writeback>
         * s5: page dirty + tree dirty + write back <redirty_page_for_writepage>
         * s6: page dirty + tree dirty <end_page_writeback>
         *
         * A page in s4 will thus be ignored by a concurrent
         * `do_writepages()` invoked by `close()` or `fsync()`, making its
         * state inconsistent.
         *
         * To avoid such a situation, we use per-file rwsems to prevent
         * concurrent in-flight `writepage` during `close()` or `fsync()`
         * (see the usage sketch after this struct).
         *
         * The overhead is minimal, since read holders still allow
         * concurrent `writepage`, while it is natural for `close()` or
         * `fsync()` to wait for in-flight `writepage()`s to complete.
         *
         * NOTE that in the worst case, a process may wait on the write
         * semaphore for TIMEOUT even if a signal is pending. But we have
         * to wait there to iterate over all pages and make sure that no
         * dirty page remains.
         */
        struct rw_semaphore wpage_sem;

        // The real inode shared with the VFS. ALWAYS PUT IT AT THE BOTTOM.
        struct inode vfs_inode;
};
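
/*
 * A sketch of the wpage_sem protocol described above; the surrounding
 * call sequences are illustrative, not the actual hmdfs entry points:
 *
 *   // writepage side: read holders may run concurrently
 *   down_read(&info->wpage_sem);
 *   ... clear_page_dirty_for_io()/set_page_writeback() ...
 *   up_read(&info->wpage_sem);
 *
 *   // flush/fsync side: excludes every in-flight writepage()
 *   down_write(&info->wpage_sem);
 *   ... do_writepages() ...
 *   up_write(&info->wpage_sem);
 */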

struct hmdfs_readdir_work {
        struct list_head head;
        struct dentry *dentry;
        struct hmdfs_peer *con;
        struct delayed_work dwork;
};

static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
{
        return container_of(inode, struct hmdfs_inode_info, vfs_inode);
}
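
/*
 * Typical use (a sketch; hmdfs_example_op is a hypothetical name):
 *
 *   static int hmdfs_example_op(struct inode *inode)
 *   {
 *           struct hmdfs_inode_info *info = hmdfs_i(inode);
 *
 *           // info->conn is only set for remote inodes
 *           return info->conn ? 0 : -EINVAL;
 *   }
 *
 * This works because vfs_inode is embedded in hmdfs_inode_info, so
 * container_of() can walk back from the VFS inode to its wrapper.
 */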

static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
{
        const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);

        /* Refer to comments in hmdfs_stash_remote_inode() */
        return (hmdfs_is_stash_enabled(sbi) &&
                smp_load_acquire(&info->stash_status)); // paired with release store
}
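
/*
 * A sketch of the writer side that the acquire load above pairs with,
 * assuming the transition happens under stash_lock (the real code lives
 * in the stash path, e.g. hmdfs_stash_remote_inode()):
 *
 *   spin_lock(&info->stash_lock);
 *   ... set up info->cache ...
 *   smp_store_release(&info->stash_status, HMDFS_REMOTE_INODE_STASHING);
 *   spin_unlock(&info->stash_lock);
 *
 * A reader that observes a nonzero stash_status is then guaranteed to
 * also observe the fields published before the release store.
 */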

static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
                                          struct hmdfs_fid *fid)
{
        spin_lock(&info->fid_lock);
        *fid = info->fid;
        spin_unlock(&info->fid_lock);
}
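
/*
 * The update side (a sketch; the real publisher is the remote
 * open/reopen path) stores a new fid under the same lock, so a fetched
 * ver/id pair is always consistent:
 *
 *   spin_lock(&info->fid_lock);
 *   info->fid = new_fid;
 *   spin_unlock(&info->fid_lock);
 */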

/*****************************************************************************
 * ino allocator
 *****************************************************************************/

enum HMDFS_ROOT {
        HMDFS_ROOT_ANCESTOR = 1, // /
        HMDFS_ROOT_DEV,          // /device_view
        HMDFS_ROOT_DEV_LOCAL,    // /device_view/local
        HMDFS_ROOT_DEV_REMOTE,   // /device_view/remote
        HMDFS_ROOT_MERGE,        // /merge_view

        HMDFS_ROOT_INVALID,
};

// directory tree layer (by path depth), not an overlay layer
enum HMDFS_LAYER_TYPE {
        HMDFS_LAYER_ZERO = 0,      // /
        HMDFS_LAYER_FIRST_DEVICE,  // /device_view
        HMDFS_LAYER_SECOND_LOCAL,  // /device_view/local
        HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
        HMDFS_LAYER_OTHER_LOCAL,   // /device_view/local/xx
        HMDFS_LAYER_OTHER_REMOTE,  // /device_view/remote/xx

        HMDFS_LAYER_FIRST_MERGE,   // /merge_view
        HMDFS_LAYER_OTHER_MERGE,   // /merge_view/xxx
        HMDFS_LAYER_INVALID,
};

struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
                                     struct inode *lo_i,
                                     struct hmdfs_peer *peer);
struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
                                       struct dentry *fst_lo_d);

struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
                                       struct inode *lo_i);
struct hmdfs_peer;
struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
                                        struct hmdfs_peer *peer,
                                        uint64_t remote_ino);
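
/*
 * Illustrative call site for the remote variant (a sketch, assuming the
 * usual iget5_locked() convention of returning a locked I_NEW inode; the
 * real lookup code fills in attributes from peer metadata):
 *
 *   struct inode *inode = hmdfs_iget5_locked_remote(sb, peer, remote_ino);
 *
 *   if (!inode)
 *           return ERR_PTR(-ENOMEM);
 *   if (inode->i_state & I_NEW) {
 *           ... initialise the inode from remote attributes ...
 *           unlock_new_inode(inode);
 *   }
 */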

#endif // INODE_H