/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/hmdfs/inode.h
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#ifndef INODE_H
#define INODE_H

#include "hmdfs.h"

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
#include <linux/iversion.h>
#endif

enum {
	HMDFS_REMOTE_INODE_NONE = 0,
	HMDFS_REMOTE_INODE_STASHING,
	HMDFS_REMOTE_INODE_RESTORING,
};
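
/*
 * Illustrative sketch (an assumption, not code from the driver): status
 * transitions are expected to be made under hmdfs_inode_info::stash_lock,
 * e.g. when a peer goes offline and stashing begins:
 *
 *	spin_lock(&info->stash_lock);
 *	if (info->stash_status == HMDFS_REMOTE_INODE_NONE)
 *		info->stash_status = HMDFS_REMOTE_INODE_STASHING;
 *	spin_unlock(&info->stash_lock);
 */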

/*****************************************************************************
 * fid
 *****************************************************************************/

/* Bits for fid_flags */
enum {
	HMDFS_FID_NEED_OPEN = 0,
	HMDFS_FID_OPENING,
};
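
/*
 * Sketch of how these bits can pair with fid_wq (illustrative only; the
 * real open path lives elsewhere in the driver): one task wins the right
 * to (re)open the remote file, the others sleep until it is done.
 *
 *	if (!test_and_set_bit(HMDFS_FID_OPENING, &info->fid_flags)) {
 *		... reopen the remote file and update info->fid ...
 *		clear_bit(HMDFS_FID_OPENING, &info->fid_flags);
 *		wake_up(&info->fid_wq);
 *	} else {
 *		wait_event(info->fid_wq,
 *			   !test_bit(HMDFS_FID_OPENING, &info->fid_flags));
 *	}
 */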

struct hmdfs_fid {
	__u64 ver;
	__u32 id;
};

/*
 * The cache file is laid out as follows:
 *  ______________________________________________________________
 * | meta file info | remote file(s) path |     file content      |
 * |      head      |        path         |         data          |
 *                  ↑                     ↑
 *              path_offs             data_offs
 */
struct hmdfs_cache_info {
	/* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 path_offs;
	__u32 path_len;
	__u32 path_cnt;
	char *path_buf;
	/* Stashed remote file (hardlink) paths, separated by '\0' */
	char *path;
	/* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 data_offs;
	/* # of pages that need to be written to the remote file during offline */
	atomic64_t to_write_pgs;
	/* # of pages written to the remote file during offline */
	atomic64_t written_pgs;
	/* Stash file handle */
	struct file *cache_file;
};
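
/*
 * Sketch of how the aligned offsets above can be derived (assumptions:
 * HMDFS_STASH_BLK_SIZE is a power-of-two block size defined by the stash
 * code, and 'head_size' stands for the size of the meta file info;
 * round_up() is the standard kernel helper):
 *
 *	cache->path_offs = round_up(head_size, HMDFS_STASH_BLK_SIZE);
 *	cache->data_offs = round_up(cache->path_offs + cache->path_len,
 *				    HMDFS_STASH_BLK_SIZE);
 */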

/*****************************************************************************
 * inode info and its inline helpers
 *****************************************************************************/

struct hmdfs_inode_info {
	struct inode *lower_inode; // for local/merge inode
	struct hmdfs_peer *conn;   // for remote inode
	struct kref ref;
	spinlock_t fid_lock;
	struct hmdfs_fid fid;
	unsigned long fid_flags;
	wait_queue_head_t fid_wq;
	__u8 inode_type; // deprecated: use ino system instead
	atomic_t write_opened;

	/* writeback list */
	struct list_head wb_list;

#ifdef CONFIG_HMDFS_FS_PERMISSION
	__u16 perm;
#endif
	/*
	 * Looking up a remote file generates a local inode; this field
	 * stores the combination of the remote inode number and generation
	 * in that case, so the uniqueness of the local inode can be
	 * determined.
	 */
	__u64 remote_ino;
#define CLOUD_RECORD_ID_LEN 33
	__u8 cloud_record_id[CLOUD_RECORD_ID_LEN];
#define CLOUD_DENTRY_RESERVED_LENGTH 3
	__u8 reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	/*
	 * If this value is not ULLONG_MAX, a getattr syscall on the remote
	 * file should return it as the inode size.
	 */
	__u64 getattr_isize;
	/*
	 * Stores the remote ctime, recorded when the remote file is opened.
	 */
	struct hmdfs_time_t remote_ctime;
	/*
	 * Stores the last time, aligned to dcache_precision, that the
	 * remote file was modified. Note that this value has no effect
	 * if writecache_expire is set.
	 */
	struct hmdfs_time_t stable_ctime;
	/*
	 * If nonzero, the pagecache should be truncated when the file is
	 * opened after this expiry time. In that case the stable_ctime
	 * mechanism is not in effect.
	 */
	unsigned long writecache_expire;
	/*
	 * Records how many times the file has been written while it is
	 * open. 'writecache_expire' is set on close if this value is
	 * nonzero.
	 */
	atomic64_t write_counter;
	/*
	 * Linked into hmdfs_peer::wr_opened_inode_list while the remote
	 * inode is opened for writing; wr_opened_cnt tracks possibly
	 * multiple concurrent writable opens.
	 */
	struct list_head wr_opened_node;
	atomic_t wr_opened_cnt;
	spinlock_t stash_lock;
	unsigned int stash_status;
	struct hmdfs_cache_info *cache;
	/* Linked into hmdfs_peer::stashed_inode_list when stashing completes */
	struct list_head stash_node;
	/*
	 * The flush/fsync thread holds the write lock while threads
	 * calling writepage hold the read lock. We use an rwsem to
	 * eliminate cases where a flush/fsync completes while re-dirtied
	 * pages remain dirty.
	 *
	 * Here is the explanation in detail:
	 *
	 * During `writepage()`, a re-dirtied page switches through the
	 * following states in sequence:
	 * s1: page dirty + tree dirty
	 * s2: page dirty + tree dirty <tag_pages_for_writeback>
	 * s3: page clean + tree dirty <clear_page_dirty_for_io>
	 * s4: page clean + tree clean + writeback <set_page_writeback>
	 * s5: page dirty + tree dirty + writeback <redirty_page_for_writepage>
	 * s6: page dirty + tree dirty <end_page_writeback>
	 *
	 * A page at s4 will thus be ignored by a concurrent
	 * `do_writepages()` invoked from `close()` or `fsync()`, making its
	 * state inconsistent.
	 *
	 * To avoid this situation, we use a per-file rwsem to prevent
	 * concurrent in-flight `writepage` during `close()` or `fsync()`.
	 *
	 * The overhead is minimal, since read holders allow concurrent
	 * `writepage`, while it is natural for `close()` or `fsync()` to
	 * wait for in-flight `writepage()`s to complete.
	 *
	 * NOTE that in the worst case, a process may wait on the write
	 * semaphore for TIMEOUT even if a signal is pending. But we have
	 * to wait there to iterate all pages and make sure that no dirty
	 * page remains.
	 */
	struct rw_semaphore wpage_sem;

	// The real inode shared with vfs. ALWAYS PUT IT AT THE BOTTOM.
	struct inode vfs_inode;
};
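
/*
 * Condensed locking sketch for wpage_sem (restating the comment above,
 * not a verbatim excerpt from the driver):
 *
 *	writepage path:				flush/fsync path:
 *	down_read(&info->wpage_sem);		down_write(&info->wpage_sem);
 *	... write the page to remote ...	... do_writepages() ...
 *	up_read(&info->wpage_sem);		up_write(&info->wpage_sem);
 *
 * Readers may run concurrently with each other, while the writer side
 * waits for every in-flight writepage() to finish, so no page re-dirtied
 * at s5 can be missed by the final flush.
 */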

struct hmdfs_readdir_work {
	struct list_head head;
	struct dentry *dentry;
	struct hmdfs_peer *con;
	struct delayed_work dwork;
};
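
/*
 * Illustrative sketch (an assumption): a readdir work item is expected
 * to be queued through the standard delayed-work API, with a hypothetical
 * hmdfs_readdir_work_fn() as the worker:
 *
 *	INIT_DELAYED_WORK(&rw->dwork, hmdfs_readdir_work_fn);
 *	queue_delayed_work(system_wq, &rw->dwork, timeout);
 */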

static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
{
	return container_of(inode, struct hmdfs_inode_info, vfs_inode);
}
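
/*
 * Usage sketch: hmdfs_i() is the standard container_of() round-trip used
 * by most filesystems, so the following always holds:
 *
 *	struct hmdfs_inode_info *info = hmdfs_i(inode);
 *
 *	WARN_ON(&info->vfs_inode != inode);
 */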

static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
{
	const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);

	/*
	 * Refer to comments in hmdfs_stash_remote_inode(); the acquire
	 * pairs with the release ordering used when stash_status is
	 * published.
	 */
	return (hmdfs_is_stash_enabled(sbi) &&
		smp_load_acquire(&info->stash_status));
}

static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
					  struct hmdfs_fid *fid)
{
	spin_lock(&info->fid_lock);
	*fid = info->fid;
	spin_unlock(&info->fid_lock);
}
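
/*
 * Usage sketch (hmdfs_send_open() is a hypothetical helper, not a real
 * symbol): snapshot the fid under fid_lock before issuing a remote
 * request, since info->fid may be updated concurrently when the file is
 * reopened:
 *
 *	struct hmdfs_fid fid;
 *
 *	hmdfs_remote_fetch_fid(info, &fid);
 *	err = hmdfs_send_open(info->conn, &fid);
 */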

/*****************************************************************************
 * ino allocator
 *****************************************************************************/

enum HMDFS_ROOT {
	HMDFS_ROOT_ANCESTOR = 1, // /
	HMDFS_ROOT_DEV,		 // /device_view
	HMDFS_ROOT_DEV_LOCAL,	 // /device_view/local
	HMDFS_ROOT_DEV_REMOTE,	 // /device_view/remote
	HMDFS_ROOT_DEV_CLOUD,	 // /device_view/cloud
	HMDFS_ROOT_MERGE,	 // /merge_view
	HMDFS_ROOT_MERGE_CLOUD,	 // /cloud_merge_view

	HMDFS_ROOT_INVALID,
};

// layers of the directory tree (views), not overlay layers
enum HMDFS_LAYER_TYPE {
	HMDFS_LAYER_ZERO = 0,	   // /
	HMDFS_LAYER_FIRST_DEVICE,  // /device_view
	HMDFS_LAYER_SECOND_LOCAL,  // /device_view/local
	HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
	HMDFS_LAYER_SECOND_CLOUD,  // /device_view/cloud
	HMDFS_LAYER_OTHER_LOCAL,   // /device_view/local/xx
	HMDFS_LAYER_OTHER_REMOTE,  // /device_view/remote/xx
	HMDFS_LAYER_OTHER_CLOUD,   // /device_view/cloud/xx

	HMDFS_LAYER_FIRST_MERGE,       // /merge_view
	HMDFS_LAYER_OTHER_MERGE,       // /merge_view/xxx
	HMDFS_LAYER_FIRST_MERGE_CLOUD, // /cloud_merge_view
	HMDFS_LAYER_OTHER_MERGE_CLOUD, // /cloud_merge_view/xxx
	HMDFS_LAYER_INVALID,
};

struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
				     struct inode *lo_i,
				     struct hmdfs_peer *peer);
struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
				       struct dentry *fst_lo_d);
struct inode *hmdfs_iget5_locked_cloud_merge(struct super_block *sb,
					     struct dentry *fst_lo_d);

struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
				       struct inode *lo_i);
struct hmdfs_peer;
struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
					struct hmdfs_peer *peer,
					uint64_t remote_ino);

struct hmdfs_lookup_cloud_ret {
	uint64_t i_size;
	uint64_t i_mtime;
	uint8_t record_id[CLOUD_RECORD_ID_LEN];
	uint8_t reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	uint16_t i_mode;
};

struct inode *hmdfs_iget5_locked_cloud(struct super_block *sb,
				       struct hmdfs_peer *peer,
				       struct hmdfs_lookup_cloud_ret *res);
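
/*
 * Usage sketch (illustrative): a cloud lookup fills a
 * struct hmdfs_lookup_cloud_ret, which is then turned into an inode.
 * NULL-on-failure is an assumption carried over from iget5_locked():
 *
 *	struct inode *inode = hmdfs_iget5_locked_cloud(sb, peer, res);
 *
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 */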

uint32_t make_ino_raw_cloud(uint8_t *cloud_id);
#endif // INODE_H