/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/hmdfs/inode.h
 *
 * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
 */

#ifndef INODE_H
#define INODE_H

#include "hmdfs.h"

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
#include <linux/iversion.h>
#endif

enum {
	HMDFS_REMOTE_INODE_NONE = 0,
	HMDFS_REMOTE_INODE_STASHING,
	HMDFS_REMOTE_INODE_RESTORING,
};

/*****************************************************************************
 * fid
 *****************************************************************************/

/* Bits for fid_flags */
enum {
	HMDFS_FID_NEED_OPEN = 0,
	HMDFS_FID_OPENING,
};

struct hmdfs_fid {
	__u64 ver;
	__u32 id;
};

/*
 * The cache file is stored in the following format:
 *  ________________________________________________________________
 * |meta file info| remote file(s) path |        file content       |
 * |     head     |        path         |            data           |
 *                ↑                     ↑
 *             path_offs             data_offs
 */
struct hmdfs_cache_info {
	/* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 path_offs;
	__u32 path_len;
	__u32 path_cnt;
	char *path_buf;
	/* Sticky remote (hardlink) file paths, separated by '\0' */
	char *path;
	/* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
	__u32 data_offs;
	/* # of pages that need to be written to the remote file during offline */
	atomic64_t to_write_pgs;
	/* # of pages written to the remote file during offline */
	atomic64_t written_pgs;
	/* Stash file handle */
	struct file *cache_file;
};
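
/*
 * Illustrative sketch (compiled out, not part of the original header):
 * one way the two block-aligned offsets in the layout above could be
 * derived. It assumes HMDFS_STASH_BLK_SIZE is a power of two and that a
 * hypothetical meta-head length is known; the real stash code may differ.
 */
#if 0
static inline void hmdfs_cache_calc_offs_sketch(struct hmdfs_cache_info *ci,
						__u32 meta_len)
{
	/* path area starts at the first block boundary after the meta head */
	ci->path_offs = round_up(meta_len, HMDFS_STASH_BLK_SIZE);
	/* data area starts at the first block boundary after the path area */
	ci->data_offs = round_up(ci->path_offs + ci->path_len,
				 HMDFS_STASH_BLK_SIZE);
}
#endif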

/*****************************************************************************
 * inode info and its inline helpers
 *****************************************************************************/

struct hmdfs_inode_info {
	struct inode *lower_inode; // for local/merge inode
	struct hmdfs_peer *conn;   // for remote inode
	struct kref ref;
	spinlock_t fid_lock;
	struct hmdfs_fid fid;
	unsigned long fid_flags;
	wait_queue_head_t fid_wq;
	__u8 inode_type; // deprecated: use ino system instead
	atomic_t write_opened;

	/* writeback list */
	struct list_head wb_list;

#ifdef CONFIG_HMDFS_FS_PERMISSION
	__u16 perm;
#endif
	/*
	 * Looking up a remote file generates a local inode. In that case this
	 * stores the combination of the remote inode number and generation,
	 * so the uniqueness of the local inode can be determined.
	 */
	__u64 remote_ino;
#define CLOUD_RECORD_ID_LEN            33
	__u8 cloud_record_id[CLOUD_RECORD_ID_LEN];
#define CLOUD_DENTRY_RESERVED_LENGTH   3
	__u8 reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	/*
	 * If this value is not ULLONG_MAX, a getattr syscall on the remote
	 * file should return it as the inode size.
	 */
	__u64 getattr_isize;
	/*
	 * Stores the remote ctime recorded when the remote file is opened.
	 */
	struct hmdfs_time_t remote_ctime;
	/*
	 * Stores the last time, aligned to dcache_precision, that the remote
	 * file was modified. Note that this value has no effect if
	 * writecache_expire is set.
	 */
	struct hmdfs_time_t stable_ctime;
	/*
	 * If set to a nonzero value, the pagecache should be truncated when
	 * the file is opened after this time. In that case stable_ctime has
	 * no effect either.
	 */
	unsigned long writecache_expire;
	/*
	 * Records how many times the file has been written while it is open.
	 * 'writecache_expire' is set on close if this value is nonzero.
	 */
	atomic64_t write_counter;
	/*
	 * Linked into hmdfs_peer::wr_opened_inode_list if the remote inode is
	 * opened for writing; wr_opened_cnt tracks possibly multiple
	 * writable opens.
	 */
	struct list_head wr_opened_node;
	atomic_t wr_opened_cnt;
	spinlock_t stash_lock;
	unsigned int stash_status;
	struct hmdfs_cache_info *cache;
	/* Linked to hmdfs_peer::stashed_inode_list when stashing completes */
	struct list_head stash_node;
	/*
	 * The flush/fsync thread holds the write lock while threads calling
	 * writepage hold the read lock. We use this rwsem to eliminate the
	 * case where flush/fsync operations finish while re-dirtied pages
	 * still remain dirty.
	 *
	 * Here is the explanation in detail:
	 *
	 * During `writepage()`, a re-dirtied page goes through the following
	 * states in sequence:
	 * s1: page dirty + tree dirty
	 * s2: page dirty + tree dirty <tag_pages_for_writeback>
	 * s3: page clean + tree dirty <clear_page_dirty_for_io>
	 * s4: page clean + tree clean + write back <set_page_writeback>
	 * s5: page dirty + tree dirty + write back <redirty_page_for_writepage>
	 * s6: page dirty + tree dirty <end_page_writeback>
	 *
	 * A page in s4 will thus be ignored by the concurrent
	 * `do_writepages()` issued by `close()` or `fsync()`, making its
	 * state inconsistent.
	 *
	 * To avoid this situation, we use a per-file rwsem to prevent
	 * concurrent in-flight `writepage` during `close()` or `fsync()`.
	 *
	 * The overhead is minimal since the read side still allows concurrent
	 * `writepage`, while it is natural for `close()` or `fsync()` to wait
	 * for in-flight `writepage()`s to complete.
	 *
	 * NOTE that in the worst case, a process may wait on the write side
	 * for TIMEOUT even if a signal is pending. But we have to wait there
	 * to iterate all pages and make sure that no dirty page remains.
	 */
	struct rw_semaphore wpage_sem;

	/* The real inode shared with the VFS. ALWAYS PUT IT AT THE BOTTOM. */
	struct inode vfs_inode;
};
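
/*
 * Illustrative sketch (compiled out, not part of the original header):
 * the wpage_sem protocol described above. The function names here are
 * hypothetical; the real lock users live in hmdfs' writeback paths.
 */
#if 0
static int writepage_sketch(struct hmdfs_inode_info *info)
{
	/* many writepage() instances may run concurrently under the rsem */
	down_read(&info->wpage_sem);
	/* ... send the page to the remote side; it may be re-dirtied ... */
	up_read(&info->wpage_sem);
	return 0;
}

static int fsync_sketch(struct hmdfs_inode_info *info)
{
	/* flush/fsync takes the wsem, waiting out in-flight writepage() */
	down_write(&info->wpage_sem);
	/* ... do_writepages() now sees no page stuck between s3 and s6 ... */
	up_write(&info->wpage_sem);
	return 0;
}
#endif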

struct hmdfs_readdir_work {
	struct list_head head;
	struct dentry *dentry;
	struct hmdfs_peer *con;
	struct delayed_work dwork;
};

static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
{
	return container_of(inode, struct hmdfs_inode_info, vfs_inode);
}

static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
{
	const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);

	/* Refer to comments in hmdfs_stash_remote_inode() */
	return (hmdfs_is_stash_enabled(sbi) &&
		smp_load_acquire(&info->stash_status)); /* pairs with release store */
}
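
/*
 * Illustrative sketch (compiled out): the store side that the
 * smp_load_acquire() above pairs with. The real state transition is made
 * in hmdfs_stash_remote_inode(); only the acquire/release pairing is
 * shown here, and the helper name is hypothetical.
 */
#if 0
static void mark_stashing_sketch(struct hmdfs_inode_info *info)
{
	/* publish the cache state before readers can see the new status */
	smp_store_release(&info->stash_status, HMDFS_REMOTE_INODE_STASHING);
}
#endif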

static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
					  struct hmdfs_fid *fid)
{
	spin_lock(&info->fid_lock);
	*fid = info->fid;
	spin_unlock(&info->fid_lock);
}
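
/*
 * Illustrative usage sketch (compiled out): taking a consistent snapshot
 * of the fid before filling a remote request. Everything except
 * hmdfs_remote_fetch_fid() is hypothetical.
 */
#if 0
static void build_request_sketch(struct hmdfs_inode_info *info)
{
	struct hmdfs_fid fid;

	/* ver and id are copied together under fid_lock */
	hmdfs_remote_fetch_fid(info, &fid);
	/* ... put fid.ver / fid.id into the outgoing message ... */
}
#endif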

/*****************************************************************************
 * ino allocator
 *****************************************************************************/

enum HMDFS_ROOT {
	HMDFS_ROOT_ANCESTOR = 1, // /
	HMDFS_ROOT_DEV,		 // /device_view
	HMDFS_ROOT_DEV_LOCAL,	 // /device_view/local
	HMDFS_ROOT_DEV_REMOTE,	 // /device_view/remote
	HMDFS_ROOT_DEV_CLOUD,	 // /device_view/cloud
	HMDFS_ROOT_MERGE,	 // /merge_view
	HMDFS_ROOT_MERGE_CLOUD,	 // /cloud_merge_view

	HMDFS_ROOT_INVALID,
};

// directory layer, not overlay layer
enum HMDFS_LAYER_TYPE {
	HMDFS_LAYER_ZERO = 0,	   // /
	HMDFS_LAYER_FIRST_DEVICE,  // /device_view
	HMDFS_LAYER_SECOND_LOCAL,  // /device_view/local
	HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
	HMDFS_LAYER_SECOND_CLOUD,  // /device_view/cloud
	HMDFS_LAYER_OTHER_LOCAL,   // /device_view/local/xx
	HMDFS_LAYER_OTHER_REMOTE,  // /device_view/remote/xx
	HMDFS_LAYER_OTHER_CLOUD,   // /device_view/cloud/xx

	HMDFS_LAYER_FIRST_MERGE, // /merge_view
	HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx
	HMDFS_LAYER_FIRST_MERGE_CLOUD, // /cloud_merge_view
	HMDFS_LAYER_OTHER_MERGE_CLOUD, // /cloud_merge_view/xxx
	HMDFS_LAYER_INVALID,
};

struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
				     struct inode *lo_i,
				     struct hmdfs_peer *peer);
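
/*
 * Illustrative sketch (compiled out): combining the reserved root inode
 * numbers above with hmdfs_iget_locked_root(). Whether the peer argument
 * is NULL for the local device view is an assumption of this sketch, not
 * something this header states.
 */
#if 0
static struct inode *get_local_root_sketch(struct super_block *sb,
					   struct inode *lower_root)
{
	return hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_LOCAL, lower_root,
				      NULL);
}
#endif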
struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
				       struct dentry *fst_lo_d);
struct inode *hmdfs_iget5_locked_cloud_merge(struct super_block *sb,
					     struct dentry *fst_lo_d);

struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
				       struct inode *lo_i);
struct hmdfs_peer;
struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
					struct hmdfs_peer *peer,
					uint64_t remote_ino);

struct hmdfs_lookup_cloud_ret {
	uint64_t i_size;
	uint64_t i_mtime;
	uint8_t record_id[CLOUD_RECORD_ID_LEN];
	uint8_t reserved[CLOUD_DENTRY_RESERVED_LENGTH];
	uint16_t i_mode;
};

struct inode *hmdfs_iget5_locked_cloud(struct super_block *sb,
				       struct hmdfs_peer *peer,
				       struct hmdfs_lookup_cloud_ret *res);

uint32_t make_ino_raw_cloud(uint8_t *cloud_id);
#endif // INODE_H