/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2019 Google LLC
 */
5  #ifndef _INCFS_DATA_MGMT_H
6  #define _INCFS_DATA_MGMT_H
7  
8  #include <linux/cred.h>
9  #include <linux/fs.h>
10  #include <linux/types.h>
11  #include <linux/mutex.h>
12  #include <linux/spinlock.h>
13  #include <linux/rcupdate.h>
14  #include <linux/completion.h>
15  #include <linux/wait.h>
16  #include <linux/zstd.h>
17  #include <crypto/hash.h>
18  #include <linux/rwsem.h>
19  
20  #include <uapi/linux/incrementalfs.h>
21  
22  #include "internal.h"
23  #include "pseudo_files.h"
24  
/* Number of entries in the data_file.df_segments array. */
#define SEGMENTS_PER_FILE 3
26  
/*
 * Discriminator stored in the leading 3-bit `type` bit-field of every log
 * record layout declared below. The values are spelled out explicitly
 * because they must fit in those 3 bits.
 */
enum LOG_RECORD_TYPE {
	FULL = 0,
	SAME_FILE = 1,
	SAME_FILE_CLOSE_BLOCK = 2,
	SAME_FILE_CLOSE_BLOCK_SHORT = 3,
	SAME_FILE_NEXT_BLOCK = 4,
	SAME_FILE_NEXT_BLOCK_SHORT = 5,
};
35  
/*
 * Log record that carries every field explicitly: block index, file id,
 * absolute timestamp and uid. The other record layouts omit some of these
 * fields or store them as relative values/deltas.
 */
struct full_record {
	enum LOG_RECORD_TYPE type : 3; /* FULL */
	u32 block_index : 29;
	incfs_uuid_t file_id;
	u64 absolute_ts_us; /* absolute timestamp, in us per the field name */
	uid_t uid;
} __packed; /* 32 bytes */
43  
/*
 * Log record without a file id: it stores the block index, the uid and a
 * 16-bit relative timestamp.
 * NOTE(review): presumably the omitted file id and the timestamp base come
 * from read_log_state.base_record - confirm against the log reader/writer.
 */
struct same_file {
	enum LOG_RECORD_TYPE type : 3; /* SAME_FILE */
	u32 block_index : 29;
	uid_t uid;
	u16 relative_ts_us; /* max 2^16 us ~= 64 ms */
} __packed; /* 10 bytes */
50  
/*
 * Compact log record: a 13-bit relative timestamp plus a signed 16-bit
 * block index delta instead of an absolute block index.
 * NOTE(review): the delta is presumably taken against the block index of
 * the current base record - confirm against the log reader/writer.
 */
struct same_file_close_block {
	enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK */
	u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */
	s16 block_index_delta;
} __packed; /* 4 bytes */
56  
/*
 * Shorter variant of same_file_close_block: the relative timestamp is kept
 * in 5 bits in units of 10 us, and the block index delta is a signed 8-bit
 * value.
 */
struct same_file_close_block_short {
	enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK_SHORT */
	u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */
	s8 block_index_delta;
} __packed; /* 2 bytes */
62  
/*
 * Log record that stores only a 13-bit relative timestamp; it has no block
 * index or delta field.
 * NOTE(review): per the name, the block is presumably the one following the
 * previously logged block of the same file - confirm.
 */
struct same_file_next_block {
	enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK */
	u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */
} __packed; /* 2 bytes */
67  
/*
 * Smallest log record: the type tag and a 5-bit relative timestamp in units
 * of 10 us, packed into a single byte. No block index or delta is stored.
 */
struct same_file_next_block_short {
	enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK_SHORT */
	u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */
} __packed; /* 1 byte */
72  
/*
 * Overlay of all log record layouts. Every member starts with the 3-bit
 * `type` bit-field, which tells which layout is actually stored.
 */
union log_record {
	struct full_record full_record;
	struct same_file same_file;
	struct same_file_close_block same_file_close_block;
	struct same_file_close_block_short same_file_close_block_short;
	struct same_file_next_block same_file_next_block;
	struct same_file_next_block_short same_file_next_block_short;
};
81  
/*
 * A position in the read log together with the context needed to interpret
 * records at that position. struct read_log keeps two of these (rl_head and
 * rl_tail).
 */
struct read_log_state {
	/* Log buffer generation id, incremented on configuration changes */
	u32 generation_id;

	/* Offset in rl_ring_buf to write into. */
	u32 next_offset;

	/* Current number of writer passes over rl_ring_buf */
	u32 current_pass_no;

	/* Current full_record to diff against */
	struct full_record base_record;

	/* Current record number counting from configuration change */
	u64 current_record_no;
};
98  
/* A ring buffer to save records about data blocks which were recently read. */
struct read_log {
	/* Backing storage for the log records */
	void *rl_ring_buf;

	/* Size of rl_ring_buf. NOTE(review): presumably in bytes - confirm */
	int rl_size;

	/* State at the head of the log */
	struct read_log_state rl_head;

	/* State at the tail of the log */
	struct read_log_state rl_tail;

	/* A lock to protect the above fields */
	spinlock_t rl_lock;

	/* A queue of waiters who want to be notified about reads */
	wait_queue_head_t ml_notif_wq;

	/* A work item to wake up those waiters without slowing down readers */
	struct delayed_work ml_wakeup_work;
};
118  
/*
 * Per-mount configuration. A copy is embedded in struct mount_info as
 * mi_options.
 */
struct mount_options {
	/* Read timeout, in milliseconds per the field name */
	unsigned int read_timeout_ms;
	/* Readahead setting, counted in pages per the field name */
	unsigned int readahead_pages;
	/* Read log setting, counted in pages per the field name */
	unsigned int read_log_pages;
	/*
	 * NOTE(review): presumably the number of logged reads after which
	 * read log waiters are woken up - confirm against the implementation.
	 */
	unsigned int read_log_wakeup_count;
	bool report_uid;
	char *sysfs_name;
};
127  
/* State of a single incfs mount. */
struct mount_info {
	struct super_block *mi_sb;

	struct path mi_backing_dir_path;

	struct dentry *mi_index_dir;
	/*
	 * For stacking mounts: true if the index dir must be freed for this
	 * SB, false if it was created by a lower level SB.
	 */
	bool mi_index_free;

	struct dentry *mi_incomplete_dir;
	/*
	 * For stacking mounts: true if the incomplete dir must be freed for
	 * this SB. Similar to mi_index_free.
	 */
	bool mi_incomplete_free;

	const struct cred *mi_owner;

	struct mount_options mi_options;

	/* This mutex is to be taken before create, rename, delete */
	struct mutex mi_dir_struct_mutex;

	/*
	 * A queue of waiters who want to be notified about new pending reads.
	 */
	wait_queue_head_t mi_pending_reads_notif_wq;

	/*
	 * Protects - RCU safe:
	 *  - mi_reads_list_head
	 *  - mi_pending_reads_count
	 *  - mi_last_pending_read_number
	 *  - data_file_segment.reads_list_head
	 */
	spinlock_t pending_read_lock;

	/* List of active pending_read objects */
	struct list_head mi_reads_list_head;

	/* Total number of items in mi_reads_list_head */
	int mi_pending_reads_count;

	/*
	 * Last serial number that was assigned to a pending read.
	 * 0 means no pending reads have been seen yet.
	 */
	int mi_last_pending_read_number;

	/* Temporary buffer for read logger. */
	struct read_log mi_log;

	/* SELinux needs special xattrs on our pseudo files */
	struct mem_range pseudo_file_xattr[PSEUDO_FILE_COUNT];

	/* A queue of waiters who want to be notified about blocks_written */
	wait_queue_head_t mi_blocks_written_notif_wq;

	/* Number of blocks written since mount */
	atomic_t mi_blocks_written;

	/* Per UID read timeouts */
	spinlock_t mi_per_uid_read_timeouts_lock;
	struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts;
	int mi_per_uid_read_timeouts_size;

	/* zstd workspace */
	struct mutex mi_zstd_workspace_mutex;
	void *mi_zstd_workspace;
	ZSTD_DStream *mi_zstd_stream;
	struct delayed_work mi_zstd_cleanup_work;

	/* sysfs node */
	struct incfs_sysfs_node *mi_sysfs_node;

	/* Last error information */
	struct mutex	mi_le_mutex;
	incfs_uuid_t	mi_le_file_id;
	u64		mi_le_time_us;
	u32		mi_le_page;
	u32		mi_le_errno;
	uid_t		mi_le_uid;

	/* Number of reads timed out */
	u32 mi_reads_failed_timed_out;

	/* Number of reads failed because hash verification failed */
	u32 mi_reads_failed_hash_verification;

	/* Number of reads failed for another reason */
	u32 mi_reads_failed_other;

	/* Number of reads delayed because page had to be fetched */
	u32 mi_reads_delayed_pending;

	/* Total time waiting for pages to be fetched */
	u64 mi_reads_delayed_pending_us;

	/*
	 * Number of reads delayed because of per-uid min_time_us or
	 * min_pending_time_us settings
	 */
	u32 mi_reads_delayed_min;

	/*
	 * Total time waiting because of per-uid min_time_us or
	 * min_pending_time_us settings.
	 *
	 * Note that if a read is initially delayed because we have to wait for
	 * the page, then further delayed because of min_pending_time_us
	 * setting, this counter gets incremented by only the further delay
	 * time.
	 */
	u64 mi_reads_delayed_min_us;
};
241  
/* Describes where and how one block of a data file is stored. */
struct data_file_block {
	/* Offset of the block's data in the backing file */
	loff_t db_backing_file_data_offset;

	/* Size of the block as stored */
	size_t db_stored_size;

	/* Compression algorithm of the stored block */
	enum incfs_compression_alg db_comp_alg;
};
249  
/*
 * A read of a data block that is being tracked as pending. Active objects
 * are kept on mount_info.mi_reads_list_head and on a per-segment
 * data_file_segment.reads_list_head.
 */
struct pending_read {
	incfs_uuid_t file_id;

	s64 timestamp_us;

	atomic_t done;

	int block_index;

	/* See mount_info.mi_last_pending_read_number */
	int serial_number;

	uid_t uid;

	/* NOTE(review): presumably the link in mount_info.mi_reads_list_head */
	struct list_head mi_reads_list;

	/* NOTE(review): presumably the link in data_file_segment.reads_list_head */
	struct list_head segment_reads_list;

	struct rcu_head rcu;
};
269  
/*
 * One of the SEGMENTS_PER_FILE segments of a data_file, used to reduce lock
 * contention for the file.
 */
struct data_file_segment {
	wait_queue_head_t new_data_arrival_wq;

	/* Protects reads and writes from the blockmap */
	struct rw_semaphore rwsem;

	/* List of active pending_read objects belonging to this segment */
	/* Protected by mount_info.pending_read_lock */
	struct list_head reads_list_head;
};
280  
/*
 * Extra info associated with a file. Just a few bytes set by a user.
 */
struct file_attr {
	/* Offset of the attribute value */
	loff_t fa_value_offset;

	/* Size of the attribute value */
	size_t fa_value_size;

	/* CRC. NOTE(review): presumably computed over the value - confirm */
	u32 fa_crc;
};
291  
292  
/* In-memory state of one incfs data file. */
struct data_file {
	struct backing_file_context *df_backing_file_context;

	struct mount_info *df_mount_info;

	incfs_uuid_t df_id;

	/*
	 * Array of segments used to reduce lock contention for the file.
	 * The segment chosen for a block depends on the block's index.
	 */
	struct data_file_segment df_segments[SEGMENTS_PER_FILE];

	/* Base offset of the first metadata record. */
	loff_t df_metadata_off;

	/* Base offset of the block map. */
	loff_t df_blockmap_off;

	/* File size in bytes */
	loff_t df_size;

	/* File header flags */
	u32 df_header_flags;

	/* File size in DATA_FILE_BLOCK_SIZE blocks */
	int df_data_block_count;

	/* Total number of blocks, data + hash */
	int df_total_block_count;

	/* For mapped files, the offset into the actual file */
	loff_t df_mapped_offset;

	/* Number of data blocks written to file */
	atomic_t df_data_blocks_written;

	/* Number of data blocks in the status block */
	u32 df_initial_data_blocks_written;

	/* Number of hash blocks written to file */
	atomic_t df_hash_blocks_written;

	/* Number of hash blocks in the status block */
	u32 df_initial_hash_blocks_written;

	/* Offset to status metadata header */
	loff_t df_status_offset;

	/*
	 * Mutex acquired while enabling verity. Note that df_hash_tree is set
	 * by enable verity.
	 *
	 * The backing file mutex bc_mutex may be taken while this mutex is
	 * held.
	 */
	struct mutex df_enable_verity;

	/*
	 * Set either at construction time or during enabling verity. In the
	 * latter case, set via smp_store_release, so use smp_load_acquire to
	 * read it.
	 */
	struct mtree *df_hash_tree;

	/* Guaranteed set if df_hash_tree is set. */
	struct incfs_df_signature *df_signature;

	/*
	 * The verity file digest, set when verity is enabled and the file has
	 * been opened
	 */
	struct mem_range df_verity_file_digest;

	struct incfs_df_verity_signature *df_verity_signature;
};
369  
/*
 * State of an open incfs directory. get_incfs_dir_file() reads it from
 * file->private_data.
 */
struct dir_file {
	struct mount_info *mount_info;

	/* The backing directory, as a struct file */
	struct file *backing_dir;
};
375  
/*
 * incfs per-inode data. The VFS inode is embedded as n_vfs_inode, and
 * get_incfs_node() maps a struct inode back to this structure with
 * container_of().
 */
struct inode_info {
	struct mount_info *n_mount_info; /* A mount, this file belongs to */

	struct inode *n_backing_inode;

	/* Returned by get_incfs_data_file() for regular files */
	struct data_file *n_file;

	struct inode n_vfs_inode;
};
385  
/*
 * incfs per-dentry data. get_incfs_dentry() reads it from
 * dentry->d_fsdata.
 */
struct dentry_info {
	/* Copied out, with a reference taken, by get_incfs_backing_path() */
	struct path backing_path;
};
389  
/* Value type of incfs_file_data.fd_fill_permission. */
enum FILL_PERMISSION {
	CANT_FILL = 0,
	CAN_FILL = 1,
};
394  
/*
 * Per file handle state.
 * NOTE(review): presumably stored in file->private_data of regular incfs
 * files - confirm against the open/release handlers.
 */
struct incfs_file_data {
	/* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */
	enum FILL_PERMISSION fd_fill_permission;

	/* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */
	int fd_get_block_pos;

	/* And how many filled blocks are there up to that point */
	int fd_filled_data_blocks;
	int fd_filled_hash_blocks;
};
406  
/*
 * Lifecycle of struct mount_info. The implementations are not part of this
 * header; error and ownership conventions must be checked there.
 */
struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
					  struct mount_options *options,
					  struct path *backing_dir_path);

/* NOTE(review): presumably applies new @options to an existing mount. */
int incfs_realloc_mount_info(struct mount_info *mi,
			     struct mount_options *options);

void incfs_free_mount_info(struct mount_info *mi);

/*
 * NOTE(review): returns a non-const char *; presumably a newly allocated
 * string that the caller must free - confirm against the implementation.
 */
char *file_id_to_str(incfs_uuid_t id);
struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name);

/* Lifecycle of struct data_file; @bf is presumably the backing file. */
struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf);
void incfs_free_data_file(struct data_file *df);

/* Lifecycle of struct dir_file; @bf is presumably the backing directory. */
struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf);
void incfs_free_dir_file(struct dir_file *dir);
423  
/*
 * Timeout settings passed to incfs_read_data_file_block(). All values are
 * in microseconds per the field names.
 */
struct incfs_read_data_file_timeouts {
	u32 min_time_us;
	u32 min_pending_time_us;
	u32 max_pending_time_us;
};
429  
/*
 * Reads the data block number @index of file @f into @dst.
 * NOTE(review): @tmp is presumably scratch space (e.g. for decompression)
 * and *@delayed_min_us presumably reports the delay added because of the
 * min_* settings in @timeouts - confirm against the implementation.
 */
ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f,
			int index, struct mem_range tmp,
			struct incfs_read_data_file_timeouts *timeouts,
			unsigned int *delayed_min_us);

ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst,
				      struct data_file *df, size_t offset);

int incfs_get_filled_blocks(struct data_file *df,
			    struct incfs_file_data *fd,
			    struct incfs_get_filled_blocks_args *arg);

int incfs_read_file_signature(struct data_file *df, struct mem_range dst);

/*
 * NOTE(review): *@complete is presumably set when the file has become
 * fully written - confirm against the implementation.
 */
int incfs_process_new_data_block(struct data_file *df,
				 struct incfs_fill_block *block, u8 *data,
				 bool *complete);

int incfs_process_new_hash_block(struct data_file *df,
				 struct incfs_fill_block *block, u8 *data);

/*
 * NOTE(review): presumably true when pending reads with a serial number
 * above @last_number exist - confirm against the implementation.
 */
bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number);

/*
 * Collects pending reads and saves them into the array (reads/reads_size).
 * Only reads with serial_number > sn_lowerbound are reported.
 * Returns how many reads were saved into the array.
 *
 * NOTE(review): @reads2 and @new_max_sn are not described above; @reads2 is
 * presumably an alternative output array in the incfs_pending_read_info2
 * format - confirm against the implementation.
 */
int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound,
				struct incfs_pending_read_info *reads,
				struct incfs_pending_read_info2 *reads2,
				int reads_size, int *new_max_sn);

/*
 * Read log counterpart of incfs_collect_pending_reads(); it takes the log
 * position to start from as @start_state.
 */
int incfs_collect_logged_reads(struct mount_info *mi,
			       struct read_log_state *start_state,
			       struct incfs_pending_read_info *reads,
			       struct incfs_pending_read_info2 *reads2,
			       int reads_size);
struct read_log_state incfs_get_log_state(struct mount_info *mi);
int incfs_get_uncollected_logs_count(struct mount_info *mi,
				     const struct read_log_state *state);
471  
get_incfs_node(struct inode * inode)472  static inline struct inode_info *get_incfs_node(struct inode *inode)
473  {
474  	if (!inode)
475  		return NULL;
476  
477  	if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) {
478  		/* This inode doesn't belong to us. */
479  		pr_warn_once("incfs: %s on an alien inode.", __func__);
480  		return NULL;
481  	}
482  
483  	return container_of(inode, struct inode_info, n_vfs_inode);
484  }
485  
get_incfs_data_file(struct file * f)486  static inline struct data_file *get_incfs_data_file(struct file *f)
487  {
488  	struct inode_info *node = NULL;
489  
490  	if (!f)
491  		return NULL;
492  
493  	if (!S_ISREG(f->f_inode->i_mode))
494  		return NULL;
495  
496  	node = get_incfs_node(f->f_inode);
497  	if (!node)
498  		return NULL;
499  
500  	return node->n_file;
501  }
502  
get_incfs_dir_file(struct file * f)503  static inline struct dir_file *get_incfs_dir_file(struct file *f)
504  {
505  	if (!f)
506  		return NULL;
507  
508  	if (!S_ISDIR(f->f_inode->i_mode))
509  		return NULL;
510  
511  	return (struct dir_file *)f->private_data;
512  }
513  
/*
 * Make sure that inode_info.n_file is initialized and inode can be used
 * for reading and writing data from/to the backing file.
 *
 * NOTE(review): the int return is presumably 0 on success and a negative
 * errno on failure - confirm against the implementation.
 */
int make_inode_ready_for_data_ops(struct mount_info *mi,
				struct inode *inode,
				struct file *backing_file);
521  
get_incfs_dentry(const struct dentry * d)522  static inline struct dentry_info *get_incfs_dentry(const struct dentry *d)
523  {
524  	if (!d)
525  		return NULL;
526  
527  	return (struct dentry_info *)d->d_fsdata;
528  }
529  
get_incfs_backing_path(const struct dentry * d,struct path * path)530  static inline void get_incfs_backing_path(const struct dentry *d,
531  					  struct path *path)
532  {
533  	struct dentry_info *di = get_incfs_dentry(d);
534  
535  	if (!di) {
536  		*path = (struct path) {};
537  		return;
538  	}
539  
540  	*path = di->backing_path;
541  	path_get(path);
542  }
543  
/*
 * Number of INCFS_DATA_FILE_BLOCK_SIZE blocks needed to hold @size bytes,
 * i.e. @size divided by the block size, rounded up. Zero bytes need zero
 * blocks.
 */
static inline int get_blocks_count_for_size(u64 size)
{
	/* (size - 1) keeps the round-up free of overflow for large sizes. */
	return size ? 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE : 0;
}
550  
551  #endif /* _INCFS_DATA_MGMT_H */
552