1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright 2019 Google LLC 4 */ 5 #ifndef _INCFS_DATA_MGMT_H 6 #define _INCFS_DATA_MGMT_H 7 8 #include <linux/cred.h> 9 #include <linux/fs.h> 10 #include <linux/types.h> 11 #include <linux/mutex.h> 12 #include <linux/spinlock.h> 13 #include <linux/rcupdate.h> 14 #include <linux/completion.h> 15 #include <linux/wait.h> 16 #include <linux/zstd.h> 17 #include <crypto/hash.h> 18 #include <linux/rwsem.h> 19 20 #include <uapi/linux/incrementalfs.h> 21 22 #include "internal.h" 23 #include "pseudo_files.h" 24 25 #define SEGMENTS_PER_FILE 3 26 27 enum LOG_RECORD_TYPE { 28 FULL, 29 SAME_FILE, 30 SAME_FILE_CLOSE_BLOCK, 31 SAME_FILE_CLOSE_BLOCK_SHORT, 32 SAME_FILE_NEXT_BLOCK, 33 SAME_FILE_NEXT_BLOCK_SHORT, 34 }; 35 36 struct full_record { 37 enum LOG_RECORD_TYPE type : 3; /* FULL */ 38 u32 block_index : 29; 39 incfs_uuid_t file_id; 40 u64 absolute_ts_us; 41 uid_t uid; 42 } __packed; /* 32 bytes */ 43 44 struct same_file { 45 enum LOG_RECORD_TYPE type : 3; /* SAME_FILE */ 46 u32 block_index : 29; 47 uid_t uid; 48 u16 relative_ts_us; /* max 2^16 us ~= 64 ms */ 49 } __packed; /* 10 bytes */ 50 51 struct same_file_close_block { 52 enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK */ 53 u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ 54 s16 block_index_delta; 55 } __packed; /* 4 bytes */ 56 57 struct same_file_close_block_short { 58 enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK_SHORT */ 59 u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ 60 s8 block_index_delta; 61 } __packed; /* 2 bytes */ 62 63 struct same_file_next_block { 64 enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK */ 65 u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ 66 } __packed; /* 2 bytes */ 67 68 struct same_file_next_block_short { 69 enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK_SHORT */ 70 u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ 71 } __packed; /* 1 byte */ 72 73 union log_record { 74 struct full_record full_record; 75 struct same_file same_file; 76 struct same_file_close_block same_file_close_block; 77 struct same_file_close_block_short same_file_close_block_short; 78 struct same_file_next_block same_file_next_block; 79 struct same_file_next_block_short same_file_next_block_short; 80 }; 81 82 struct read_log_state { 83 /* Log buffer generation id, incremented on configuration changes */ 84 u32 generation_id; 85 86 /* Offset in rl_ring_buf to write into. */ 87 u32 next_offset; 88 89 /* Current number of writer passes over rl_ring_buf */ 90 u32 current_pass_no; 91 92 /* Current full_record to diff against */ 93 struct full_record base_record; 94 95 /* Current record number counting from configuration change */ 96 u64 current_record_no; 97 }; 98 99 /* A ring buffer to save records about data blocks which were recently read. */ 100 struct read_log { 101 void *rl_ring_buf; 102 103 int rl_size; 104 105 struct read_log_state rl_head; 106 107 struct read_log_state rl_tail; 108 109 /* A lock to protect the above fields */ 110 spinlock_t rl_lock; 111 112 /* A queue of waiters who want to be notified about reads */ 113 wait_queue_head_t ml_notif_wq; 114 115 /* A work item to wake up those waiters without slowing down readers */ 116 struct delayed_work ml_wakeup_work; 117 }; 118 119 struct mount_options { 120 unsigned int read_timeout_ms; 121 unsigned int readahead_pages; 122 unsigned int read_log_pages; 123 unsigned int read_log_wakeup_count; 124 bool report_uid; 125 char *sysfs_name; 126 }; 127 128 struct mount_info { 129 struct super_block *mi_sb; 130 131 struct path mi_backing_dir_path; 132 133 struct dentry *mi_index_dir; 134 /* For stacking mounts, if true, this indicates if the index dir needs 135 * to be freed for this SB otherwise it was created by lower level SB */ 136 bool mi_index_free; 137 138 struct dentry *mi_incomplete_dir; 139 /* For stacking mounts, if true, this indicates if the incomplete dir 140 * needs to be freed for this SB. Similar to mi_index_free */ 141 bool mi_incomplete_free; 142 143 const struct cred *mi_owner; 144 145 struct mount_options mi_options; 146 147 /* This mutex is to be taken before create, rename, delete */ 148 struct mutex mi_dir_struct_mutex; 149 150 /* 151 * A queue of waiters who want to be notified about new pending reads. 152 */ 153 wait_queue_head_t mi_pending_reads_notif_wq; 154 155 /* 156 * Protects - RCU safe: 157 * - reads_list_head 158 * - mi_pending_reads_count 159 * - mi_last_pending_read_number 160 * - data_file_segment.reads_list_head 161 */ 162 spinlock_t pending_read_lock; 163 164 /* List of active pending_read objects */ 165 struct list_head mi_reads_list_head; 166 167 /* Total number of items in reads_list_head */ 168 int mi_pending_reads_count; 169 170 /* 171 * Last serial number that was assigned to a pending read. 172 * 0 means no pending reads have been seen yet. 173 */ 174 int mi_last_pending_read_number; 175 176 /* Temporary buffer for read logger. */ 177 struct read_log mi_log; 178 179 /* SELinux needs special xattrs on our pseudo files */ 180 struct mem_range pseudo_file_xattr[PSEUDO_FILE_COUNT]; 181 182 /* A queue of waiters who want to be notified about blocks_written */ 183 wait_queue_head_t mi_blocks_written_notif_wq; 184 185 /* Number of blocks written since mount */ 186 atomic_t mi_blocks_written; 187 188 /* Per UID read timeouts */ 189 spinlock_t mi_per_uid_read_timeouts_lock; 190 struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; 191 int mi_per_uid_read_timeouts_size; 192 193 /* zstd workspace */ 194 struct mutex mi_zstd_workspace_mutex; 195 void *mi_zstd_workspace; 196 ZSTD_DStream *mi_zstd_stream; 197 struct delayed_work mi_zstd_cleanup_work; 198 199 /* sysfs node */ 200 struct incfs_sysfs_node *mi_sysfs_node; 201 202 /* Last error information */ 203 struct mutex mi_le_mutex; 204 incfs_uuid_t mi_le_file_id; 205 u64 mi_le_time_us; 206 u32 mi_le_page; 207 u32 mi_le_errno; 208 uid_t mi_le_uid; 209 210 /* Number of reads timed out */ 211 u32 mi_reads_failed_timed_out; 212 213 /* Number of reads failed because hash verification failed */ 214 u32 mi_reads_failed_hash_verification; 215 216 /* Number of reads failed for another reason */ 217 u32 mi_reads_failed_other; 218 219 /* Number of reads delayed because page had to be fetched */ 220 u32 mi_reads_delayed_pending; 221 222 /* Total time waiting for pages to be fetched */ 223 u64 mi_reads_delayed_pending_us; 224 225 /* 226 * Number of reads delayed because of per-uid min_time_us or 227 * min_pending_time_us settings 228 */ 229 u32 mi_reads_delayed_min; 230 231 /* Total time waiting because of per-uid min_time_us or 232 * min_pending_time_us settings. 233 * 234 * Note that if a read is initially delayed because we have to wait for 235 * the page, then further delayed because of min_pending_time_us 236 * setting, this counter gets incremented by only the further delay 237 * time. 238 */ 239 u64 mi_reads_delayed_min_us; 240 }; 241 242 struct data_file_block { 243 loff_t db_backing_file_data_offset; 244 245 size_t db_stored_size; 246 247 enum incfs_compression_alg db_comp_alg; 248 }; 249 250 struct pending_read { 251 incfs_uuid_t file_id; 252 253 s64 timestamp_us; 254 255 atomic_t done; 256 257 int block_index; 258 259 int serial_number; 260 261 uid_t uid; 262 263 struct list_head mi_reads_list; 264 265 struct list_head segment_reads_list; 266 267 struct rcu_head rcu; 268 }; 269 270 struct data_file_segment { 271 wait_queue_head_t new_data_arrival_wq; 272 273 /* Protects reads and writes from the blockmap */ 274 struct rw_semaphore rwsem; 275 276 /* List of active pending_read objects belonging to this segment */ 277 /* Protected by mount_info.pending_reads_mutex */ 278 struct list_head reads_list_head; 279 }; 280 281 /* 282 * Extra info associated with a file. Just a few bytes set by a user. 283 */ 284 struct file_attr { 285 loff_t fa_value_offset; 286 287 size_t fa_value_size; 288 289 u32 fa_crc; 290 }; 291 292 293 struct data_file { 294 struct backing_file_context *df_backing_file_context; 295 296 struct mount_info *df_mount_info; 297 298 incfs_uuid_t df_id; 299 300 /* 301 * Array of segments used to reduce lock contention for the file. 302 * Segment is chosen for a block depends on the block's index. 303 */ 304 struct data_file_segment df_segments[SEGMENTS_PER_FILE]; 305 306 /* Base offset of the first metadata record. */ 307 loff_t df_metadata_off; 308 309 /* Base offset of the block map. */ 310 loff_t df_blockmap_off; 311 312 /* File size in bytes */ 313 loff_t df_size; 314 315 /* File header flags */ 316 u32 df_header_flags; 317 318 /* File size in DATA_FILE_BLOCK_SIZE blocks */ 319 int df_data_block_count; 320 321 /* Total number of blocks, data + hash */ 322 int df_total_block_count; 323 324 /* For mapped files, the offset into the actual file */ 325 loff_t df_mapped_offset; 326 327 /* Number of data blocks written to file */ 328 atomic_t df_data_blocks_written; 329 330 /* Number of data blocks in the status block */ 331 u32 df_initial_data_blocks_written; 332 333 /* Number of hash blocks written to file */ 334 atomic_t df_hash_blocks_written; 335 336 /* Number of hash blocks in the status block */ 337 u32 df_initial_hash_blocks_written; 338 339 /* Offset to status metadata header */ 340 loff_t df_status_offset; 341 342 /* 343 * Mutex acquired while enabling verity. Note that df_hash_tree is set 344 * by enable verity. 345 * 346 * The backing file mutex bc_mutex may be taken while this mutex is 347 * held. 348 */ 349 struct mutex df_enable_verity; 350 351 /* 352 * Set either at construction time or during enabling verity. In the 353 * latter case, set via smp_store_release, so use smp_load_acquire to 354 * read it. 355 */ 356 struct mtree *df_hash_tree; 357 358 /* Guaranteed set if df_hash_tree is set. */ 359 struct incfs_df_signature *df_signature; 360 361 /* 362 * The verity file digest, set when verity is enabled and the file has 363 * been opened 364 */ 365 struct mem_range df_verity_file_digest; 366 367 struct incfs_df_verity_signature *df_verity_signature; 368 }; 369 370 struct dir_file { 371 struct mount_info *mount_info; 372 373 struct file *backing_dir; 374 }; 375 376 struct inode_info { 377 struct mount_info *n_mount_info; /* A mount, this file belongs to */ 378 379 struct inode *n_backing_inode; 380 381 struct data_file *n_file; 382 383 struct inode n_vfs_inode; 384 }; 385 386 struct dentry_info { 387 struct path backing_path; 388 }; 389 390 enum FILL_PERMISSION { 391 CANT_FILL = 0, 392 CAN_FILL = 1, 393 }; 394 395 struct incfs_file_data { 396 /* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */ 397 enum FILL_PERMISSION fd_fill_permission; 398 399 /* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */ 400 int fd_get_block_pos; 401 402 /* And how many filled blocks are there up to that point */ 403 int fd_filled_data_blocks; 404 int fd_filled_hash_blocks; 405 }; 406 407 struct mount_info *incfs_alloc_mount_info(struct super_block *sb, 408 struct mount_options *options, 409 struct path *backing_dir_path); 410 411 int incfs_realloc_mount_info(struct mount_info *mi, 412 struct mount_options *options); 413 414 void incfs_free_mount_info(struct mount_info *mi); 415 416 char *file_id_to_str(incfs_uuid_t id); 417 struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); 418 struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); 419 void incfs_free_data_file(struct data_file *df); 420 421 struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); 422 void incfs_free_dir_file(struct dir_file *dir); 423 424 struct incfs_read_data_file_timeouts { 425 u32 min_time_us; 426 u32 min_pending_time_us; 427 u32 max_pending_time_us; 428 }; 429 430 ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, 431 int index, struct mem_range tmp, 432 struct incfs_read_data_file_timeouts *timeouts, 433 unsigned int *delayed_min_us); 434 435 ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst, 436 struct data_file *df, size_t offset); 437 438 int incfs_get_filled_blocks(struct data_file *df, 439 struct incfs_file_data *fd, 440 struct incfs_get_filled_blocks_args *arg); 441 442 int incfs_read_file_signature(struct data_file *df, struct mem_range dst); 443 444 int incfs_process_new_data_block(struct data_file *df, 445 struct incfs_fill_block *block, u8 *data, 446 bool *complete); 447 448 int incfs_process_new_hash_block(struct data_file *df, 449 struct incfs_fill_block *block, u8 *data); 450 451 bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); 452 453 /* 454 * Collects pending reads and saves them into the array (reads/reads_size). 455 * Only reads with serial_number > sn_lowerbound are reported. 456 * Returns how many reads were saved into the array. 457 */ 458 int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, 459 struct incfs_pending_read_info *reads, 460 struct incfs_pending_read_info2 *reads2, 461 int reads_size, int *new_max_sn); 462 463 int incfs_collect_logged_reads(struct mount_info *mi, 464 struct read_log_state *start_state, 465 struct incfs_pending_read_info *reads, 466 struct incfs_pending_read_info2 *reads2, 467 int reads_size); 468 struct read_log_state incfs_get_log_state(struct mount_info *mi); 469 int incfs_get_uncollected_logs_count(struct mount_info *mi, 470 const struct read_log_state *state); 471 get_incfs_node(struct inode * inode)472 static inline struct inode_info *get_incfs_node(struct inode *inode) 473 { 474 if (!inode) 475 return NULL; 476 477 if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) { 478 /* This inode doesn't belong to us. */ 479 pr_warn_once("incfs: %s on an alien inode.", __func__); 480 return NULL; 481 } 482 483 return container_of(inode, struct inode_info, n_vfs_inode); 484 } 485 get_incfs_data_file(struct file * f)486 static inline struct data_file *get_incfs_data_file(struct file *f) 487 { 488 struct inode_info *node = NULL; 489 490 if (!f) 491 return NULL; 492 493 if (!S_ISREG(f->f_inode->i_mode)) 494 return NULL; 495 496 node = get_incfs_node(f->f_inode); 497 if (!node) 498 return NULL; 499 500 return node->n_file; 501 } 502 get_incfs_dir_file(struct file * f)503 static inline struct dir_file *get_incfs_dir_file(struct file *f) 504 { 505 if (!f) 506 return NULL; 507 508 if (!S_ISDIR(f->f_inode->i_mode)) 509 return NULL; 510 511 return (struct dir_file *)f->private_data; 512 } 513 514 /* 515 * Make sure that inode_info.n_file is initialized and inode can be used 516 * for reading and writing data from/to the backing file. 517 */ 518 int make_inode_ready_for_data_ops(struct mount_info *mi, 519 struct inode *inode, 520 struct file *backing_file); 521 get_incfs_dentry(const struct dentry * d)522 static inline struct dentry_info *get_incfs_dentry(const struct dentry *d) 523 { 524 if (!d) 525 return NULL; 526 527 return (struct dentry_info *)d->d_fsdata; 528 } 529 get_incfs_backing_path(const struct dentry * d,struct path * path)530 static inline void get_incfs_backing_path(const struct dentry *d, 531 struct path *path) 532 { 533 struct dentry_info *di = get_incfs_dentry(d); 534 535 if (!di) { 536 *path = (struct path) {}; 537 return; 538 } 539 540 *path = di->backing_path; 541 path_get(path); 542 } 543 get_blocks_count_for_size(u64 size)544 static inline int get_blocks_count_for_size(u64 size) 545 { 546 if (size == 0) 547 return 0; 548 return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; 549 } 550 551 #endif /* _INCFS_DATA_MGMT_H */ 552