1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 mod file;
18 mod mount;
19
20 use anyhow::{anyhow, bail, Result};
21 use fuse::filesystem::{
22 Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, GetxattrReply,
23 SetattrValid, ZeroCopyReader, ZeroCopyWriter,
24 };
25 use fuse::sys::OpenOptions as FuseOpenOptions;
26 use log::{debug, error, warn};
27 use std::collections::{btree_map, BTreeMap};
28 use std::convert::{TryFrom, TryInto};
29 use std::ffi::{CStr, CString, OsStr};
30 use std::io;
31 use std::mem::{zeroed, MaybeUninit};
32 use std::option::Option;
33 use std::os::unix::ffi::OsStrExt;
34 use std::path::{Component, Path, PathBuf};
35 use std::sync::atomic::{AtomicU64, Ordering};
36 use std::sync::{Arc, RwLock};
37 use std::time::Duration;
38
39 use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
40 use crate::file::{
41 validate_basename, Attr, InMemoryDir, RandomWrite, ReadByChunk, RemoteDirEditor,
42 RemoteFileEditor, RemoteFileReader,
43 };
44 use crate::fsstat::RemoteFsStatsReader;
45 use crate::fsverity::VerifiedFileEditor;
46
47 pub use self::file::LazyVerifiedReadonlyFile;
48 pub use self::mount::mount_and_enter_message_loop;
49 use self::mount::MAX_WRITE_BYTES;
50
/// Inode number type used by AuthFS to identify filesystem entries.
pub type Inode = u64;
/// Handle number type, e.g. the key of the directory handle table.
type Handle = u64;

/// Maximum time the kernel may cache a file's metadata.
///
/// Any change to a file or directory that is not read-only has to go through AuthFS to be
/// trusted, so the timeout can be the maximum.
const DEFAULT_METADATA_TIMEOUT: Duration = Duration::MAX;

/// Inode number of the filesystem root directory.
const ROOT_INODE: Inode = 1;
59
60 /// `AuthFsEntry` defines the filesystem entry type supported by AuthFS.
61 pub enum AuthFsEntry {
62 /// A read-only directory (writable during initialization). Root directory is an example.
63 ReadonlyDirectory { dir: InMemoryDir },
64 /// A file type that is verified against fs-verity signature (thus read-only). The file is
65 /// served from a remote server.
66 VerifiedReadonly { reader: LazyVerifiedReadonlyFile },
67 /// A file type that is a read-only passthrough from a file on a remote server.
68 UnverifiedReadonly { reader: RemoteFileReader, file_size: u64 },
69 /// A file type that is initially empty, and the content is stored on a remote server. File
70 /// integrity is guaranteed with private Merkle tree.
71 VerifiedNew { editor: VerifiedFileEditor<RemoteFileEditor>, attr: Attr },
72 /// A directory type that is initially empty. One can create new file (`VerifiedNew`) and new
73 /// directory (`VerifiedNewDirectory` itself) with integrity guaranteed within the VM.
74 VerifiedNewDirectory { dir: RemoteDirEditor, attr: Attr },
75 }
76
77 impl AuthFsEntry {
expect_empty_deletable_directory(&self) -> io::Result<()>78 fn expect_empty_deletable_directory(&self) -> io::Result<()> {
79 match self {
80 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
81 if dir.number_of_entries() == 0 {
82 Ok(())
83 } else {
84 Err(io::Error::from_raw_os_error(libc::ENOTEMPTY))
85 }
86 }
87 AuthFsEntry::ReadonlyDirectory { .. } => {
88 Err(io::Error::from_raw_os_error(libc::EACCES))
89 }
90 _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
91 }
92 }
93 }
94
95 struct InodeState {
96 /// Actual inode entry.
97 entry: AuthFsEntry,
98
99 /// Number of `Handle`s (i.e. file descriptors) that are currently referring to the this inode.
100 ///
101 /// Technically, this does not matter to readonly entries, since they live forever. The
102 /// reference count is only needed for manageing lifetime of writable entries like `VerifiedNew`
103 /// and `VerifiedNewDirectory`. That is, when an entry is deleted, the actual entry needs to
104 /// stay alive until the reference count reaches zero.
105 ///
106 /// Note: This is not to be confused with hardlinks, which AuthFS doesn't currently implement.
107 handle_ref_count: AtomicU64,
108
109 /// Whether the inode is already unlinked, i.e. should be removed, once `handle_ref_count` is
110 /// down to zero.
111 unlinked: bool,
112 }
113
114 impl InodeState {
new(entry: AuthFsEntry) -> Self115 fn new(entry: AuthFsEntry) -> Self {
116 InodeState { entry, handle_ref_count: AtomicU64::new(0), unlinked: false }
117 }
118
new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self119 fn new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self {
120 InodeState { entry, handle_ref_count: AtomicU64::new(handle_ref_count), unlinked: false }
121 }
122 }
123
124 /// Data type that a directory implementation should be able to present its entry to `AuthFs`.
125 #[derive(Clone)]
126 pub struct AuthFsDirEntry {
127 pub inode: Inode,
128 pub name: CString,
129 pub is_dir: bool,
130 }
131
132 /// A snapshot of a directory entries for supporting `readdir` operation.
133 ///
134 /// The `readdir` implementation is required by FUSE to not return any entries that have been
135 /// returned previously (while it's fine to not return new entries). Snapshot is the easiest way to
136 /// be compliant. See `fuse::filesystem::readdir` for more details.
137 ///
138 /// A `DirEntriesSnapshot` is created on `opendir`, and is associated with the returned
139 /// `Handle`/FD. The snapshot is deleted when the handle is released in `releasedir`.
140 type DirEntriesSnapshot = Vec<AuthFsDirEntry>;
141
142 /// An iterator for reading from `DirEntriesSnapshot`.
143 pub struct DirEntriesSnapshotIterator {
144 /// A reference to the `DirEntriesSnapshot` in `AuthFs`.
145 snapshot: Arc<DirEntriesSnapshot>,
146
147 /// A value determined by `Self` to identify the last entry. 0 is a reserved value by FUSE to
148 /// mean reading from the beginning.
149 prev_offset: usize,
150 }
151
152 impl<'a> DirectoryIterator for DirEntriesSnapshotIterator {
next(&mut self) -> Option<DirEntry>153 fn next(&mut self) -> Option<DirEntry> {
154 // This iterator should not be the only reference to the snapshot. The snapshot should
155 // still be hold in `dir_handle_table`, i.e. when the FD is not yet closed.
156 //
157 // This code is unreachable when `readdir` is called with a closed FD. Only when the FD is
158 // not yet closed, `DirEntriesSnapshotIterator` can be created (but still short-lived
159 // during `readdir`).
160 debug_assert!(Arc::strong_count(&self.snapshot) >= 2);
161
162 // Since 0 is reserved, let's use 1-based index for the offset. This allows us to
163 // resume from the previous read in the snapshot easily.
164 let current_offset = if self.prev_offset == 0 {
165 1 // first element in the vector
166 } else {
167 self.prev_offset + 1 // next element in the vector
168 };
169 if current_offset > self.snapshot.len() {
170 None
171 } else {
172 let AuthFsDirEntry { inode, name, is_dir } = &self.snapshot[current_offset - 1];
173 let entry = DirEntry {
174 offset: current_offset as u64,
175 ino: *inode,
176 name,
177 type_: if *is_dir { libc::DT_DIR.into() } else { libc::DT_REG.into() },
178 };
179 self.prev_offset = current_offset;
180 Some(entry)
181 }
182 }
183 }
184
185 type DirHandleTable = BTreeMap<Handle, Arc<DirEntriesSnapshot>>;
186
187 // AuthFS needs to be `Sync` to be used with the `fuse` crate.
188 pub struct AuthFs {
189 /// Table for `Inode` to `InodeState` lookup.
190 inode_table: RwLock<BTreeMap<Inode, InodeState>>,
191
192 /// The next available inode number.
193 next_inode: AtomicU64,
194
195 /// Table for `Handle` to `Arc<DirEntriesSnapshot>` lookup. On `opendir`, a new directory handle
196 /// is created and the snapshot of the current directory is created. This is not super
197 /// efficient, but is the simplest way to be compliant to the FUSE contract (see
198 /// `fuse::filesystem::readdir`).
199 ///
200 /// Currently, no code locks `dir_handle_table` and `inode_table` at the same time to avoid
201 /// deadlock.
202 dir_handle_table: RwLock<DirHandleTable>,
203
204 /// The next available handle number.
205 next_handle: AtomicU64,
206
207 /// A reader to access the remote filesystem stats, which is supposed to be of "the" output
208 /// directory. We assume all output are stored in the same partition.
209 remote_fs_stats_reader: RemoteFsStatsReader,
210 }
211
212 // Implementation for preparing an `AuthFs` instance, before starting to serve.
213 // TODO(victorhsieh): Consider implement a builder to separate the mutable initialization from the
214 // immutable / interiorly mutable serving phase.
215 impl AuthFs {
new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs216 pub fn new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs {
217 let mut inode_table = BTreeMap::new();
218 inode_table.insert(
219 ROOT_INODE,
220 InodeState::new(AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() }),
221 );
222
223 AuthFs {
224 inode_table: RwLock::new(inode_table),
225 next_inode: AtomicU64::new(ROOT_INODE + 1),
226 dir_handle_table: RwLock::new(BTreeMap::new()),
227 next_handle: AtomicU64::new(1),
228 remote_fs_stats_reader,
229 }
230 }
231
232 /// Add an `AuthFsEntry` as `basename` to the filesystem root.
add_entry_at_root_dir( &mut self, basename: PathBuf, entry: AuthFsEntry, ) -> Result<Inode>233 pub fn add_entry_at_root_dir(
234 &mut self,
235 basename: PathBuf,
236 entry: AuthFsEntry,
237 ) -> Result<Inode> {
238 validate_basename(&basename)?;
239 self.add_entry_at_ro_dir_by_path(ROOT_INODE, &basename, entry)
240 }
241
242 /// Add an `AuthFsEntry` by path from the `ReadonlyDirectory` represented by `dir_inode`. The
243 /// path must be a related path. If some ancestor directories do not exist, they will be
244 /// created (also as `ReadonlyDirectory`) automatically.
add_entry_at_ro_dir_by_path( &mut self, dir_inode: Inode, path: &Path, entry: AuthFsEntry, ) -> Result<Inode>245 pub fn add_entry_at_ro_dir_by_path(
246 &mut self,
247 dir_inode: Inode,
248 path: &Path,
249 entry: AuthFsEntry,
250 ) -> Result<Inode> {
251 // 1. Make sure the parent directories all exist. Derive the entry's parent inode.
252 let parent_path =
253 path.parent().ok_or_else(|| anyhow!("No parent directory: {:?}", path))?;
254 let parent_inode =
255 parent_path.components().try_fold(dir_inode, |current_dir_inode, path_component| {
256 match path_component {
257 Component::RootDir => bail!("Absolute path is not supported"),
258 Component::Normal(name) => {
259 let inode_table = self.inode_table.get_mut().unwrap();
260 // Locate the internal directory structure.
261 let current_dir_entry = &mut inode_table
262 .get_mut(¤t_dir_inode)
263 .ok_or_else(|| {
264 anyhow!("Unknown directory inode {}", current_dir_inode)
265 })?
266 .entry;
267 let dir = match current_dir_entry {
268 AuthFsEntry::ReadonlyDirectory { dir } => dir,
269 _ => unreachable!("Not a ReadonlyDirectory"),
270 };
271 // Return directory inode. Create first if not exists.
272 if let Some(existing_inode) = dir.lookup_inode(name.as_ref()) {
273 Ok(existing_inode)
274 } else {
275 let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
276 let new_dir_entry =
277 AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() };
278
279 // Actually update the tables.
280 dir.add_dir(name.as_ref(), new_inode)?;
281 if inode_table
282 .insert(new_inode, InodeState::new(new_dir_entry))
283 .is_some()
284 {
285 bail!("Unexpected to find a duplicated inode");
286 }
287 Ok(new_inode)
288 }
289 }
290 _ => Err(anyhow!("Path is not canonical: {:?}", path)),
291 }
292 })?;
293
294 // 2. Insert the entry to the parent directory, as well as the inode table.
295 let inode_table = self.inode_table.get_mut().unwrap();
296 let inode_state = inode_table.get_mut(&parent_inode).expect("previously returned inode");
297 match &mut inode_state.entry {
298 AuthFsEntry::ReadonlyDirectory { dir } => {
299 let basename =
300 path.file_name().ok_or_else(|| anyhow!("Bad file name: {:?}", path))?;
301 let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
302
303 // Actually update the tables.
304 dir.add_file(basename.as_ref(), new_inode)?;
305 if inode_table.insert(new_inode, InodeState::new(entry)).is_some() {
306 bail!("Unexpected to find a duplicated inode");
307 }
308 Ok(new_inode)
309 }
310 _ => unreachable!("Not a ReadonlyDirectory"),
311 }
312 }
313 }
314
315 // Implementation for serving requests.
316 impl AuthFs {
317 /// Handles the file associated with `inode` if found. This function returns whatever
318 /// `handle_fn` returns.
handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R> where F: FnOnce(&AuthFsEntry) -> io::Result<R>,319 fn handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R>
320 where
321 F: FnOnce(&AuthFsEntry) -> io::Result<R>,
322 {
323 let inode_table = self.inode_table.read().unwrap();
324 handle_inode_locked(&inode_table, inode, |inode_state| handle_fn(&inode_state.entry))
325 }
326
327 /// Adds a new entry `name` created by `create_fn` at `parent_inode`, with an initial ref count
328 /// of one.
329 ///
330 /// The operation involves two updates: adding the name with a new allocated inode to the
331 /// parent directory, and insert the new inode and the actual `AuthFsEntry` to the global inode
332 /// table.
333 ///
334 /// `create_fn` receives the parent directory, through which it can create the new entry at and
335 /// register the new inode to. Its returned entry is then added to the inode table.
create_new_entry_with_ref_count<F>( &self, parent_inode: Inode, name: &CStr, create_fn: F, ) -> io::Result<Inode> where F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,336 fn create_new_entry_with_ref_count<F>(
337 &self,
338 parent_inode: Inode,
339 name: &CStr,
340 create_fn: F,
341 ) -> io::Result<Inode>
342 where
343 F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,
344 {
345 let mut inode_table = self.inode_table.write().unwrap();
346 let (new_inode, new_file_entry) = handle_inode_mut_locked(
347 &mut inode_table,
348 &parent_inode,
349 |InodeState { entry, .. }| {
350 let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
351 let basename: &Path = cstr_to_path(name);
352 let new_file_entry = create_fn(entry, basename, new_inode)?;
353 Ok((new_inode, new_file_entry))
354 },
355 )?;
356
357 if let btree_map::Entry::Vacant(entry) = inode_table.entry(new_inode) {
358 entry.insert(InodeState::new_with_ref_count(new_file_entry, 1));
359 Ok(new_inode)
360 } else {
361 unreachable!("Unexpected duplication of inode {}", new_inode);
362 }
363 }
364
open_dir_store_snapshot( &self, dir_entries: Vec<AuthFsDirEntry>, ) -> io::Result<(Option<Handle>, FuseOpenOptions)>365 fn open_dir_store_snapshot(
366 &self,
367 dir_entries: Vec<AuthFsDirEntry>,
368 ) -> io::Result<(Option<Handle>, FuseOpenOptions)> {
369 let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
370 let mut dir_handle_table = self.dir_handle_table.write().unwrap();
371 if let btree_map::Entry::Vacant(value) = dir_handle_table.entry(handle) {
372 value.insert(Arc::new(dir_entries));
373 Ok((Some(handle), FuseOpenOptions::empty()))
374 } else {
375 unreachable!("Unexpected to see new handle {} to existing in the table", handle);
376 }
377 }
378 }
379
check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()>380 fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
381 if (flags & libc::O_ACCMODE as u32) == mode as u32 {
382 Ok(())
383 } else {
384 Err(io::Error::from_raw_os_error(libc::EACCES))
385 }
386 }
387
388 cfg_if::cfg_if! {
389 if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
390 fn blk_size() -> libc::c_int { CHUNK_SIZE as libc::c_int }
391 } else {
392 fn blk_size() -> libc::c_long { CHUNK_SIZE as libc::c_long }
393 }
394 }
395
/// How the permission bits of a generated stat are chosen.
#[allow(clippy::enum_variant_names)]
enum AccessMode {
    /// Use the fixed read-only permission bits picked by the stat constructor.
    ReadOnly,
    /// Use the given mode bits.
    Variable(u32),
}
401
create_stat( ino: libc::ino_t, file_size: u64, access_mode: AccessMode, ) -> io::Result<libc::stat64>402 fn create_stat(
403 ino: libc::ino_t,
404 file_size: u64,
405 access_mode: AccessMode,
406 ) -> io::Result<libc::stat64> {
407 // SAFETY: stat64 is a plan C struct without pointer.
408 let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
409
410 st.st_ino = ino;
411 st.st_mode = match access_mode {
412 AccessMode::ReadOnly => {
413 // Until needed, let's just grant the owner access.
414 libc::S_IFREG | libc::S_IRUSR
415 }
416 AccessMode::Variable(mode) => libc::S_IFREG | mode,
417 };
418 st.st_nlink = 1;
419 st.st_uid = 0;
420 st.st_gid = 0;
421 st.st_size = libc::off64_t::try_from(file_size)
422 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
423 st.st_blksize = blk_size();
424 // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
425 st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
426 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
427 Ok(st)
428 }
429
create_dir_stat( ino: libc::ino_t, file_number: u16, access_mode: AccessMode, ) -> io::Result<libc::stat64>430 fn create_dir_stat(
431 ino: libc::ino_t,
432 file_number: u16,
433 access_mode: AccessMode,
434 ) -> io::Result<libc::stat64> {
435 // SAFETY: stat64 is a plan C struct without pointer.
436 let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
437
438 st.st_ino = ino;
439 st.st_mode = match access_mode {
440 AccessMode::ReadOnly => {
441 // Until needed, let's just grant the owner access and search to group and others.
442 libc::S_IFDIR | libc::S_IXUSR | libc::S_IRUSR | libc::S_IXGRP | libc::S_IXOTH
443 }
444 AccessMode::Variable(mode) => libc::S_IFDIR | mode,
445 };
446
447 // 2 extra for . and ..
448 st.st_nlink = file_number
449 .checked_add(2)
450 .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?
451 .into();
452
453 st.st_uid = 0;
454 st.st_gid = 0;
455 Ok(st)
456 }
457
offset_to_chunk_index(offset: u64) -> u64458 fn offset_to_chunk_index(offset: u64) -> u64 {
459 offset / CHUNK_SIZE
460 }
461
read_chunks<W: io::Write, T: ReadByChunk>( mut w: W, file: &T, file_size: u64, offset: u64, size: u32, ) -> io::Result<usize>462 fn read_chunks<W: io::Write, T: ReadByChunk>(
463 mut w: W,
464 file: &T,
465 file_size: u64,
466 offset: u64,
467 size: u32,
468 ) -> io::Result<usize> {
469 let remaining = file_size.saturating_sub(offset);
470 let size_to_read = std::cmp::min(size as usize, remaining as usize);
471 let total = ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize).try_fold(
472 0,
473 |total, (current_offset, planned_data_size)| {
474 // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
475 // instead of accepting a buffer, the writer could expose the final destination buffer
476 // for the reader to write to. It might not be generally applicable though, e.g. with
477 // virtio transport, the buffer may not be continuous.
478 let mut buf = [0u8; CHUNK_SIZE as usize];
479 let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
480 if read_size < planned_data_size {
481 return Err(io::Error::from_raw_os_error(libc::ENODATA));
482 }
483
484 let begin = (current_offset % CHUNK_SIZE) as usize;
485 let end = begin + planned_data_size;
486 let s = w.write(&buf[begin..end])?;
487 if s != planned_data_size {
488 return Err(io::Error::from_raw_os_error(libc::EIO));
489 }
490 Ok(total + s)
491 },
492 )?;
493
494 Ok(total)
495 }
496
497 impl FileSystem for AuthFs {
498 type Inode = Inode;
499 type Handle = Handle;
500 type DirIter = DirEntriesSnapshotIterator;
501
max_buffer_size(&self) -> u32502 fn max_buffer_size(&self) -> u32 {
503 MAX_WRITE_BYTES
504 }
505
init(&self, _capable: FsOptions) -> io::Result<FsOptions>506 fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
507 // Enable writeback cache for better performance especially since our bandwidth to the
508 // backend service is limited.
509 Ok(FsOptions::WRITEBACK_CACHE)
510 }
511
lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry>512 fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
513 let inode_table = self.inode_table.read().unwrap();
514
515 // Look up the entry's inode number in parent directory.
516 let inode =
517 handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
518 AuthFsEntry::ReadonlyDirectory { dir } => {
519 let path = cstr_to_path(name);
520 dir.lookup_inode(path).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
521 }
522 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
523 let path = cstr_to_path(name);
524 dir.find_inode(path)
525 }
526 _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
527 })?;
528
529 // Create the entry's stat if found.
530 let st = handle_inode_locked(
531 &inode_table,
532 &inode,
533 |InodeState { entry, handle_ref_count, .. }| {
534 let st = match entry {
535 AuthFsEntry::ReadonlyDirectory { dir } => {
536 create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
537 }
538 AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
539 create_stat(inode, *file_size, AccessMode::ReadOnly)
540 }
541 AuthFsEntry::VerifiedReadonly { reader } => {
542 create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
543 }
544 AuthFsEntry::VerifiedNew { editor, attr, .. } => {
545 create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
546 }
547 AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
548 inode,
549 dir.number_of_entries(),
550 AccessMode::Variable(attr.mode()),
551 ),
552 }?;
553 if handle_ref_count.fetch_add(1, Ordering::Relaxed) == u64::MAX {
554 panic!("Handle reference count overflow");
555 }
556 Ok(st)
557 },
558 )?;
559
560 Ok(Entry {
561 inode,
562 generation: 0,
563 attr: st,
564 entry_timeout: DEFAULT_METADATA_TIMEOUT,
565 attr_timeout: DEFAULT_METADATA_TIMEOUT,
566 })
567 }
568
forget(&self, _ctx: Context, inode: Self::Inode, count: u64)569 fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) {
570 let mut inode_table = self.inode_table.write().unwrap();
571 let delete_now = handle_inode_mut_locked(
572 &mut inode_table,
573 &inode,
574 |InodeState { handle_ref_count, unlinked, .. }| {
575 let current = handle_ref_count.get_mut();
576 if count > *current {
577 error!(
578 "Trying to decrease refcount of inode {} by {} (> current {})",
579 inode, count, *current
580 );
581 panic!(); // log to logcat with error!
582 }
583 *current -= count;
584 Ok(*unlinked && *current == 0)
585 },
586 );
587
588 match delete_now {
589 Ok(true) => {
590 let _ = inode_table.remove(&inode).expect("Removed an existing entry");
591 }
592 Ok(false) => { /* Let the inode stay */ }
593 Err(e) => {
594 warn!(
595 "Unexpected failure when tries to forget an inode {} by refcount {}: {:?}",
596 inode, count, e
597 );
598 }
599 }
600 }
601
getattr( &self, _ctx: Context, inode: Inode, _handle: Option<Handle>, ) -> io::Result<(libc::stat64, Duration)>602 fn getattr(
603 &self,
604 _ctx: Context,
605 inode: Inode,
606 _handle: Option<Handle>,
607 ) -> io::Result<(libc::stat64, Duration)> {
608 self.handle_inode(&inode, |config| {
609 Ok((
610 match config {
611 AuthFsEntry::ReadonlyDirectory { dir } => {
612 create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
613 }
614 AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
615 create_stat(inode, *file_size, AccessMode::ReadOnly)
616 }
617 AuthFsEntry::VerifiedReadonly { reader } => {
618 create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
619 }
620 AuthFsEntry::VerifiedNew { editor, attr, .. } => {
621 create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
622 }
623 AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
624 inode,
625 dir.number_of_entries(),
626 AccessMode::Variable(attr.mode()),
627 ),
628 }?,
629 DEFAULT_METADATA_TIMEOUT,
630 ))
631 })
632 }
633
open( &self, _ctx: Context, inode: Self::Inode, flags: u32, ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)>634 fn open(
635 &self,
636 _ctx: Context,
637 inode: Self::Inode,
638 flags: u32,
639 ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
640 // Since file handle is not really used in later operations (which use Inode directly),
641 // return None as the handle.
642 self.handle_inode(&inode, |config| {
643 match config {
644 AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
645 check_access_mode(flags, libc::O_RDONLY)?;
646 }
647 AuthFsEntry::VerifiedNew { .. } => {
648 // TODO(victorhsieh): Imeplement ACL check using the attr and ctx. Always allow
649 // for now.
650 }
651 AuthFsEntry::ReadonlyDirectory { .. }
652 | AuthFsEntry::VerifiedNewDirectory { .. } => {
653 // TODO(victorhsieh): implement when needed.
654 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
655 }
656 }
657 // Always cache the file content. There is currently no need to support direct I/O or
658 // avoid the cache buffer. Memory mapping is only possible with cache enabled.
659 Ok((None, FuseOpenOptions::KEEP_CACHE))
660 })
661 }
662
create( &self, _ctx: Context, parent: Self::Inode, name: &CStr, mode: u32, _flags: u32, umask: u32, ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)>663 fn create(
664 &self,
665 _ctx: Context,
666 parent: Self::Inode,
667 name: &CStr,
668 mode: u32,
669 _flags: u32,
670 umask: u32,
671 ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)> {
672 let new_inode = self.create_new_entry_with_ref_count(
673 parent,
674 name,
675 |parent_entry, basename, new_inode| match parent_entry {
676 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
677 if dir.has_entry(basename) {
678 return Err(io::Error::from_raw_os_error(libc::EEXIST));
679 }
680 let mode = mode & !umask;
681 let (new_file, new_attr) = dir.create_file(basename, new_inode, mode)?;
682 Ok(AuthFsEntry::VerifiedNew { editor: new_file, attr: new_attr })
683 }
684 _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
685 },
686 )?;
687
688 Ok((
689 Entry {
690 inode: new_inode,
691 generation: 0,
692 attr: create_stat(new_inode, /* file_size */ 0, AccessMode::Variable(mode))?,
693 entry_timeout: DEFAULT_METADATA_TIMEOUT,
694 attr_timeout: DEFAULT_METADATA_TIMEOUT,
695 },
696 // See also `open`.
697 /* handle */ None,
698 FuseOpenOptions::KEEP_CACHE,
699 ))
700 }
701
read<W: io::Write + ZeroCopyWriter>( &self, _ctx: Context, inode: Inode, _handle: Handle, w: W, size: u32, offset: u64, _lock_owner: Option<u64>, _flags: u32, ) -> io::Result<usize>702 fn read<W: io::Write + ZeroCopyWriter>(
703 &self,
704 _ctx: Context,
705 inode: Inode,
706 _handle: Handle,
707 w: W,
708 size: u32,
709 offset: u64,
710 _lock_owner: Option<u64>,
711 _flags: u32,
712 ) -> io::Result<usize> {
713 self.handle_inode(&inode, |config| {
714 match config {
715 AuthFsEntry::VerifiedReadonly { reader } => {
716 read_chunks(w, reader, reader.file_size()?, offset, size)
717 }
718 AuthFsEntry::UnverifiedReadonly { reader, file_size } => {
719 read_chunks(w, reader, *file_size, offset, size)
720 }
721 AuthFsEntry::VerifiedNew { editor, .. } => {
722 // Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
723 // request a read even if the file is open with O_WRONLY.
724 read_chunks(w, editor, editor.size(), offset, size)
725 }
726 AuthFsEntry::ReadonlyDirectory { .. }
727 | AuthFsEntry::VerifiedNewDirectory { .. } => {
728 Err(io::Error::from_raw_os_error(libc::EISDIR))
729 }
730 }
731 })
732 }
733
write<R: io::Read + ZeroCopyReader>( &self, _ctx: Context, inode: Self::Inode, _handle: Self::Handle, mut r: R, size: u32, offset: u64, _lock_owner: Option<u64>, _delayed_write: bool, _flags: u32, ) -> io::Result<usize>734 fn write<R: io::Read + ZeroCopyReader>(
735 &self,
736 _ctx: Context,
737 inode: Self::Inode,
738 _handle: Self::Handle,
739 mut r: R,
740 size: u32,
741 offset: u64,
742 _lock_owner: Option<u64>,
743 _delayed_write: bool,
744 _flags: u32,
745 ) -> io::Result<usize> {
746 self.handle_inode(&inode, |config| match config {
747 AuthFsEntry::VerifiedNew { editor, .. } => {
748 let mut buf = vec![0; size as usize];
749 r.read_exact(&mut buf)?;
750 editor.write_at(&buf, offset)
751 }
752 AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
753 Err(io::Error::from_raw_os_error(libc::EPERM))
754 }
755 AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
756 Err(io::Error::from_raw_os_error(libc::EISDIR))
757 }
758 })
759 }
760
setattr( &self, _ctx: Context, inode: Inode, in_attr: libc::stat64, _handle: Option<Handle>, valid: SetattrValid, ) -> io::Result<(libc::stat64, Duration)>761 fn setattr(
762 &self,
763 _ctx: Context,
764 inode: Inode,
765 in_attr: libc::stat64,
766 _handle: Option<Handle>,
767 valid: SetattrValid,
768 ) -> io::Result<(libc::stat64, Duration)> {
769 let mut inode_table = self.inode_table.write().unwrap();
770 handle_inode_mut_locked(&mut inode_table, &inode, |InodeState { entry, .. }| match entry {
771 AuthFsEntry::VerifiedNew { editor, attr } => {
772 check_unsupported_setattr_request(valid)?;
773
774 // Initialize the default stat.
775 let mut new_attr =
776 create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?;
777 // `valid` indicates what fields in `attr` are valid. Update to return correctly.
778 if valid.contains(SetattrValid::SIZE) {
779 // st_size is i64, but the cast should be safe since kernel should not give a
780 // negative size.
781 debug_assert!(in_attr.st_size >= 0);
782 new_attr.st_size = in_attr.st_size;
783 editor.resize(in_attr.st_size as u64)?;
784 }
785 if valid.contains(SetattrValid::MODE) {
786 attr.set_mode(in_attr.st_mode)?;
787 new_attr.st_mode = in_attr.st_mode;
788 }
789 Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
790 }
791 AuthFsEntry::VerifiedNewDirectory { dir, attr } => {
792 check_unsupported_setattr_request(valid)?;
793 if valid.contains(SetattrValid::SIZE) {
794 return Err(io::Error::from_raw_os_error(libc::EISDIR));
795 }
796
797 // Initialize the default stat.
798 let mut new_attr = create_dir_stat(
799 inode,
800 dir.number_of_entries(),
801 AccessMode::Variable(attr.mode()),
802 )?;
803 if valid.contains(SetattrValid::MODE) {
804 attr.set_mode(in_attr.st_mode)?;
805 new_attr.st_mode = in_attr.st_mode;
806 }
807 Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
808 }
809 _ => Err(io::Error::from_raw_os_error(libc::EPERM)),
810 })
811 }
812
getxattr( &self, _ctx: Context, inode: Self::Inode, name: &CStr, size: u32, ) -> io::Result<GetxattrReply>813 fn getxattr(
814 &self,
815 _ctx: Context,
816 inode: Self::Inode,
817 name: &CStr,
818 size: u32,
819 ) -> io::Result<GetxattrReply> {
820 self.handle_inode(&inode, |config| {
821 match config {
822 AuthFsEntry::VerifiedNew { editor, .. } => {
823 // FUSE ioctl is limited, thus we can't implement fs-verity ioctls without a kernel
824 // change (see b/196635431). Until it's possible, use xattr to expose what we need
825 // as an authfs specific API.
826 if name != CStr::from_bytes_with_nul(b"authfs.fsverity.digest\0").unwrap() {
827 return Err(io::Error::from_raw_os_error(libc::ENODATA));
828 }
829
830 if size == 0 {
831 // Per protocol, when size is 0, return the value size.
832 Ok(GetxattrReply::Count(editor.get_fsverity_digest_size() as u32))
833 } else {
834 let digest = editor.calculate_fsverity_digest()?;
835 if digest.len() > size as usize {
836 Err(io::Error::from_raw_os_error(libc::ERANGE))
837 } else {
838 Ok(GetxattrReply::Value(digest.to_vec()))
839 }
840 }
841 }
842 _ => Err(io::Error::from_raw_os_error(libc::ENODATA)),
843 }
844 })
845 }
846
mkdir( &self, _ctx: Context, parent: Self::Inode, name: &CStr, mode: u32, umask: u32, ) -> io::Result<Entry>847 fn mkdir(
848 &self,
849 _ctx: Context,
850 parent: Self::Inode,
851 name: &CStr,
852 mode: u32,
853 umask: u32,
854 ) -> io::Result<Entry> {
855 let new_inode = self.create_new_entry_with_ref_count(
856 parent,
857 name,
858 |parent_entry, basename, new_inode| match parent_entry {
859 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
860 if dir.has_entry(basename) {
861 return Err(io::Error::from_raw_os_error(libc::EEXIST));
862 }
863 let mode = mode & !umask;
864 let (new_dir, new_attr) = dir.mkdir(basename, new_inode, mode)?;
865 Ok(AuthFsEntry::VerifiedNewDirectory { dir: new_dir, attr: new_attr })
866 }
867 AuthFsEntry::ReadonlyDirectory { .. } => {
868 Err(io::Error::from_raw_os_error(libc::EACCES))
869 }
870 _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
871 },
872 )?;
873
874 Ok(Entry {
875 inode: new_inode,
876 generation: 0,
877 attr: create_dir_stat(new_inode, /* file_number */ 0, AccessMode::Variable(mode))?,
878 entry_timeout: DEFAULT_METADATA_TIMEOUT,
879 attr_timeout: DEFAULT_METADATA_TIMEOUT,
880 })
881 }
882
unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()>883 fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
884 let mut inode_table = self.inode_table.write().unwrap();
885 handle_inode_mut_locked(
886 &mut inode_table,
887 &parent,
888 |InodeState { entry, unlinked, .. }| match entry {
889 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
890 let basename: &Path = cstr_to_path(name);
891 // Delete the file from in both the local and remote directories.
892 let _inode = dir.delete_file(basename)?;
893 *unlinked = true;
894 Ok(())
895 }
896 AuthFsEntry::ReadonlyDirectory { .. } => {
897 Err(io::Error::from_raw_os_error(libc::EACCES))
898 }
899 AuthFsEntry::VerifiedNew { .. } => {
900 // Deleting a entry in filesystem root is not currently supported.
901 Err(io::Error::from_raw_os_error(libc::ENOSYS))
902 }
903 AuthFsEntry::UnverifiedReadonly { .. } | AuthFsEntry::VerifiedReadonly { .. } => {
904 Err(io::Error::from_raw_os_error(libc::ENOTDIR))
905 }
906 },
907 )
908 }
909
rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()>910 fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
911 let mut inode_table = self.inode_table.write().unwrap();
912
913 // Check before actual removal, with readonly borrow.
914 handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
915 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
916 let basename: &Path = cstr_to_path(name);
917 let existing_inode = dir.find_inode(basename)?;
918 handle_inode_locked(&inode_table, &existing_inode, |inode_state| {
919 inode_state.entry.expect_empty_deletable_directory()
920 })
921 }
922 AuthFsEntry::ReadonlyDirectory { .. } => {
923 Err(io::Error::from_raw_os_error(libc::EACCES))
924 }
925 _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
926 })?;
927
928 // Look up again, this time with mutable borrow. This needs to be done separately because
929 // the previous lookup needs to borrow multiple entry references in the table.
930 handle_inode_mut_locked(
931 &mut inode_table,
932 &parent,
933 |InodeState { entry, unlinked, .. }| match entry {
934 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
935 let basename: &Path = cstr_to_path(name);
936 let _inode = dir.force_delete_directory(basename)?;
937 *unlinked = true;
938 Ok(())
939 }
940 _ => unreachable!("Mismatched entry type that is just checked"),
941 },
942 )
943 }
944
opendir( &self, _ctx: Context, inode: Self::Inode, _flags: u32, ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)>945 fn opendir(
946 &self,
947 _ctx: Context,
948 inode: Self::Inode,
949 _flags: u32,
950 ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
951 let entries = self.handle_inode(&inode, |config| match config {
952 AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.retrieve_entries(),
953 AuthFsEntry::ReadonlyDirectory { dir } => dir.retrieve_entries(),
954 _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
955 })?;
956 self.open_dir_store_snapshot(entries)
957 }
958
readdir( &self, _ctx: Context, _inode: Self::Inode, handle: Self::Handle, _size: u32, offset: u64, ) -> io::Result<Self::DirIter>959 fn readdir(
960 &self,
961 _ctx: Context,
962 _inode: Self::Inode,
963 handle: Self::Handle,
964 _size: u32,
965 offset: u64,
966 ) -> io::Result<Self::DirIter> {
967 let dir_handle_table = self.dir_handle_table.read().unwrap();
968 if let Some(entry) = dir_handle_table.get(&handle) {
969 Ok(DirEntriesSnapshotIterator {
970 snapshot: entry.clone(),
971 prev_offset: offset.try_into().unwrap(),
972 })
973 } else {
974 Err(io::Error::from_raw_os_error(libc::EBADF))
975 }
976 }
977
releasedir( &self, _ctx: Context, inode: Self::Inode, _flags: u32, handle: Self::Handle, ) -> io::Result<()>978 fn releasedir(
979 &self,
980 _ctx: Context,
981 inode: Self::Inode,
982 _flags: u32,
983 handle: Self::Handle,
984 ) -> io::Result<()> {
985 let mut dir_handle_table = self.dir_handle_table.write().unwrap();
986 if dir_handle_table.remove(&handle).is_none() {
987 unreachable!("Unknown directory handle {}, inode {}", handle, inode);
988 }
989 Ok(())
990 }
991
statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64>992 fn statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64> {
993 let remote_stat = self.remote_fs_stats_reader.statfs()?;
994
995 // Safe because we are zero-initializing a struct with only POD fields. Not all fields
996 // matter to FUSE. See also:
997 // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/fuse/inode.c?h=v5.15#n460
998 let mut st: libc::statvfs64 = unsafe { zeroed() };
999
1000 // Use the remote stat as a template, since it'd matter the most to consider the writable
1001 // files/directories that are written to the remote.
1002 st.f_bsize = remote_stat.block_size;
1003 st.f_frsize = remote_stat.fragment_size;
1004 st.f_blocks = remote_stat.block_numbers;
1005 st.f_bavail = remote_stat.block_available;
1006 st.f_favail = remote_stat.inodes_available;
1007 st.f_namemax = remote_stat.max_filename;
1008 // Assuming we are not privileged to use all free spaces on the remote server, set the free
1009 // blocks/fragment to the same available amount.
1010 st.f_bfree = st.f_bavail;
1011 st.f_ffree = st.f_favail;
1012 // Number of inodes on the filesystem
1013 st.f_files = self.inode_table.read().unwrap().len() as u64;
1014
1015 Ok(st)
1016 }
1017 }
1018
handle_inode_locked<F, R>( inode_table: &BTreeMap<Inode, InodeState>, inode: &Inode, handle_fn: F, ) -> io::Result<R> where F: FnOnce(&InodeState) -> io::Result<R>,1019 fn handle_inode_locked<F, R>(
1020 inode_table: &BTreeMap<Inode, InodeState>,
1021 inode: &Inode,
1022 handle_fn: F,
1023 ) -> io::Result<R>
1024 where
1025 F: FnOnce(&InodeState) -> io::Result<R>,
1026 {
1027 if let Some(inode_state) = inode_table.get(inode) {
1028 handle_fn(inode_state)
1029 } else {
1030 Err(io::Error::from_raw_os_error(libc::ENOENT))
1031 }
1032 }
1033
handle_inode_mut_locked<F, R>( inode_table: &mut BTreeMap<Inode, InodeState>, inode: &Inode, handle_fn: F, ) -> io::Result<R> where F: FnOnce(&mut InodeState) -> io::Result<R>,1034 fn handle_inode_mut_locked<F, R>(
1035 inode_table: &mut BTreeMap<Inode, InodeState>,
1036 inode: &Inode,
1037 handle_fn: F,
1038 ) -> io::Result<R>
1039 where
1040 F: FnOnce(&mut InodeState) -> io::Result<R>,
1041 {
1042 if let Some(inode_state) = inode_table.get_mut(inode) {
1043 handle_fn(inode_state)
1044 } else {
1045 Err(io::Error::from_raw_os_error(libc::ENOENT))
1046 }
1047 }
1048
check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()>1049 fn check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()> {
1050 if valid.contains(SetattrValid::UID) {
1051 warn!("Changing st_uid is not currently supported");
1052 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
1053 }
1054 if valid.contains(SetattrValid::GID) {
1055 warn!("Changing st_gid is not currently supported");
1056 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
1057 }
1058 if valid.intersects(
1059 SetattrValid::CTIME
1060 | SetattrValid::ATIME
1061 | SetattrValid::ATIME_NOW
1062 | SetattrValid::MTIME
1063 | SetattrValid::MTIME_NOW,
1064 ) {
1065 debug!("Ignoring ctime/atime/mtime change as authfs does not maintain timestamp currently");
1066 }
1067 Ok(())
1068 }
1069
/// Borrows the bytes of `cstr` (without the trailing NUL) as a `Path`, without copying.
fn cstr_to_path(cstr: &CStr) -> &Path {
    let raw_name = OsStr::from_bytes(cstr.to_bytes());
    Path::new(raw_name)
}
1073