1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::borrow::Cow;
6 use std::cmp;
7 use std::collections::btree_map;
8 use std::collections::BTreeMap;
9 use std::ffi::CStr;
10 use std::ffi::CString;
11 use std::fs::File;
12 use std::io;
13 use std::mem;
14 use std::mem::size_of;
15 use std::mem::MaybeUninit;
16 use std::os::raw::c_int;
17 use std::os::raw::c_long;
18 use std::ptr::addr_of;
19 use std::ptr::addr_of_mut;
20 use std::str::FromStr;
21 use std::sync::atomic::AtomicBool;
22 use std::sync::atomic::AtomicU64;
23 use std::sync::atomic::Ordering;
24 use std::sync::Arc;
25 use std::time::Duration;
26
27 use base::error;
28 use base::ioctl_ior_nr;
29 use base::ioctl_iow_nr;
30 use base::ioctl_iowr_nr;
31 use base::ioctl_with_mut_ptr;
32 use base::ioctl_with_ptr;
33 use base::syscall;
34 use base::AsRawDescriptor;
35 use base::FileFlags;
36 use base::FromRawDescriptor;
37 use base::RawDescriptor;
38 use data_model::zerocopy_from_reader;
39 use data_model::DataInit;
40 use fuse::filesystem::Context;
41 use fuse::filesystem::DirectoryIterator;
42 use fuse::filesystem::Entry;
43 use fuse::filesystem::FileSystem;
44 use fuse::filesystem::FsOptions;
45 use fuse::filesystem::GetxattrReply;
46 use fuse::filesystem::IoctlFlags;
47 use fuse::filesystem::IoctlReply;
48 use fuse::filesystem::ListxattrReply;
49 use fuse::filesystem::OpenOptions;
50 use fuse::filesystem::RemoveMappingOne;
51 use fuse::filesystem::SetattrValid;
52 use fuse::filesystem::ZeroCopyReader;
53 use fuse::filesystem::ZeroCopyWriter;
54 use fuse::filesystem::ROOT_ID;
55 use fuse::sys::WRITE_KILL_PRIV;
56 use fuse::Mapper;
57 #[cfg(feature = "arc_quota")]
58 use protobuf::Message;
59 use serde::Deserialize;
60 use serde::Serialize;
61 use sync::Mutex;
62 #[cfg(feature = "arc_quota")]
63 use system_api::client::OrgChromiumArcQuota;
64 #[cfg(feature = "arc_quota")]
65 use system_api::UserDataAuth::SetMediaRWDataFileProjectIdReply;
66 #[cfg(feature = "arc_quota")]
67 use system_api::UserDataAuth::SetMediaRWDataFileProjectIdRequest;
68 #[cfg(feature = "arc_quota")]
69 use system_api::UserDataAuth::SetMediaRWDataFileProjectInheritanceFlagReply;
70 #[cfg(feature = "arc_quota")]
71 use system_api::UserDataAuth::SetMediaRWDataFileProjectInheritanceFlagRequest;
72 use zerocopy::AsBytes;
73 use zerocopy::FromBytes;
74
75 use crate::virtio::fs::caps::Capability;
76 use crate::virtio::fs::caps::Caps;
77 use crate::virtio::fs::caps::Set as CapSet;
78 use crate::virtio::fs::caps::Value as CapValue;
79 use crate::virtio::fs::multikey::MultikeyBTreeMap;
80 use crate::virtio::fs::read_dir::ReadDir;
81
// NUL-terminated byte strings. `EMPTY_CSTR` and `PROC_CSTR` are converted to `&CStr` with
// `CStr::from_bytes_with_nul_unchecked` elsewhere in this file, so each one must keep exactly one
// NUL byte, at the end.
const EMPTY_CSTR: &[u8] = b"\0";
const ROOT_CSTR: &[u8] = b"/\0";
const PROC_CSTR: &[u8] = b"/proc\0";

// Prefix that `rewrite_xattr_name` prepends to the xattr names it rewrites.
const USER_VIRTIOFS_XATTR: &[u8] = b"user.virtiofs.";
// Only names starting with this prefix are candidates for rewriting.
const SECURITY_XATTR: &[u8] = b"security.";
// The one `security.` name that `rewrite_xattr_name` always leaves untouched.
const SELINUX_XATTR: &[u8] = b"security.selinux";

// Lengths, in bytes, of the key fields in `fscrypt_policy_v1` and `fscrypt_policy_v2`.
const FSCRYPT_KEY_DESCRIPTOR_SIZE: usize = 8;
const FSCRYPT_KEY_IDENTIFIER_SIZE: usize = 16;

// NOTE(review): presumably the value of the kernel's `FS_PROJINHERIT_FL` inode flag -- confirm
// against the Linux UAPI headers.
#[cfg(feature = "arc_quota")]
const FS_PROJINHERIT_FL: c_int = 0x20000000;

// 25 seconds is the default timeout for dbus-send.
#[cfg(feature = "arc_quota")]
const DEFAULT_DBUS_TIMEOUT: Duration = Duration::from_secs(25);
99
// C-layout record for a version 1 fscrypt policy: four single-byte fields followed by an
// `FSCRYPT_KEY_DESCRIPTOR_SIZE`-byte key descriptor.
// NOTE(review): presumably mirrors `struct fscrypt_policy_v1` from the Linux UAPI headers --
// confirm field order and sizes against the kernel definition before changing anything here.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromBytes)]
struct fscrypt_policy_v1 {
    _version: u8,
    _contents_encryption_mode: u8,
    _filenames_encryption_mode: u8,
    _flags: u8,
    _master_key_descriptor: [u8; FSCRYPT_KEY_DESCRIPTOR_SIZE],
}
109
// C-layout record for a version 2 fscrypt policy: four single-byte fields, four reserved bytes
// and an `FSCRYPT_KEY_IDENTIFIER_SIZE`-byte key identifier.
// NOTE(review): presumably mirrors `struct fscrypt_policy_v2` from the Linux UAPI headers --
// confirm field order and sizes against the kernel definition before changing anything here.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromBytes)]
struct fscrypt_policy_v2 {
    _version: u8,
    _contents_encryption_mode: u8,
    _filenames_encryption_mode: u8,
    _flags: u8,
    __reserved: [u8; 4],
    master_key_identifier: [u8; FSCRYPT_KEY_IDENTIFIER_SIZE],
}
120
// C-layout union over the two policy layouts. Both variants start with a `u8` version field, so
// `_version` overlays the first byte of whichever variant is stored.
#[repr(C)]
#[derive(Copy, Clone, FromBytes)]
union fscrypt_policy {
    _version: u8,
    _v1: fscrypt_policy_v1,
    _v2: fscrypt_policy_v2,
}
128
// Argument record for `FS_IOC_GET_ENCRYPTION_POLICY_EX`: a size field followed by the policy
// union.
#[repr(C)]
#[derive(Copy, Clone, FromBytes)]
struct fscrypt_get_policy_ex_arg {
    policy_size: u64,       /* input/output */
    policy: fscrypt_policy, /* output */
}

// NOTE(review): every field is an integer or a byte array, which is presumably what makes this
// `DataInit` impl sound -- confirm against the safety contract documented on `DataInit`.
unsafe impl DataInit for fscrypt_get_policy_ex_arg {}

// The size encoded in the ioctl number is that of `[u8; 9]`, not of the full argument struct.
// NOTE(review): presumably this matches how the kernel defines the ioctl (8-byte size plus 1-byte
// version) -- confirm against the Linux UAPI headers.
ioctl_iowr_nr!(FS_IOC_GET_ENCRYPTION_POLICY_EX, 'f' as u32, 22, [u8; 9]);
139
// C-layout argument record for the `FS_IOC_FSGETXATTR` / `FS_IOC_FSSETXATTR` ioctls declared
// right after it. The per-field notes say which fields are read, written, or both.
// NOTE(review): presumably mirrors `struct fsxattr` from the Linux UAPI headers -- confirm.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromBytes)]
struct fsxattr {
    fsx_xflags: u32,     /* xflags field value (get/set) */
    fsx_extsize: u32,    /* extsize field value (get/set)*/
    fsx_nextents: u32,   /* nextents field value (get) */
    fsx_projid: u32,     /* project identifier (get/set) */
    fsx_cowextsize: u32, /* CoW extsize field value (get/set)*/
    fsx_pad: [u8; 8],
}
150
// ioctl numbers for reading and writing an `fsxattr` record.
ioctl_ior_nr!(FS_IOC_FSGETXATTR, 'X' as u32, 31, fsxattr);
ioctl_iow_nr!(FS_IOC_FSSETXATTR, 'X' as u32, 32, fsxattr);

// ioctl numbers for reading and writing flags, with a `c_long`-sized argument.
ioctl_ior_nr!(FS_IOC_GETFLAGS, 'f' as u32, 1, c_long);
ioctl_iow_nr!(FS_IOC_SETFLAGS, 'f' as u32, 2, c_long);

// Same type and number as above, but declared with a fixed 32-bit argument.
ioctl_ior_nr!(FS_IOC32_GETFLAGS, 'f' as u32, 1, u32);
ioctl_iow_nr!(FS_IOC32_SETFLAGS, 'f' as u32, 2, u32);

// Same type and number as above, but declared with a fixed 64-bit argument.
ioctl_ior_nr!(FS_IOC64_GETFLAGS, 'f' as u32, 1, u64);
ioctl_iow_nr!(FS_IOC64_SETFLAGS, 'f' as u32, 2, u64);
162
// C-layout argument record for `FS_IOC_ENABLE_VERITY`. The salt and the signature are described
// by a size field plus a 64-bit pointer-sized field each.
// NOTE(review): presumably mirrors `struct fsverity_enable_arg` from the Linux UAPI headers --
// confirm field order and sizes against the kernel definition.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromBytes)]
struct fsverity_enable_arg {
    _version: u32,
    _hash_algorithm: u32,
    _block_size: u32,
    salt_size: u32,
    salt_ptr: u64,
    sig_size: u32,
    __reserved1: u32,
    sig_ptr: u64,
    __reserved2: [u64; 11],
}
176
// C-layout header of the argument for `FS_IOC_MEASURE_VERITY`. Only the fixed-size part is
// declared; the commented-out member records that digest bytes follow the header.
// NOTE(review): presumably `digest_size` gives the length of that trailing data -- confirm
// against the kernel's `struct fsverity_digest`.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromBytes)]
struct fsverity_digest {
    _digest_algorithm: u16,
    digest_size: u16,
    // __u8 digest[];
}
184
// ioctl numbers for the two fs-verity requests; the argument types are the records declared
// above.
ioctl_iow_nr!(FS_IOC_ENABLE_VERITY, 'f' as u32, 133, fsverity_enable_arg);
ioctl_iowr_nr!(FS_IOC_MEASURE_VERITY, 'f' as u32, 134, fsverity_digest);
187
/// Identifier for an entry in `PassthroughFs::inodes`; values are allocated from
/// `PassthroughFs::next_inode`.
pub type Inode = u64;
// Identifier for an entry in `PassthroughFs::handles`.
type Handle = u64;
190
// Secondary key for the `inodes` map. `add_entry` and `do_lookup` build it from the `st_ino` and
// `st_dev` fields of a `stat64` result, so one host file always maps to the same entry.
#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
struct InodeAltKey {
    ino: libc::ino64_t,
    dev: libc::dev_t,
}
196
// Coarse classification of a file, derived from the `S_IFMT` bits of its mode (see the
// `From<libc::mode_t>` impl). Everything that is neither a regular file nor a directory is
// `Other`.
#[derive(PartialEq, Eq, Debug)]
enum FileType {
    Regular,
    Directory,
    Other,
}
203
204 impl From<libc::mode_t> for FileType {
from(mode: libc::mode_t) -> Self205 fn from(mode: libc::mode_t) -> Self {
206 match mode & libc::S_IFMT {
207 libc::S_IFREG => FileType::Regular,
208 libc::S_IFDIR => FileType::Directory,
209 _ => FileType::Other,
210 }
211 }
212 }
213
// Bookkeeping for one entry in `PassthroughFs::inodes`.
#[derive(Debug)]
struct InodeData {
    // Key of this entry in the `inodes` map.
    inode: Inode,
    // (File, open_flags)
    file: Mutex<(File, libc::c_int)>,
    // Starts at 1 when `add_entry` creates the entry and is bumped by `increase_inode_refcount`
    // every time the same inode is handed out again.
    refcount: AtomicU64,
    // Derived from `st_mode` when the entry is created.
    filetype: FileType,
}
222
223 impl AsRawDescriptor for InodeData {
as_raw_descriptor(&self) -> RawDescriptor224 fn as_raw_descriptor(&self) -> RawDescriptor {
225 self.file.lock().0.as_raw_descriptor()
226 }
227 }
228
// Bookkeeping for one entry in `PassthroughFs::handles`.
#[derive(Debug)]
struct HandleData {
    // Inode this handle belongs to; `find_handle` only returns the handle when the caller asks
    // for it with this same inode.
    inode: Inode,
    file: Mutex<File>,
}
234
235 impl AsRawDescriptor for HandleData {
as_raw_descriptor(&self) -> RawDescriptor236 fn as_raw_descriptor(&self) -> RawDescriptor {
237 self.file.lock().as_raw_descriptor()
238 }
239 }
240
// Generates `$name`, a guard type around the credential-changing syscall `$syscall_nr` operating
// on values of type `$ty`. `$name::new` switches the calling thread to a new value and dropping
// the guard switches back to the value remembered in `old`.
macro_rules! scoped_cred {
    ($name:ident, $ty:ty, $syscall_nr:expr) => {
        #[derive(Debug)]
        struct $name {
            // Value that is restored when the guard is dropped.
            old: $ty,
        }

        impl $name {
            // Changes the effective uid/gid of the current thread to `val`. Changes the thread's
            // credentials back to `old` when the returned struct is dropped.
            //
            // Returns `Ok(None)` without making any syscall when `val == old`, and the OS error
            // when the syscall reports a failure.
            fn new(val: $ty, old: $ty) -> io::Result<Option<$name>> {
                if val == old {
                    // Nothing to do since we already have the correct value.
                    return Ok(None);
                }

                // We want credential changes to be per-thread because otherwise
                // we might interfere with operations being carried out on other
                // threads with different uids/gids. However, posix requires that
                // all threads in a process share the same credentials. To do this
                // libc uses signals to ensure that when one thread changes its
                // credentials the other threads do the same thing.
                //
                // So instead we invoke the syscall directly in order to get around
                // this limitation. Another option is to use the setfsuid and
                // setfsgid systems calls. However since those calls have no way to
                // return an error, it's preferable to do this instead.

                // This call is safe because it doesn't modify any memory and we
                // check the return value.
                // NOTE(review): the two `-1` arguments presumably leave the real and saved ids
                // untouched so that only the effective id changes -- confirm against the
                // setresuid(2)/setresgid(2) documentation.
                let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
                if res == 0 {
                    Ok(Some($name { old }))
                } else {
                    Err(io::Error::last_os_error())
                }
            }
        }

        impl Drop for $name {
            fn drop(&mut self) {
                // Same syscall as in `new`, this time restoring `old`. It doesn't modify any
                // memory and we check the return value; a failure can only be logged because
                // `drop` has no way to return an error.
                let res = unsafe { libc::syscall($syscall_nr, -1, self.old, -1) };
                if res < 0 {
                    error!(
                        "failed to change credentials back to {}: {}",
                        self.old,
                        io::Error::last_os_error(),
                    );
                }
            }
        }
    };
}
// Guard types for the thread's effective uid and gid. On 32-bit arm the `*32` syscall numbers are
// used instead of the plain ones.
// NOTE(review): presumably because the plain syscalls on that architecture only take 16-bit ids
// -- confirm.
#[cfg(not(target_arch = "arm"))]
scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
#[cfg(target_arch = "arm")]
scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid32);

#[cfg(not(target_arch = "arm"))]
scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
#[cfg(target_arch = "arm")]
scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid32);

// Syscall numbers used to read the thread's effective uid and gid, with the same per-architecture
// selection as above.
#[cfg(not(target_arch = "arm"))]
const SYS_GETEUID: libc::c_long = libc::SYS_geteuid;
#[cfg(target_arch = "arm")]
const SYS_GETEUID: libc::c_long = libc::SYS_geteuid32;

#[cfg(not(target_arch = "arm"))]
const SYS_GETEGID: libc::c_long = libc::SYS_getegid;
#[cfg(target_arch = "arm")]
const SYS_GETEGID: libc::c_long = libc::SYS_getegid32;
313
// Per-thread copies of the effective uid and gid, read with a raw syscall the first time each
// thread accesses them. `set_creds` uses them as the "old" values to restore.
thread_local! {
    // Both these calls are safe because they take no parameters, and only return an integer value.
    // The kernel also guarantees that they can never fail.
    static THREAD_EUID: libc::uid_t = unsafe { libc::syscall(SYS_GETEUID) as libc::uid_t };
    static THREAD_EGID: libc::gid_t = unsafe { libc::syscall(SYS_GETEGID) as libc::gid_t };
}
320
set_creds( uid: libc::uid_t, gid: libc::gid_t, ) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)>321 fn set_creds(
322 uid: libc::uid_t,
323 gid: libc::gid_t,
324 ) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
325 let olduid = THREAD_EUID.with(|uid| *uid);
326 let oldgid = THREAD_EGID.with(|gid| *gid);
327
328 // We have to change the gid before we change the uid because if we change the uid first then we
329 // lose the capability to change the gid. However changing back can happen in any order.
330 ScopedGid::new(gid, oldgid).and_then(|gid| Ok((ScopedUid::new(uid, olduid)?, gid)))
331 }
332
333 struct ScopedUmask {
334 old: libc::mode_t,
335 mask: libc::mode_t,
336 }
337
338 impl ScopedUmask {
new(mask: libc::mode_t) -> ScopedUmask339 fn new(mask: libc::mode_t) -> ScopedUmask {
340 ScopedUmask {
341 // Safe because this doesn't modify any memory and always succeeds.
342 old: unsafe { libc::umask(mask) },
343 mask,
344 }
345 }
346 }
347
348 impl Drop for ScopedUmask {
drop(&mut self)349 fn drop(&mut self) {
350 // Safe because this doesn't modify any memory and always succeeds.
351 let previous = unsafe { libc::umask(self.old) };
352 debug_assert_eq!(
353 previous, self.mask,
354 "umask changed while holding ScopedUmask"
355 );
356 }
357 }
358
359 struct ScopedFsetid(Caps);
360 impl Drop for ScopedFsetid {
drop(&mut self)361 fn drop(&mut self) {
362 if let Err(e) = raise_cap_fsetid(&mut self.0) {
363 error!(
364 "Failed to restore CAP_FSETID: {}. Some operations may be broken.",
365 e
366 )
367 }
368 }
369 }
370
raise_cap_fsetid(c: &mut Caps) -> io::Result<()>371 fn raise_cap_fsetid(c: &mut Caps) -> io::Result<()> {
372 c.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Set)?;
373 c.apply()
374 }
375
376 // Drops CAP_FSETID from the effective set for the current thread and returns an RAII guard that
377 // adds the capability back when it is dropped.
drop_cap_fsetid() -> io::Result<ScopedFsetid>378 fn drop_cap_fsetid() -> io::Result<ScopedFsetid> {
379 let mut caps = Caps::for_current_thread()?;
380 caps.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Clear)?;
381 caps.apply()?;
382 Ok(ScopedFsetid(caps))
383 }
384
ebadf() -> io::Error385 fn ebadf() -> io::Error {
386 io::Error::from_raw_os_error(libc::EBADF)
387 }
388
stat<F: AsRawDescriptor + ?Sized>(f: &F) -> io::Result<libc::stat64>389 fn stat<F: AsRawDescriptor + ?Sized>(f: &F) -> io::Result<libc::stat64> {
390 let mut st = MaybeUninit::<libc::stat64>::zeroed();
391
392 // Safe because this is a constant value and a valid C string.
393 let pathname = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
394
395 // Safe because the kernel will only write data in `st` and we check the return
396 // value.
397 syscall!(unsafe {
398 libc::fstatat64(
399 f.as_raw_descriptor(),
400 pathname.as_ptr(),
401 st.as_mut_ptr(),
402 libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
403 )
404 })?;
405
406 // Safe because the kernel guarantees that the struct is now fully initialized.
407 Ok(unsafe { st.assume_init() })
408 }
409
statat<D: AsRawDescriptor>(dir: &D, name: &CStr) -> io::Result<libc::stat64>410 fn statat<D: AsRawDescriptor>(dir: &D, name: &CStr) -> io::Result<libc::stat64> {
411 let mut st = MaybeUninit::<libc::stat64>::zeroed();
412
413 // Safe because the kernel will only write data in `st` and we check the return
414 // value.
415 syscall!(unsafe {
416 libc::fstatat64(
417 dir.as_raw_descriptor(),
418 name.as_ptr(),
419 st.as_mut_ptr(),
420 libc::AT_SYMLINK_NOFOLLOW,
421 )
422 })?;
423
424 // Safe because the kernel guarantees that the struct is now fully initialized.
425 Ok(unsafe { st.assume_init() })
426 }
427
/// The caching policy that the file system should report to the FUSE client. By default the FUSE
/// protocol uses close-to-open consistency. This means that any cached contents of the file are
/// invalidated the next time that file is opened.
///
/// The `FromStr` implementation accepts the variant names spelled in lower case, capitalized, or
/// in upper case (for example `never`, `Never` or `NEVER`).
#[derive(Debug, Clone, Default, Eq, PartialEq, Serialize, Deserialize)]
pub enum CachePolicy {
    /// The client should never cache file data and all I/O should be directly forwarded to the
    /// server. This policy must be selected when file contents may change without the knowledge of
    /// the FUSE client (i.e., the file system does not have exclusive access to the directory).
    Never,

    /// The client is free to choose when and how to cache file data. This is the default policy and
    /// uses close-to-open consistency as described in the enum documentation.
    #[default]
    Auto,

    /// The client should always cache file data. This means that the FUSE client will not
    /// invalidate any cached data that was returned by the file system the last time the file was
    /// opened. This policy should only be selected when the file system has exclusive access to the
    /// directory.
    Always,
}
449
450 impl FromStr for CachePolicy {
451 type Err = &'static str;
452
from_str(s: &str) -> Result<Self, Self::Err>453 fn from_str(s: &str) -> Result<Self, Self::Err> {
454 match s {
455 "never" | "Never" | "NEVER" => Ok(CachePolicy::Never),
456 "auto" | "Auto" | "AUTO" => Ok(CachePolicy::Auto),
457 "always" | "Always" | "ALWAYS" => Ok(CachePolicy::Always),
458 _ => Err("invalid cache policy"),
459 }
460 }
461 }
462
/// Options that configure the behavior of the file system.
///
/// A `Config` can be built with `Default::default()` or parsed from a colon-separated list of
/// `kind=value` options through its `FromStr` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// How long the FUSE client should consider directory entries to be valid. If the contents of a
    /// directory can only be modified by the FUSE client (i.e., the file system has exclusive
    /// access), then this should be a large value.
    ///
    /// The default value for this option is 5 seconds.
    pub entry_timeout: Duration,

    /// How long the FUSE client should consider file and directory attributes to be valid. If the
    /// attributes of a file or directory can only be modified by the FUSE client (i.e., the file
    /// system has exclusive access), then this should be set to a large value.
    ///
    /// The default value for this option is 5 seconds.
    pub attr_timeout: Duration,

    /// The caching policy the file system should use. See the documentation of `CachePolicy` for
    /// more details.
    pub cache_policy: CachePolicy,

    /// Whether the file system should enable writeback caching. This can improve performance as it
    /// allows the FUSE client to cache and coalesce multiple writes before sending them to the file
    /// system. However, enabling this option can increase the risk of data corruption if the file
    /// contents can change without the knowledge of the FUSE client (i.e., the server does **NOT**
    /// have exclusive access). Additionally, the file system should have read access to all files
    /// in the directory it is serving as the FUSE client may send read requests even for files
    /// opened with `O_WRONLY`.
    ///
    /// Therefore callers should only enable this option when they can guarantee that: 1) the file
    /// system has exclusive access to the directory and 2) the file system has read permissions for
    /// all files in that directory.
    ///
    /// The default value for this option is `false`.
    pub writeback: bool,

    /// Controls whether security.* xattrs (except for security.selinux) are re-written. When this
    /// is set to true, the server will add a "user.virtiofs" prefix to xattrs in the security
    /// namespace. Setting these xattrs requires CAP_SYS_ADMIN in the namespace where the file
    /// system was mounted and since the server usually runs in an unprivileged user namespace, it's
    /// unlikely to have that capability.
    ///
    /// The default value for this option is `false`.
    pub rewrite_security_xattrs: bool,

    /// Use case-insensitive lookups for directory entries (ASCII only).
    ///
    /// The default value for this option is `false`.
    pub ascii_casefold: bool,

    /// UIDs which are privileged to perform quota-related operations. We cannot perform a
    /// CAP_FOWNER check so we consult this list when the VM tries to set the project quota and the
    /// process uid doesn't match the owner uid. In that case, all uids in this list are treated as
    /// if they have CAP_FOWNER.
    ///
    /// The default value for this option is an empty list.
    #[cfg(feature = "arc_quota")]
    pub privileged_quota_uids: Vec<libc::uid_t>,

    /// Use DAX for shared files.
    ///
    /// Enabling DAX can improve performance for frequently accessed files by mapping regions of the
    /// file directly into the VM's memory region, allowing direct access with the cost of slightly
    /// increased latency the first time the file is accessed. Additionally, since the mapping is
    /// shared directly from the host kernel's file cache, enabling DAX can improve performance even
    /// when the cache policy is `Never`.
    ///
    /// The default value for this option is `false`.
    pub use_dax: bool,

    /// Enable support for POSIX acls.
    ///
    /// Enable POSIX acl support for the shared directory. This requires that the underlying file
    /// system also supports POSIX acls.
    ///
    /// The default value for this option is `true`.
    pub posix_acl: bool,
}
539
540 impl Default for Config {
default() -> Self541 fn default() -> Self {
542 Config {
543 entry_timeout: Duration::from_secs(5),
544 attr_timeout: Duration::from_secs(5),
545 cache_policy: Default::default(),
546 writeback: false,
547 rewrite_security_xattrs: false,
548 ascii_casefold: false,
549 #[cfg(feature = "arc_quota")]
550 privileged_quota_uids: Default::default(),
551 use_dax: false,
552 posix_acl: true,
553 }
554 }
555 }
556
557 impl FromStr for Config {
558 type Err = &'static str;
559
from_str(params: &str) -> Result<Self, Self::Err>560 fn from_str(params: &str) -> Result<Self, Self::Err> {
561 let mut cfg = Self::default();
562 if params.is_empty() {
563 return Ok(cfg);
564 }
565 for opt in params.split(':') {
566 let mut o = opt.splitn(2, '=');
567 let kind = o.next().ok_or("`cfg` options mut not be empty")?;
568 let value = o
569 .next()
570 .ok_or("`cfg` options must be of the form `kind=value`")?;
571 match kind {
572 #[cfg(feature = "arc_quota")]
573 "privileged_quota_uids" => {
574 cfg.privileged_quota_uids =
575 value.split(' ').map(|s| s.parse().unwrap()).collect();
576 }
577 "timeout" => {
578 let seconds = value.parse().map_err(|_| "`timeout` must be an integer")?;
579
580 let dur = Duration::from_secs(seconds);
581 cfg.entry_timeout = dur;
582 cfg.attr_timeout = dur;
583 }
584 "cache" => {
585 let policy = value
586 .parse()
587 .map_err(|_| "`cache` must be one of `never`, `always`, or `auto`")?;
588 cfg.cache_policy = policy;
589 }
590 "writeback" => {
591 let writeback = value.parse().map_err(|_| "`writeback` must be a boolean")?;
592 cfg.writeback = writeback;
593 }
594 "rewrite-security-xattrs" => {
595 let rewrite_security_xattrs = value
596 .parse()
597 .map_err(|_| "`rewrite-security-xattrs` must be a boolean")?;
598 cfg.rewrite_security_xattrs = rewrite_security_xattrs;
599 }
600 "ascii_casefold" => {
601 let ascii_casefold = value
602 .parse()
603 .map_err(|_| "`ascii_casefold` must be a boolean")?;
604 cfg.ascii_casefold = ascii_casefold;
605 }
606 "dax" => {
607 let use_dax = value.parse().map_err(|_| "`dax` must be a boolean")?;
608 cfg.use_dax = use_dax;
609 }
610 "posix_acl" => {
611 let posix_acl = value.parse().map_err(|_| "`posix_acl` must be a boolean")?;
612 cfg.posix_acl = posix_acl;
613 }
614 _ => return Err("unrecognized option for virtio-fs config"),
615 }
616 }
617 Ok(cfg)
618 }
619 }
620
/// A file system that simply "passes through" all requests it receives to the underlying file
/// system. To keep the implementation simple it serves the contents of its root directory. Users
/// that wish to serve only a specific directory should set up the environment so that that
/// directory ends up as the root of the file system process. One way to accomplish this is via a
/// combination of mount namespaces and the pivot_root system call.
pub struct PassthroughFs {
    // Mutex that must be acquired before executing a process-wide operation such as fchdir.
    process_lock: Mutex<()>,
    // virtio-fs tag that the guest uses when mounting. This is only used for debugging
    // when tracing is enabled.
    #[cfg_attr(not(feature = "trace_marker"), allow(dead_code))]
    tag: String,

    // File descriptors for various points in the file system tree.
    inodes: Mutex<MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>>,
    // Next inode number to hand out; `new` starts it at `ROOT_ID + 1`.
    next_inode: AtomicU64,

    // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
    // used for reading and writing data.
    handles: Mutex<BTreeMap<Handle, Arc<HandleData>>>,
    // Next handle number to hand out; `new` starts it at 1.
    next_handle: AtomicU64,

    // File descriptor pointing to the `/proc` directory. This is used to convert an fd from
    // `inodes` into one that can go into `handles`. This is accomplished by reading the
    // `self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
    // to be serving doesn't have access to `/proc`.
    proc: File,

    // Whether writeback caching is enabled for this directory. This will only be true when
    // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
    writeback: AtomicBool,

    // Whether zero message opens are supported by the kernel driver.
    zero_message_open: AtomicBool,

    // Whether zero message opendir is supported by the kernel driver.
    zero_message_opendir: AtomicBool,

    // Used to communicate with other processes using D-Bus. Both fields are `None` when
    // `cfg.privileged_quota_uids` is empty.
    #[cfg(feature = "arc_quota")]
    dbus_connection: Option<Mutex<dbus::blocking::Connection>>,
    // Descriptor of the D-Bus connection's watch; `keep_rds` reports it next to `proc`.
    #[cfg(feature = "arc_quota")]
    dbus_fd: Option<std::os::unix::io::RawFd>,

    // The options this file system was created with.
    cfg: Config,
}
667
668 impl std::fmt::Debug for PassthroughFs {
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result669 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
670 f.debug_struct("PassthroughFs")
671 .field("tag", &self.tag)
672 .field("next_inode", &self.next_inode)
673 .field("next_handle", &self.next_handle)
674 .field("proc", &self.proc)
675 .field("writeback", &self.writeback)
676 .field("zero_message_open", &self.zero_message_open)
677 .field("zero_message_opendir", &self.zero_message_opendir)
678 .field("cfg", &self.cfg)
679 .finish()
680 }
681 }
682
683 impl PassthroughFs {
new(tag: &str, cfg: Config) -> io::Result<PassthroughFs>684 pub fn new(tag: &str, cfg: Config) -> io::Result<PassthroughFs> {
685 // Safe because this is a constant value and a valid C string.
686 let proc_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_CSTR) };
687
688 // Safe because this doesn't modify any memory and we check the return value.
689 let raw_descriptor = syscall!(unsafe {
690 libc::openat64(
691 libc::AT_FDCWD,
692 proc_cstr.as_ptr(),
693 libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
694 )
695 })?;
696
697 // Privileged UIDs can use D-Bus to perform some operations.
698 #[cfg(feature = "arc_quota")]
699 let (dbus_connection, dbus_fd) = if cfg.privileged_quota_uids.is_empty() {
700 (None, None)
701 } else {
702 let mut channel = dbus::channel::Channel::get_private(dbus::channel::BusType::System)
703 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
704 channel.set_watch_enabled(true);
705 let dbus_fd = channel.watch().fd;
706 channel.set_watch_enabled(false);
707 (
708 Some(Mutex::new(dbus::blocking::Connection::from(channel))),
709 Some(dbus_fd),
710 )
711 };
712
713 // Safe because we just opened this descriptor.
714 let proc = unsafe { File::from_raw_descriptor(raw_descriptor) };
715
716 let passthroughfs = PassthroughFs {
717 process_lock: Mutex::new(()),
718 tag: tag.to_string(),
719 inodes: Mutex::new(MultikeyBTreeMap::new()),
720 next_inode: AtomicU64::new(ROOT_ID + 1),
721
722 handles: Mutex::new(BTreeMap::new()),
723 next_handle: AtomicU64::new(1),
724
725 proc,
726
727 writeback: AtomicBool::new(false),
728 zero_message_open: AtomicBool::new(false),
729 zero_message_opendir: AtomicBool::new(false),
730
731 #[cfg(feature = "arc_quota")]
732 dbus_connection,
733 #[cfg(feature = "arc_quota")]
734 dbus_fd,
735
736 cfg,
737 };
738
739 cros_tracing::trace_simple_print!("New PassthroughFS initialized: {:?}", passthroughfs);
740 Ok(passthroughfs)
741 }
742
cfg(&self) -> &Config743 pub fn cfg(&self) -> &Config {
744 &self.cfg
745 }
746
keep_rds(&self) -> Vec<RawDescriptor>747 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
748 #[cfg_attr(not(feature = "arc_quota"), allow(unused_mut))]
749 let mut keep_rds = vec![self.proc.as_raw_descriptor()];
750 #[cfg(feature = "arc_quota")]
751 if let Some(fd) = self.dbus_fd {
752 keep_rds.push(fd);
753 }
754 keep_rds
755 }
756
rewrite_xattr_name<'xattr>(&self, name: &'xattr CStr) -> Cow<'xattr, CStr>757 fn rewrite_xattr_name<'xattr>(&self, name: &'xattr CStr) -> Cow<'xattr, CStr> {
758 if !self.cfg.rewrite_security_xattrs {
759 return Cow::Borrowed(name);
760 }
761
762 // Does not include nul-terminator.
763 let buf = name.to_bytes();
764 if !buf.starts_with(SECURITY_XATTR) || buf == SELINUX_XATTR {
765 return Cow::Borrowed(name);
766 }
767
768 let mut newname = USER_VIRTIOFS_XATTR.to_vec();
769 newname.extend_from_slice(buf);
770
771 // The unwrap is safe here because the prefix doesn't contain any interior nul-bytes and the
772 // to_bytes() call above will not return a byte slice with any interior nul-bytes either.
773 Cow::Owned(CString::new(newname).expect("Failed to re-write xattr name"))
774 }
775
find_inode(&self, inode: Inode) -> io::Result<Arc<InodeData>>776 fn find_inode(&self, inode: Inode) -> io::Result<Arc<InodeData>> {
777 self.inodes
778 .lock()
779 .get(&inode)
780 .map(Arc::clone)
781 .ok_or_else(ebadf)
782 }
783
find_handle(&self, handle: Handle, inode: Inode) -> io::Result<Arc<HandleData>>784 fn find_handle(&self, handle: Handle, inode: Inode) -> io::Result<Arc<HandleData>> {
785 self.handles
786 .lock()
787 .get(&handle)
788 .filter(|hd| hd.inode == inode)
789 .map(Arc::clone)
790 .ok_or_else(ebadf)
791 }
792
open_fd(&self, fd: RawDescriptor, flags: i32) -> io::Result<File>793 fn open_fd(&self, fd: RawDescriptor, flags: i32) -> io::Result<File> {
794 let pathname = CString::new(format!("self/fd/{}", fd))
795 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
796
797 // Safe because this doesn't modify any memory and we check the return value. We don't
798 // really check `flags` because if the kernel can't handle poorly specified flags then we
799 // have much bigger problems. Also, clear the `O_NOFOLLOW` flag if it is set since we need
800 // to follow the `/proc/self/fd` symlink to get the file.
801 let raw_descriptor = syscall!(unsafe {
802 libc::openat64(
803 self.proc.as_raw_descriptor(),
804 pathname.as_ptr(),
805 (flags | libc::O_CLOEXEC) & !(libc::O_NOFOLLOW | libc::O_DIRECT),
806 )
807 })?;
808
809 // Safe because we just opened this descriptor.
810 Ok(unsafe { File::from_raw_descriptor(raw_descriptor) })
811 }
812
open_inode(&self, inode: &InodeData, mut flags: i32) -> io::Result<File>813 fn open_inode(&self, inode: &InodeData, mut flags: i32) -> io::Result<File> {
814 // When writeback caching is enabled, the kernel may send read requests even if the
815 // userspace program opened the file write-only. So we need to ensure that we have opened
816 // the file for reading as well as writing.
817 let writeback = self.writeback.load(Ordering::Relaxed);
818 if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
819 flags &= !libc::O_ACCMODE;
820 flags |= libc::O_RDWR;
821 }
822
823 // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`.
824 // However, this breaks atomicity as the file may have changed on disk, invalidating the
825 // cached copy of the data in the kernel and the offset that the kernel thinks is the end of
826 // the file. Just allow this for now as it is the user's responsibility to enable writeback
827 // caching only for directories that are not shared. It also means that we need to clear the
828 // `O_APPEND` flag.
829 if writeback && flags & libc::O_APPEND != 0 {
830 flags &= !libc::O_APPEND;
831 }
832
833 self.open_fd(inode.as_raw_descriptor(), flags)
834 }
835
836 // Increases the inode refcount and returns the inode.
increase_inode_refcount(&self, inode_data: &InodeData) -> Inode837 fn increase_inode_refcount(&self, inode_data: &InodeData) -> Inode {
838 // Matches with the release store in `forget`.
839 inode_data.refcount.fetch_add(1, Ordering::Acquire);
840 inode_data.inode
841 }
842
843 // Creates a new entry for `f` or increases the refcount of the existing entry for `f`.
844 // The inodes mutex lock must not be already taken by the same thread otherwise this
845 // will deadlock.
add_entry(&self, f: File, st: libc::stat64, open_flags: libc::c_int) -> Entry846 fn add_entry(&self, f: File, st: libc::stat64, open_flags: libc::c_int) -> Entry {
847 let mut inodes = self.inodes.lock();
848
849 let altkey = InodeAltKey {
850 ino: st.st_ino,
851 dev: st.st_dev,
852 };
853
854 let inode = if let Some(data) = inodes.get_alt(&altkey) {
855 self.increase_inode_refcount(data)
856 } else {
857 let inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
858 inodes.insert(
859 inode,
860 altkey,
861 Arc::new(InodeData {
862 inode,
863 file: Mutex::new((f, open_flags)),
864 refcount: AtomicU64::new(1),
865 filetype: st.st_mode.into(),
866 }),
867 );
868
869 inode
870 };
871
872 Entry {
873 inode,
874 generation: 0,
875 attr: st,
876 attr_timeout: self.cfg.attr_timeout,
877 entry_timeout: self.cfg.entry_timeout,
878 }
879 }
880
881 // Performs an ascii case insensitive lookup.
ascii_casefold_lookup(&self, parent: &InodeData, name: &[u8]) -> io::Result<Entry>882 fn ascii_casefold_lookup(&self, parent: &InodeData, name: &[u8]) -> io::Result<Entry> {
883 let mut buf = [0u8; 1024];
884 let mut offset = 0;
885 loop {
886 let mut read_dir = ReadDir::new(parent, offset, &mut buf[..])?;
887 if read_dir.remaining() == 0 {
888 break;
889 }
890
891 while let Some(entry) = read_dir.next() {
892 offset = entry.offset as libc::off64_t;
893 if name.eq_ignore_ascii_case(entry.name.to_bytes()) {
894 return self.do_lookup(parent, entry.name);
895 }
896 }
897 }
898 Err(io::Error::from_raw_os_error(libc::ENOENT))
899 }
900
do_lookup(&self, parent: &InodeData, name: &CStr) -> io::Result<Entry>901 fn do_lookup(&self, parent: &InodeData, name: &CStr) -> io::Result<Entry> {
902 let st = statat(parent, name)?;
903
904 let altkey = InodeAltKey {
905 ino: st.st_ino,
906 dev: st.st_dev,
907 };
908
909 // Check if we already have an entry before opening a new file.
910 if let Some(data) = self.inodes.lock().get_alt(&altkey) {
911 // Return the same inode with the reference counter increased.
912 return Ok(Entry {
913 inode: self.increase_inode_refcount(data),
914 generation: 0,
915 attr: st,
916 attr_timeout: self.cfg.attr_timeout,
917 entry_timeout: self.cfg.entry_timeout,
918 });
919 }
920
921 // Open a regular file with O_RDONLY to store in `InodeData` so explicit open requests can
922 // be skipped later if the ZERO_MESSAGE_{OPEN,OPENDIR} features are enabled.
923 // If the crosvm process doesn't have a read permission, fall back to O_PATH below.
924 let mut flags = libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
925 match FileType::from(st.st_mode) {
926 FileType::Regular => {}
927 FileType::Directory => flags |= libc::O_DIRECTORY,
928 FileType::Other => flags |= libc::O_PATH,
929 };
930
931 // Safe because this doesn't modify any memory and we check the return value.
932 let fd = match unsafe {
933 syscall!(libc::openat64(
934 parent.as_raw_descriptor(),
935 name.as_ptr(),
936 flags
937 ))
938 } {
939 Ok(fd) => fd,
940 Err(e) if e.errno() == libc::EACCES => {
941 // If O_RDONLY is unavailable, fall back to O_PATH to get an FD to store in
942 // `InodeData`.
943 // Note that some operations which should be allowed without read permissions
944 // require syscalls that don't support O_PATH fds. For those syscalls, we will
945 // need to fall back to their path-based equivalents with /self/fd/${FD}.
946 // e.g. `fgetxattr()` for an O_PATH FD fails while `getxaattr()` for /self/fd/${FD}
947 // works.
948 flags |= libc::O_PATH;
949 // Safe because this doesn't modify any memory and we check the return value.
950 unsafe {
951 syscall!(libc::openat64(
952 parent.as_raw_descriptor(),
953 name.as_ptr(),
954 flags
955 ))
956 }?
957 }
958 Err(e) => {
959 return Err(e.into());
960 }
961 };
962
963 // Safe because we own the fd.
964 let f = unsafe { File::from_raw_descriptor(fd) };
965 // We made sure the lock acquired for `self.inodes` is released automatically when
966 // the if block above is exited, so a call to `self.add_entry()` should not cause a deadlock
967 // here. This would not be the case if this were executed in an else block instead.
968 Ok(self.add_entry(f, st, flags))
969 }
970
do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option<Handle>, OpenOptions)>971 fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option<Handle>, OpenOptions)> {
972 let inode_data = self.find_inode(inode)?;
973
974 let file = Mutex::new(self.open_inode(&inode_data, flags as i32)?);
975
976 let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
977 let data = HandleData { inode, file };
978
979 self.handles.lock().insert(handle, Arc::new(data));
980
981 let mut opts = OpenOptions::empty();
982 match self.cfg.cache_policy {
983 // We only set the direct I/O option on files.
984 CachePolicy::Never => opts.set(
985 OpenOptions::DIRECT_IO,
986 flags & (libc::O_DIRECTORY as u32) == 0,
987 ),
988 CachePolicy::Always => {
989 opts |= if flags & (libc::O_DIRECTORY as u32) == 0 {
990 OpenOptions::KEEP_CACHE
991 } else {
992 OpenOptions::CACHE_DIR
993 }
994 }
995 _ => {}
996 };
997
998 Ok((Some(handle), opts))
999 }
1000
do_release(&self, inode: Inode, handle: Handle) -> io::Result<()>1001 fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
1002 let mut handles = self.handles.lock();
1003
1004 if let btree_map::Entry::Occupied(e) = handles.entry(handle) {
1005 if e.get().inode == inode {
1006 // We don't need to close the file here because that will happen automatically when
1007 // the last `Arc` is dropped.
1008 e.remove();
1009 return Ok(());
1010 }
1011 }
1012
1013 Err(ebadf())
1014 }
1015
do_getattr(&self, inode: &InodeData) -> io::Result<(libc::stat64, Duration)>1016 fn do_getattr(&self, inode: &InodeData) -> io::Result<(libc::stat64, Duration)> {
1017 let st = stat(inode)?;
1018
1019 Ok((st, self.cfg.attr_timeout))
1020 }
1021
do_unlink(&self, parent: &InodeData, name: &CStr, flags: libc::c_int) -> io::Result<()>1022 fn do_unlink(&self, parent: &InodeData, name: &CStr, flags: libc::c_int) -> io::Result<()> {
1023 // Safe because this doesn't modify any memory and we check the return value.
1024 syscall!(unsafe { libc::unlinkat(parent.as_raw_descriptor(), name.as_ptr(), flags) })?;
1025 Ok(())
1026 }
1027
do_fsync<F: AsRawDescriptor>(&self, file: &F, datasync: bool) -> io::Result<()>1028 fn do_fsync<F: AsRawDescriptor>(&self, file: &F, datasync: bool) -> io::Result<()> {
1029 // Safe because this doesn't modify any memory and we check the return value.
1030 syscall!(unsafe {
1031 if datasync {
1032 libc::fdatasync(file.as_raw_descriptor())
1033 } else {
1034 libc::fsync(file.as_raw_descriptor())
1035 }
1036 })?;
1037
1038 Ok(())
1039 }
1040
1041 // Changes the CWD to `self.proc`, runs `f`, and then changes the CWD back to the root
1042 // directory. This effectively emulates an *at syscall starting at /proc, which is useful when
1043 // there is no *at syscall available. Panics if any of the fchdir calls fail or if there is no
1044 // root inode.
1045 //
1046 // NOTE: this method acquires an `self`-wide lock. If any locks are acquired in `f`, care must
1047 // be taken to avoid the risk of deadlocks.
with_proc_chdir<F, T>(&self, f: F) -> T where F: FnOnce() -> T,1048 fn with_proc_chdir<F, T>(&self, f: F) -> T
1049 where
1050 F: FnOnce() -> T,
1051 {
1052 let root = self.find_inode(ROOT_ID).expect("failed to find root inode");
1053
1054 // Acquire a lock for `fchdir`.
1055 let _proc_lock = self.process_lock.lock();
1056 // Safe because this doesn't modify any memory and we check the return value. Since the
1057 // fchdir should never fail we just use debug_asserts.
1058 let proc_cwd = unsafe { libc::fchdir(self.proc.as_raw_descriptor()) };
1059 debug_assert_eq!(
1060 proc_cwd,
1061 0,
1062 "failed to fchdir to /proc: {}",
1063 io::Error::last_os_error()
1064 );
1065
1066 let res = f();
1067
1068 // Safe because this doesn't modify any memory and we check the return value. Since the
1069 // fchdir should never fail we just use debug_asserts.
1070 let root_cwd = unsafe { libc::fchdir(root.as_raw_descriptor()) };
1071 debug_assert_eq!(
1072 root_cwd,
1073 0,
1074 "failed to fchdir back to root directory: {}",
1075 io::Error::last_os_error()
1076 );
1077
1078 res
1079 }
1080
do_getxattr(&self, inode: &InodeData, name: &CStr, value: &mut [u8]) -> io::Result<usize>1081 fn do_getxattr(&self, inode: &InodeData, name: &CStr, value: &mut [u8]) -> io::Result<usize> {
1082 let file = inode.file.lock();
1083 let o_path_file = (file.1 & libc::O_PATH) != 0;
1084 let res = if o_path_file {
1085 // For FDs opened with `O_PATH`, we cannot call `fgetxattr` normally. Instead we
1086 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
1087 // and then setting the CWD back to the root directory.
1088 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
1089 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1090
1091 // Safe because this will only modify `value` and we check the return value.
1092 self.with_proc_chdir(|| unsafe {
1093 libc::getxattr(
1094 path.as_ptr(),
1095 name.as_ptr(),
1096 value.as_mut_ptr() as *mut libc::c_void,
1097 value.len() as libc::size_t,
1098 )
1099 })
1100 } else {
1101 // For regular files and directories, we can just use fgetxattr. Safe because this will
1102 // only write to `value` and we check the return value.
1103 unsafe {
1104 libc::fgetxattr(
1105 file.0.as_raw_descriptor(),
1106 name.as_ptr(),
1107 value.as_mut_ptr() as *mut libc::c_void,
1108 value.len() as libc::size_t,
1109 )
1110 }
1111 };
1112
1113 if res < 0 {
1114 Err(io::Error::last_os_error())
1115 } else {
1116 Ok(res as usize)
1117 }
1118 }
1119
get_encryption_policy_ex<R: io::Read>( &self, inode: Inode, handle: Handle, mut r: R, ) -> io::Result<IoctlReply>1120 fn get_encryption_policy_ex<R: io::Read>(
1121 &self,
1122 inode: Inode,
1123 handle: Handle,
1124 mut r: R,
1125 ) -> io::Result<IoctlReply> {
1126 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1127 self.find_inode(inode)?
1128 } else {
1129 self.find_handle(handle, inode)?
1130 };
1131
1132 // Safe because this only has integer fields.
1133 let mut arg = unsafe { MaybeUninit::<fscrypt_get_policy_ex_arg>::zeroed().assume_init() };
1134 r.read_exact(arg.policy_size.as_bytes_mut())?;
1135
1136 let policy_size = cmp::min(arg.policy_size, size_of::<fscrypt_policy>() as u64);
1137 arg.policy_size = policy_size;
1138
1139 // Safe because the kernel will only write to `arg` and we check the return value.
1140 let res =
1141 unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GET_ENCRYPTION_POLICY_EX(), &mut arg) };
1142 if res < 0 {
1143 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1144 } else {
1145 let len = size_of::<u64>() + arg.policy_size as usize;
1146 Ok(IoctlReply::Done(Ok(arg.as_slice()[..len].to_vec())))
1147 }
1148 }
1149
get_fsxattr(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply>1150 fn get_fsxattr(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1151 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1152 self.find_inode(inode)?
1153 } else {
1154 self.find_handle(handle, inode)?
1155 };
1156
1157 let mut buf = MaybeUninit::<fsxattr>::zeroed();
1158
1159 // Safe because the kernel will only write to `buf` and we check the return value.
1160 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR(), buf.as_mut_ptr()) };
1161 if res < 0 {
1162 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1163 } else {
1164 // Safe because the kernel guarantees that the policy is now initialized.
1165 let xattr = unsafe { buf.assume_init() };
1166 Ok(IoctlReply::Done(Ok(xattr.as_bytes().to_vec())))
1167 }
1168 }
1169
set_fsxattr<R: io::Read>( &self, #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context, inode: Inode, handle: Handle, r: R, ) -> io::Result<IoctlReply>1170 fn set_fsxattr<R: io::Read>(
1171 &self,
1172 #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1173 inode: Inode,
1174 handle: Handle,
1175 r: R,
1176 ) -> io::Result<IoctlReply> {
1177 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1178 self.find_inode(inode)?
1179 } else {
1180 self.find_handle(handle, inode)?
1181 };
1182
1183 let in_attr: fsxattr = zerocopy_from_reader(r)?;
1184
1185 #[cfg(feature = "arc_quota")]
1186 let st = stat(&*data)?;
1187
1188 // Changing quota project ID requires CAP_FOWNER or being file owner.
1189 // Here we use privileged_quota_uids because we cannot perform a CAP_FOWNER check.
1190 #[cfg(feature = "arc_quota")]
1191 if ctx.uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx.uid) {
1192 // Get the current fsxattr.
1193 let mut buf = MaybeUninit::<fsxattr>::zeroed();
1194 // Safe because the kernel will only write to `buf` and we check the return value.
1195 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR(), buf.as_mut_ptr()) };
1196 if res < 0 {
1197 return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1198 }
1199 // Safe because the kernel guarantees that the policy is now initialized.
1200 let current_attr = unsafe { buf.assume_init() };
1201
1202 // Project ID cannot be changed inside a user namespace.
1203 // Use UserDataAuth to avoid this restriction.
1204 if current_attr.fsx_projid != in_attr.fsx_projid {
1205 let connection = self.dbus_connection.as_ref().unwrap().lock();
1206 let proxy = connection.with_proxy(
1207 "org.chromium.UserDataAuth",
1208 "/org/chromium/UserDataAuth",
1209 DEFAULT_DBUS_TIMEOUT,
1210 );
1211 let mut proto: SetMediaRWDataFileProjectIdRequest = Message::new();
1212 proto.project_id = in_attr.fsx_projid;
1213 // Safe because data is a valid file descriptor.
1214 let fd = unsafe { dbus::arg::OwnedFd::new(base::clone_descriptor(&*data)?) };
1215 match proxy.set_media_rwdata_file_project_id(fd, proto.write_to_bytes().unwrap()) {
1216 Ok(r) => {
1217 let r = SetMediaRWDataFileProjectIdReply::parse_from_bytes(&r)
1218 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1219 if !r.success {
1220 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1221 r.error,
1222 ))));
1223 }
1224 }
1225 Err(e) => {
1226 return Err(io::Error::new(io::ErrorKind::Other, e));
1227 }
1228 };
1229 }
1230 }
1231
1232 // Safe because this doesn't modify any memory and we check the return value.
1233 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_FSSETXATTR(), &in_attr) };
1234 if res < 0 {
1235 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1236 } else {
1237 Ok(IoctlReply::Done(Ok(Vec::new())))
1238 }
1239 }
1240
get_flags(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply>1241 fn get_flags(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1242 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1243 self.find_inode(inode)?
1244 } else {
1245 self.find_handle(handle, inode)?
1246 };
1247
1248 // The ioctl encoding is a long but the parameter is actually an int.
1249 let mut flags: c_int = 0;
1250
1251 // Safe because the kernel will only write to `flags` and we check the return value.
1252 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS(), &mut flags) };
1253 if res < 0 {
1254 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1255 } else {
1256 Ok(IoctlReply::Done(Ok(flags.to_ne_bytes().to_vec())))
1257 }
1258 }
1259
set_flags<R: io::Read>( &self, #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context, inode: Inode, handle: Handle, r: R, ) -> io::Result<IoctlReply>1260 fn set_flags<R: io::Read>(
1261 &self,
1262 #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1263 inode: Inode,
1264 handle: Handle,
1265 r: R,
1266 ) -> io::Result<IoctlReply> {
1267 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1268 self.find_inode(inode)?
1269 } else {
1270 self.find_handle(handle, inode)?
1271 };
1272
1273 // The ioctl encoding is a long but the parameter is actually an int.
1274 let in_flags: c_int = zerocopy_from_reader(r)?;
1275
1276 #[cfg(feature = "arc_quota")]
1277 let st = stat(&*data)?;
1278
1279 // Only privleged uid can perform FS_IOC_SETFLAGS through cryptohome.
1280 #[cfg(feature = "arc_quota")]
1281 if ctx.uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx.uid) {
1282 // Get the current flag.
1283 let mut buf = MaybeUninit::<c_int>::zeroed();
1284 // Safe because the kernel will only write to `buf` and we check the return value.
1285 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS(), buf.as_mut_ptr()) };
1286 if res < 0 {
1287 return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1288 }
1289 // Safe because the kernel guarantees that the policy is now initialized.
1290 let current_flags = unsafe { buf.assume_init() };
1291
1292 // Project inheritance flag cannot be changed inside a user namespace.
1293 // Use UserDataAuth to avoid this restriction.
1294 if (in_flags & FS_PROJINHERIT_FL) != (current_flags & FS_PROJINHERIT_FL) {
1295 let connection = self.dbus_connection.as_ref().unwrap().lock();
1296 let proxy = connection.with_proxy(
1297 "org.chromium.UserDataAuth",
1298 "/org/chromium/UserDataAuth",
1299 DEFAULT_DBUS_TIMEOUT,
1300 );
1301 let mut proto: SetMediaRWDataFileProjectInheritanceFlagRequest = Message::new();
1302 // If the input flags contain FS_PROJINHERIT_FL, then it is a set. Otherwise it is a
1303 // reset.
1304 proto.enable = (in_flags & FS_PROJINHERIT_FL) == FS_PROJINHERIT_FL;
1305 // Safe because data is a valid file descriptor.
1306 let fd = unsafe { dbus::arg::OwnedFd::new(base::clone_descriptor(&*data)?) };
1307 match proxy.set_media_rwdata_file_project_inheritance_flag(
1308 fd,
1309 proto.write_to_bytes().unwrap(),
1310 ) {
1311 Ok(r) => {
1312 let r = SetMediaRWDataFileProjectInheritanceFlagReply::parse_from_bytes(&r)
1313 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1314 if !r.success {
1315 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1316 r.error,
1317 ))));
1318 }
1319 }
1320 Err(e) => {
1321 return Err(io::Error::new(io::ErrorKind::Other, e));
1322 }
1323 };
1324 }
1325 }
1326
1327 // Safe because this doesn't modify any memory and we check the return value.
1328 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_SETFLAGS(), &in_flags) };
1329 if res < 0 {
1330 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1331 } else {
1332 Ok(IoctlReply::Done(Ok(Vec::new())))
1333 }
1334 }
1335
enable_verity<R: io::Read>( &self, inode: Inode, handle: Handle, mut r: R, ) -> io::Result<IoctlReply>1336 fn enable_verity<R: io::Read>(
1337 &self,
1338 inode: Inode,
1339 handle: Handle,
1340 mut r: R,
1341 ) -> io::Result<IoctlReply> {
1342 let inode_data = self.find_inode(inode)?;
1343
1344 // These match the return codes from `fsverity_ioctl_enable` in the kernel.
1345 match inode_data.filetype {
1346 FileType::Regular => {}
1347 FileType::Directory => return Err(io::Error::from_raw_os_error(libc::EISDIR)),
1348 FileType::Other => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
1349 }
1350
1351 {
1352 // We cannot enable verity while holding a writable fd so get a new one, if necessary.
1353 let mut file = inode_data.file.lock();
1354 let mut flags = file.1;
1355 match flags & libc::O_ACCMODE {
1356 libc::O_WRONLY | libc::O_RDWR => {
1357 flags &= !libc::O_ACCMODE;
1358 flags |= libc::O_RDONLY;
1359
1360 // We need to get a read-only handle for this file.
1361 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDONLY)?;
1362 *file = (newfile, flags);
1363 }
1364 libc::O_RDONLY => {}
1365 _ => panic!("Unexpected flags: {:#x}", flags),
1366 }
1367 }
1368
1369 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1370 inode_data
1371 } else {
1372 let data = self.find_handle(handle, inode)?;
1373
1374 {
1375 // We can't enable verity while holding a writable fd. We don't know whether the file
1376 // was opened for writing so check it here. We don't expect this to be a frequent
1377 // operation so the extra latency should be fine.
1378 let mut file = data.file.lock();
1379 let flags = FileFlags::from_file(&*file).map_err(io::Error::from)?;
1380 match flags {
1381 FileFlags::ReadWrite | FileFlags::Write => {
1382 // We need to get a read-only handle for this file.
1383 *file = self.open_fd(file.as_raw_descriptor(), libc::O_RDONLY)?;
1384 }
1385 FileFlags::Read => {}
1386 }
1387 }
1388
1389 data
1390 };
1391
1392 let mut arg: fsverity_enable_arg = zerocopy_from_reader(&mut r)?;
1393
1394 let mut salt;
1395 if arg.salt_size > 0 {
1396 if arg.salt_size > self.max_buffer_size() {
1397 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1398 libc::ENOMEM,
1399 ))));
1400 }
1401 salt = vec![0; arg.salt_size as usize];
1402 r.read_exact(&mut salt)?;
1403 arg.salt_ptr = salt.as_ptr() as usize as u64;
1404 } else {
1405 arg.salt_ptr = 0;
1406 }
1407
1408 let mut sig;
1409 if arg.sig_size > 0 {
1410 if arg.sig_size > self.max_buffer_size() {
1411 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1412 libc::ENOMEM,
1413 ))));
1414 }
1415 sig = vec![0; arg.sig_size as usize];
1416 r.read_exact(&mut sig)?;
1417 arg.sig_ptr = sig.as_ptr() as usize as u64;
1418 } else {
1419 arg.sig_ptr = 0;
1420 }
1421
1422 // Safe because this doesn't modify any memory and we check the return value.
1423 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_ENABLE_VERITY(), &arg) };
1424 if res < 0 {
1425 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1426 } else {
1427 Ok(IoctlReply::Done(Ok(Vec::new())))
1428 }
1429 }
1430
measure_verity<R: io::Read>( &self, inode: Inode, handle: Handle, r: R, out_size: u32, ) -> io::Result<IoctlReply>1431 fn measure_verity<R: io::Read>(
1432 &self,
1433 inode: Inode,
1434 handle: Handle,
1435 r: R,
1436 out_size: u32,
1437 ) -> io::Result<IoctlReply> {
1438 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1439 self.find_inode(inode)?
1440 } else {
1441 self.find_handle(handle, inode)?
1442 };
1443
1444 let digest: fsverity_digest = zerocopy_from_reader(r)?;
1445
1446 // Taken from fs/verity/fsverity_private.h.
1447 const FS_VERITY_MAX_DIGEST_SIZE: u16 = 64;
1448
1449 // This digest size is what the fsverity command line utility uses.
1450 const DIGEST_SIZE: u16 = FS_VERITY_MAX_DIGEST_SIZE * 2 + 1;
1451 const BUFLEN: usize = size_of::<fsverity_digest>() + DIGEST_SIZE as usize;
1452 const ROUNDED_LEN: usize =
1453 (BUFLEN + size_of::<fsverity_digest>() - 1) / size_of::<fsverity_digest>();
1454
1455 // Make sure we get a properly aligned allocation.
1456 let mut buf = [MaybeUninit::<fsverity_digest>::uninit(); ROUNDED_LEN];
1457
1458 // Safe because we are only writing data and not reading uninitialized memory.
1459 unsafe {
1460 // TODO: Replace with `MaybeUninit::slice_as_mut_ptr` once it is stabilized.
1461 addr_of_mut!((*(buf.as_mut_ptr() as *mut fsverity_digest)).digest_size)
1462 .write(DIGEST_SIZE)
1463 };
1464
1465 // Safe because this will only modify `buf` and we check the return value.
1466 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_MEASURE_VERITY(), buf.as_mut_ptr()) };
1467 if res < 0 {
1468 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1469 } else {
1470 // Safe because this value was initialized by us already and then overwritten by the
1471 // kernel.
1472 // TODO: Replace with `MaybeUninit::slice_as_ptr` once it is stabilized.
1473 let digest_size =
1474 unsafe { addr_of!((*(buf.as_ptr() as *const fsverity_digest)).digest_size).read() };
1475 let outlen = size_of::<fsverity_digest>() as u32 + u32::from(digest_size);
1476
1477 // The kernel guarantees this but it doesn't hurt to be paranoid.
1478 debug_assert!(outlen <= (ROUNDED_LEN * size_of::<fsverity_digest>()) as u32);
1479 if digest.digest_size < digest_size || out_size < outlen {
1480 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1481 libc::EOVERFLOW,
1482 ))));
1483 }
1484
1485 // Safe because any bit pattern is valid for `MaybeUninit<u8>` and `fsverity_digest`
1486 // doesn't contain any references.
1487 let buf: [MaybeUninit<u8>; ROUNDED_LEN * size_of::<fsverity_digest>()] =
1488 unsafe { mem::transmute(buf) };
1489
1490 // Casting to `*const [u8]` is safe because the kernel guarantees that the first
1491 // `outlen` bytes of `buf` are initialized and `MaybeUninit<u8>` is guaranteed to have
1492 // the same layout as `u8`.
1493 // TODO: Replace with `MaybeUninit::slice_assume_init_ref` once it is stabilized.
1494 let buf =
1495 unsafe { &*(&buf[..outlen as usize] as *const [MaybeUninit<u8>] as *const [u8]) };
1496 Ok(IoctlReply::Done(Ok(buf.to_vec())))
1497 }
1498 }
1499 }
1500
forget_one( inodes: &mut MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>, inode: Inode, count: u64, )1501 fn forget_one(
1502 inodes: &mut MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>,
1503 inode: Inode,
1504 count: u64,
1505 ) {
1506 if let Some(data) = inodes.get(&inode) {
1507 // Acquiring the write lock on the inode map prevents new lookups from incrementing the
1508 // refcount but there is the possibility that a previous lookup already acquired a
1509 // reference to the inode data and is in the process of updating the refcount so we need
1510 // to loop here until we can decrement successfully.
1511 loop {
1512 let refcount = data.refcount.load(Ordering::Relaxed);
1513
1514 // Saturating sub because it doesn't make sense for a refcount to go below zero and
1515 // we don't want misbehaving clients to cause integer overflow.
1516 let new_count = refcount.saturating_sub(count);
1517
1518 // Synchronizes with the acquire load in `do_lookup`.
1519 if data
1520 .refcount
1521 .compare_exchange_weak(refcount, new_count, Ordering::Release, Ordering::Relaxed)
1522 .is_ok()
1523 {
1524 if new_count == 0 {
1525 // We just removed the last refcount for this inode. There's no need for an
1526 // acquire fence here because we hold a write lock on the inode map and any
1527 // thread that is waiting to do a forget on the same inode will have to wait
1528 // until we release the lock. So there's is no other release store for us to
1529 // synchronize with before deleting the entry.
1530 inodes.remove(&inode);
1531 }
1532 break;
1533 }
1534 }
1535 }
1536 }
1537
1538 // Strips any `user.virtiofs.` prefix from `buf`. If buf contains one or more nul-bytes, each
1539 // nul-byte-separated slice is treated as a C string and the prefix is stripped from each one.
strip_xattr_prefix(buf: &mut Vec<u8>)1540 fn strip_xattr_prefix(buf: &mut Vec<u8>) {
1541 fn next_cstr(b: &[u8], start: usize) -> Option<&[u8]> {
1542 if start >= b.len() {
1543 return None;
1544 }
1545
1546 let end = b[start..]
1547 .iter()
1548 .position(|&c| c == b'\0')
1549 .map(|p| start + p + 1)
1550 .unwrap_or(b.len());
1551
1552 Some(&b[start..end])
1553 }
1554
1555 let mut pos = 0;
1556 while let Some(name) = next_cstr(buf, pos) {
1557 if !name.starts_with(USER_VIRTIOFS_XATTR) {
1558 pos += name.len();
1559 continue;
1560 }
1561
1562 let newlen = name.len() - USER_VIRTIOFS_XATTR.len();
1563 buf.drain(pos..pos + USER_VIRTIOFS_XATTR.len());
1564 pos += newlen;
1565 }
1566 }
1567
1568 impl FileSystem for PassthroughFs {
1569 type Inode = Inode;
1570 type Handle = Handle;
1571 type DirIter = ReadDir<Box<[u8]>>;
1572
init(&self, capable: FsOptions) -> io::Result<FsOptions>1573 fn init(&self, capable: FsOptions) -> io::Result<FsOptions> {
1574 // Safe because this is a constant value and a valid C string.
1575 let root = unsafe { CStr::from_bytes_with_nul_unchecked(ROOT_CSTR) };
1576
1577 let flags = libc::O_DIRECTORY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
1578 // Safe because this doesn't modify any memory and we check the return value.
1579 let raw_descriptor = unsafe { libc::openat64(libc::AT_FDCWD, root.as_ptr(), flags) };
1580 if raw_descriptor < 0 {
1581 return Err(io::Error::last_os_error());
1582 }
1583
1584 // Safe because we just opened this descriptor above.
1585 let f = unsafe { File::from_raw_descriptor(raw_descriptor) };
1586
1587 let st = stat(&f)?;
1588
1589 // Safe because this doesn't modify any memory and there is no need to check the return
1590 // value because this system call always succeeds. We need to clear the umask here because
1591 // we want the client to be able to set all the bits in the mode.
1592 unsafe { libc::umask(0o000) };
1593
1594 let mut inodes = self.inodes.lock();
1595
1596 // Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
1597 inodes.insert(
1598 ROOT_ID,
1599 InodeAltKey {
1600 ino: st.st_ino,
1601 dev: st.st_dev,
1602 },
1603 Arc::new(InodeData {
1604 inode: ROOT_ID,
1605 file: Mutex::new((f, flags)),
1606 refcount: AtomicU64::new(2),
1607 filetype: st.st_mode.into(),
1608 }),
1609 );
1610
1611 let mut opts = FsOptions::DO_READDIRPLUS
1612 | FsOptions::READDIRPLUS_AUTO
1613 | FsOptions::EXPORT_SUPPORT
1614 | FsOptions::DONT_MASK;
1615 if self.cfg.posix_acl {
1616 opts |= FsOptions::POSIX_ACL;
1617 }
1618 if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) {
1619 opts |= FsOptions::WRITEBACK_CACHE;
1620 self.writeback.store(true, Ordering::Relaxed);
1621 }
1622 if self.cfg.cache_policy == CachePolicy::Always {
1623 if capable.contains(FsOptions::ZERO_MESSAGE_OPEN) {
1624 opts |= FsOptions::ZERO_MESSAGE_OPEN;
1625 self.zero_message_open.store(true, Ordering::Relaxed);
1626 }
1627 if capable.contains(FsOptions::ZERO_MESSAGE_OPENDIR) {
1628 opts |= FsOptions::ZERO_MESSAGE_OPENDIR;
1629 self.zero_message_opendir.store(true, Ordering::Relaxed);
1630 }
1631 }
1632 Ok(opts)
1633 }
1634
destroy(&self)1635 fn destroy(&self) {
1636 cros_tracing::trace_simple_print!("{:?}: destroy", self);
1637 self.handles.lock().clear();
1638 self.inodes.lock().clear();
1639 }
1640
statfs(&self, _ctx: Context, inode: Inode) -> io::Result<libc::statvfs64>1641 fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result<libc::statvfs64> {
1642 cros_tracing::trace_simple_print!("{}: statfs: inode={inode}", self.tag);
1643 let data = self.find_inode(inode)?;
1644
1645 let mut out = MaybeUninit::<libc::statvfs64>::zeroed();
1646
1647 // Safe because this will only modify `out` and we check the return value.
1648 syscall!(unsafe { libc::fstatvfs64(data.as_raw_descriptor(), out.as_mut_ptr()) })?;
1649
1650 // Safe because the kernel guarantees that `out` has been initialized.
1651 Ok(unsafe { out.assume_init() })
1652 }
1653
lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry>1654 fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
1655 cros_tracing::trace_simple_print!(
1656 "{}: lookup: inode={}, name={:?}",
1657 self.tag,
1658 parent,
1659 name
1660 );
1661 let data = self.find_inode(parent)?;
1662 self.do_lookup(&data, name).or_else(|e| {
1663 if self.cfg.ascii_casefold {
1664 self.ascii_casefold_lookup(&data, name.to_bytes())
1665 } else {
1666 Err(e)
1667 }
1668 })
1669 }
1670
forget(&self, _ctx: Context, inode: Inode, count: u64)1671 fn forget(&self, _ctx: Context, inode: Inode, count: u64) {
1672 cros_tracing::trace_simple_print!("{}: forget: inode={inode}, count={count}", self.tag);
1673 let mut inodes = self.inodes.lock();
1674
1675 forget_one(&mut inodes, inode, count)
1676 }
1677
batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>)1678 fn batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>) {
1679 let mut inodes = self.inodes.lock();
1680
1681 for (inode, count) in requests {
1682 forget_one(&mut inodes, inode, count)
1683 }
1684 }
1685
opendir( &self, _ctx: Context, inode: Inode, flags: u32, ) -> io::Result<(Option<Handle>, OpenOptions)>1686 fn opendir(
1687 &self,
1688 _ctx: Context,
1689 inode: Inode,
1690 flags: u32,
1691 ) -> io::Result<(Option<Handle>, OpenOptions)> {
1692 cros_tracing::trace_simple_print!("{}: opendir: inode={inode}, flags={flags}", self.tag);
1693 if self.zero_message_opendir.load(Ordering::Relaxed) {
1694 Err(io::Error::from_raw_os_error(libc::ENOSYS))
1695 } else {
1696 self.do_open(inode, flags | (libc::O_DIRECTORY as u32))
1697 }
1698 }
1699
releasedir( &self, _ctx: Context, inode: Inode, _flags: u32, handle: Handle, ) -> io::Result<()>1700 fn releasedir(
1701 &self,
1702 _ctx: Context,
1703 inode: Inode,
1704 _flags: u32,
1705 handle: Handle,
1706 ) -> io::Result<()> {
1707 cros_tracing::trace_simple_print!(
1708 "{}: releasedir: inode={inode}, handle={handle}",
1709 self.tag
1710 );
1711 if self.zero_message_opendir.load(Ordering::Relaxed) {
1712 Ok(())
1713 } else {
1714 self.do_release(inode, handle)
1715 }
1716 }
1717
mkdir( &self, ctx: Context, parent: Inode, name: &CStr, mode: u32, umask: u32, ) -> io::Result<Entry>1718 fn mkdir(
1719 &self,
1720 ctx: Context,
1721 parent: Inode,
1722 name: &CStr,
1723 mode: u32,
1724 umask: u32,
1725 ) -> io::Result<Entry> {
1726 cros_tracing::trace_simple_print!(
1727 "{}: mkdir: inode={parent}, name={:?}, mode={mode}, umask={umask}",
1728 self.tag,
1729 name
1730 );
1731 let data = self.find_inode(parent)?;
1732
1733 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
1734 {
1735 let _scoped_umask = ScopedUmask::new(umask);
1736
1737 // Safe because this doesn't modify any memory and we check the return value.
1738 syscall!(unsafe { libc::mkdirat(data.as_raw_descriptor(), name.as_ptr(), mode) })?;
1739 }
1740
1741 self.do_lookup(&data, name)
1742 }
1743
rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()>1744 fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
1745 cros_tracing::trace_simple_print!("{}: rmdir: inode={parent}, name={:?}", self.tag, name);
1746 let data = self.find_inode(parent)?;
1747 self.do_unlink(&data, name, libc::AT_REMOVEDIR)
1748 }
1749
readdir( &self, _ctx: Context, inode: Inode, handle: Handle, size: u32, offset: u64, ) -> io::Result<Self::DirIter>1750 fn readdir(
1751 &self,
1752 _ctx: Context,
1753 inode: Inode,
1754 handle: Handle,
1755 size: u32,
1756 offset: u64,
1757 ) -> io::Result<Self::DirIter> {
1758 cros_tracing::trace_simple_print!(
1759 "{}: readdir: inode={inode}, handle={handle}, size={size}, offset={offset}",
1760 self.tag
1761 );
1762 let buf = vec![0; size as usize].into_boxed_slice();
1763
1764 if self.zero_message_opendir.load(Ordering::Relaxed) {
1765 let data = self.find_inode(inode)?;
1766 ReadDir::new(&*data, offset as libc::off64_t, buf)
1767 } else {
1768 let data = self.find_handle(handle, inode)?;
1769
1770 let dir = data.file.lock();
1771
1772 ReadDir::new(&*dir, offset as libc::off64_t, buf)
1773 }
1774 }
1775
open( &self, _ctx: Context, inode: Inode, flags: u32, ) -> io::Result<(Option<Handle>, OpenOptions)>1776 fn open(
1777 &self,
1778 _ctx: Context,
1779 inode: Inode,
1780 flags: u32,
1781 ) -> io::Result<(Option<Handle>, OpenOptions)> {
1782 if self.zero_message_open.load(Ordering::Relaxed) {
1783 cros_tracing::trace_simple_print!(
1784 "{}: open (zero-message): inode={inode}, flags={flags}",
1785 self.tag
1786 );
1787 Err(io::Error::from_raw_os_error(libc::ENOSYS))
1788 } else {
1789 cros_tracing::trace_simple_print!("{}: open: inode={inode}, flags={flags}", self.tag);
1790 self.do_open(inode, flags)
1791 }
1792 }
1793
release( &self, _ctx: Context, inode: Inode, _flags: u32, handle: Handle, _flush: bool, _flock_release: bool, _lock_owner: Option<u64>, ) -> io::Result<()>1794 fn release(
1795 &self,
1796 _ctx: Context,
1797 inode: Inode,
1798 _flags: u32,
1799 handle: Handle,
1800 _flush: bool,
1801 _flock_release: bool,
1802 _lock_owner: Option<u64>,
1803 ) -> io::Result<()> {
1804 if self.zero_message_open.load(Ordering::Relaxed) {
1805 cros_tracing::trace_simple_print!(
1806 "{}: release (zero-message): inode={inode}, handle={handle}",
1807 self.tag
1808 );
1809 Ok(())
1810 } else {
1811 cros_tracing::trace_simple_print!(
1812 "{}: release: inode={inode}, handle={handle}",
1813 self.tag
1814 );
1815 self.do_release(inode, handle)
1816 }
1817 }
1818
chromeos_tmpfile( &self, ctx: Context, parent: Self::Inode, mode: u32, umask: u32, ) -> io::Result<Entry>1819 fn chromeos_tmpfile(
1820 &self,
1821 ctx: Context,
1822 parent: Self::Inode,
1823 mode: u32,
1824 umask: u32,
1825 ) -> io::Result<Entry> {
1826 cros_tracing::trace_simple_print!(
1827 "{}: chromeos_tempfile: inode={parent}, mode={mode}, umask={umask}",
1828 self.tag
1829 );
1830 let data = self.find_inode(parent)?;
1831
1832 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
1833
1834 let tmpflags = libc::O_RDWR | libc::O_TMPFILE | libc::O_CLOEXEC | libc::O_NOFOLLOW;
1835
1836 // Safe because this is a valid c string.
1837 let current_dir = unsafe { CStr::from_bytes_with_nul_unchecked(b".\0") };
1838
1839 let fd = {
1840 let _scoped_umask = ScopedUmask::new(umask);
1841
1842 // Safe because this doesn't modify any memory and we check the return value.
1843 syscall!(unsafe {
1844 libc::openat64(
1845 data.as_raw_descriptor(),
1846 current_dir.as_ptr(),
1847 tmpflags,
1848 mode,
1849 )
1850 })?
1851 };
1852
1853 // Safe because we just opened this fd.
1854 let tmpfile = unsafe { File::from_raw_descriptor(fd) };
1855
1856 let st = stat(&tmpfile)?;
1857 Ok(self.add_entry(tmpfile, st, tmpflags))
1858 }
1859
create( &self, ctx: Context, parent: Inode, name: &CStr, mode: u32, flags: u32, umask: u32, ) -> io::Result<(Entry, Option<Handle>, OpenOptions)>1860 fn create(
1861 &self,
1862 ctx: Context,
1863 parent: Inode,
1864 name: &CStr,
1865 mode: u32,
1866 flags: u32,
1867 umask: u32,
1868 ) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
1869 cros_tracing::trace_simple_print!(
1870 "{}: create: inode={parent}, name={:?}, mode={mode}, flags={flags}, umask={umask}",
1871 self.tag,
1872 name
1873 );
1874 let data = self.find_inode(parent)?;
1875
1876 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
1877
1878 let create_flags =
1879 (flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW) & !libc::O_DIRECT;
1880
1881 let fd = {
1882 let _scoped_umask = ScopedUmask::new(umask);
1883
1884 // Safe because this doesn't modify any memory and we check the return value. We don't
1885 // really check `flags` because if the kernel can't handle poorly specified flags then
1886 // we have much bigger problems.
1887 syscall!(unsafe {
1888 libc::openat64(data.as_raw_descriptor(), name.as_ptr(), create_flags, mode)
1889 })?
1890 };
1891
1892 // Safe because we just opened this fd.
1893 let file = unsafe { File::from_raw_descriptor(fd) };
1894
1895 let st = stat(&file)?;
1896 let entry = self.add_entry(file, st, create_flags);
1897
1898 let (handle, opts) = if self.zero_message_open.load(Ordering::Relaxed) {
1899 (None, OpenOptions::KEEP_CACHE)
1900 } else {
1901 self.do_open(
1902 entry.inode,
1903 flags & !((libc::O_CREAT | libc::O_EXCL | libc::O_NOCTTY) as u32),
1904 )
1905 .map_err(|e| {
1906 // Don't leak the entry.
1907 self.forget(ctx, entry.inode, 1);
1908 e
1909 })?
1910 };
1911
1912 Ok((entry, handle, opts))
1913 }
1914
unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()>1915 fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
1916 cros_tracing::trace_simple_print!("{}: unlink: inode={parent}, name={:?}", self.tag, name);
1917 let data = self.find_inode(parent)?;
1918 self.do_unlink(&data, name, 0)
1919 }
1920
read<W: io::Write + ZeroCopyWriter>( &self, _ctx: Context, inode: Inode, handle: Handle, mut w: W, size: u32, offset: u64, _lock_owner: Option<u64>, _flags: u32, ) -> io::Result<usize>1921 fn read<W: io::Write + ZeroCopyWriter>(
1922 &self,
1923 _ctx: Context,
1924 inode: Inode,
1925 handle: Handle,
1926 mut w: W,
1927 size: u32,
1928 offset: u64,
1929 _lock_owner: Option<u64>,
1930 _flags: u32,
1931 ) -> io::Result<usize> {
1932 if self.zero_message_open.load(Ordering::Relaxed) {
1933 cros_tracing::trace_simple_print!("{}: read (zero-message): inode={inode}, handle={handle}, size={size}, offset={offset}", self.tag);
1934 let data = self.find_inode(inode)?;
1935
1936 let mut file = data.file.lock();
1937 let mut flags = file.1;
1938 match flags & libc::O_ACCMODE {
1939 libc::O_WRONLY => {
1940 flags &= !libc::O_WRONLY;
1941 flags |= libc::O_RDWR;
1942
1943 // We need to get a readable handle for this file.
1944 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
1945 *file = (newfile, flags);
1946 }
1947 libc::O_RDONLY | libc::O_RDWR => {}
1948 _ => panic!("Unexpected flags: {:#x}", flags),
1949 }
1950
1951 w.write_from(&mut file.0, size as usize, offset)
1952 } else {
1953 cros_tracing::trace_simple_print!(
1954 "{}: read: inode={inode}, handle={handle}, size={size}, offset={offset}",
1955 self.tag
1956 );
1957 let data = self.find_handle(handle, inode)?;
1958
1959 let mut f = data.file.lock();
1960 w.write_from(&mut f, size as usize, offset)
1961 }
1962 }
1963
write<R: io::Read + ZeroCopyReader>( &self, _ctx: Context, inode: Inode, handle: Handle, mut r: R, size: u32, offset: u64, _lock_owner: Option<u64>, _delayed_write: bool, flags: u32, ) -> io::Result<usize>1964 fn write<R: io::Read + ZeroCopyReader>(
1965 &self,
1966 _ctx: Context,
1967 inode: Inode,
1968 handle: Handle,
1969 mut r: R,
1970 size: u32,
1971 offset: u64,
1972 _lock_owner: Option<u64>,
1973 _delayed_write: bool,
1974 flags: u32,
1975 ) -> io::Result<usize> {
1976 // When the WRITE_KILL_PRIV flag is set, drop CAP_FSETID so that the kernel will
1977 // automatically clear the setuid and setgid bits for us.
1978 let _fsetid = if flags & WRITE_KILL_PRIV != 0 {
1979 Some(drop_cap_fsetid()?)
1980 } else {
1981 None
1982 };
1983
1984 if self.zero_message_open.load(Ordering::Relaxed) {
1985 cros_tracing::trace_simple_print!(
1986 "{}: write (zero-message): inode={inode}, handle={handle}, size={size}, offset={offset}",
1987 self.tag
1988 );
1989
1990 let data = self.find_inode(inode)?;
1991
1992 let mut file = data.file.lock();
1993 let mut flags = file.1;
1994 match flags & libc::O_ACCMODE {
1995 libc::O_RDONLY => {
1996 flags &= !libc::O_RDONLY;
1997 flags |= libc::O_RDWR;
1998
1999 // We need to get a writable handle for this file.
2000 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
2001 *file = (newfile, flags);
2002 }
2003 libc::O_WRONLY | libc::O_RDWR => {}
2004 _ => panic!("Unexpected flags: {:#x}", flags),
2005 }
2006
2007 r.read_to(&mut file.0, size as usize, offset)
2008 } else {
2009 cros_tracing::trace_simple_print!(
2010 "{}: write: inode={inode}, handle={handle}, size={size}, offset={offset}",
2011 self.tag
2012 );
2013
2014 let data = self.find_handle(handle, inode)?;
2015
2016 let mut f = data.file.lock();
2017 r.read_to(&mut f, size as usize, offset)
2018 }
2019 }
2020
getattr( &self, _ctx: Context, inode: Inode, _handle: Option<Handle>, ) -> io::Result<(libc::stat64, Duration)>2021 fn getattr(
2022 &self,
2023 _ctx: Context,
2024 inode: Inode,
2025 _handle: Option<Handle>,
2026 ) -> io::Result<(libc::stat64, Duration)> {
2027 cros_tracing::trace_simple_print!("{}: getattr: inode={inode}", self.tag);
2028
2029 let data = self.find_inode(inode)?;
2030 self.do_getattr(&data)
2031 }
2032
setattr( &self, _ctx: Context, inode: Inode, attr: libc::stat64, handle: Option<Handle>, valid: SetattrValid, ) -> io::Result<(libc::stat64, Duration)>2033 fn setattr(
2034 &self,
2035 _ctx: Context,
2036 inode: Inode,
2037 attr: libc::stat64,
2038 handle: Option<Handle>,
2039 valid: SetattrValid,
2040 ) -> io::Result<(libc::stat64, Duration)> {
2041 cros_tracing::trace_simple_print!(
2042 "{}: setattr: inode={inode}, handle={:?}",
2043 self.tag,
2044 handle
2045 );
2046 let inode_data = self.find_inode(inode)?;
2047
2048 enum Data {
2049 Handle(Arc<HandleData>, RawDescriptor),
2050 ProcPath(CString),
2051 }
2052
2053 // If we have a handle then use it otherwise get a new fd from the inode.
2054 let data = if let Some(handle) = handle.filter(|&h| h != 0) {
2055 let hd = self.find_handle(handle, inode)?;
2056
2057 let fd = hd.file.lock().as_raw_descriptor();
2058 Data::Handle(hd, fd)
2059 } else {
2060 let pathname = CString::new(format!("self/fd/{}", inode_data.as_raw_descriptor()))
2061 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2062 Data::ProcPath(pathname)
2063 };
2064
2065 if valid.contains(SetattrValid::MODE) {
2066 // Safe because this doesn't modify any memory and we check the return value.
2067 syscall!(unsafe {
2068 match data {
2069 Data::Handle(_, fd) => libc::fchmod(fd, attr.st_mode),
2070 Data::ProcPath(ref p) => {
2071 libc::fchmodat(self.proc.as_raw_descriptor(), p.as_ptr(), attr.st_mode, 0)
2072 }
2073 }
2074 })?;
2075 }
2076
2077 if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
2078 let uid = if valid.contains(SetattrValid::UID) {
2079 attr.st_uid
2080 } else {
2081 // Cannot use -1 here because these are unsigned values.
2082 ::std::u32::MAX
2083 };
2084 let gid = if valid.contains(SetattrValid::GID) {
2085 attr.st_gid
2086 } else {
2087 // Cannot use -1 here because these are unsigned values.
2088 ::std::u32::MAX
2089 };
2090
2091 // Safe because this is a constant value and a valid C string.
2092 let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
2093
2094 // Safe because this doesn't modify any memory and we check the return value.
2095 syscall!(unsafe {
2096 libc::fchownat(
2097 inode_data.as_raw_descriptor(),
2098 empty.as_ptr(),
2099 uid,
2100 gid,
2101 libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
2102 )
2103 })?;
2104 }
2105
2106 if valid.contains(SetattrValid::SIZE) {
2107 // Safe because this doesn't modify any memory and we check the return value.
2108 syscall!(match data {
2109 Data::Handle(_, fd) => unsafe { libc::ftruncate64(fd, attr.st_size) },
2110 _ => {
2111 // There is no `ftruncateat` so we need to get a new fd and truncate it.
2112 let f = self.open_inode(&inode_data, libc::O_NONBLOCK | libc::O_RDWR)?;
2113 unsafe { libc::ftruncate64(f.as_raw_descriptor(), attr.st_size) }
2114 }
2115 })?;
2116 }
2117
2118 if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
2119 let mut tvs = [
2120 libc::timespec {
2121 tv_sec: 0,
2122 tv_nsec: libc::UTIME_OMIT,
2123 },
2124 libc::timespec {
2125 tv_sec: 0,
2126 tv_nsec: libc::UTIME_OMIT,
2127 },
2128 ];
2129
2130 if valid.contains(SetattrValid::ATIME_NOW) {
2131 tvs[0].tv_nsec = libc::UTIME_NOW;
2132 } else if valid.contains(SetattrValid::ATIME) {
2133 tvs[0].tv_sec = attr.st_atime;
2134 tvs[0].tv_nsec = attr.st_atime_nsec;
2135 }
2136
2137 if valid.contains(SetattrValid::MTIME_NOW) {
2138 tvs[1].tv_nsec = libc::UTIME_NOW;
2139 } else if valid.contains(SetattrValid::MTIME) {
2140 tvs[1].tv_sec = attr.st_mtime;
2141 tvs[1].tv_nsec = attr.st_mtime_nsec;
2142 }
2143
2144 // Safe because this doesn't modify any memory and we check the return value.
2145 syscall!(unsafe {
2146 match data {
2147 Data::Handle(_, fd) => libc::futimens(fd, tvs.as_ptr()),
2148 Data::ProcPath(ref p) => {
2149 libc::utimensat(self.proc.as_raw_descriptor(), p.as_ptr(), tvs.as_ptr(), 0)
2150 }
2151 }
2152 })?;
2153 }
2154
2155 self.do_getattr(&inode_data)
2156 }
2157
rename( &self, _ctx: Context, olddir: Inode, oldname: &CStr, newdir: Inode, newname: &CStr, flags: u32, ) -> io::Result<()>2158 fn rename(
2159 &self,
2160 _ctx: Context,
2161 olddir: Inode,
2162 oldname: &CStr,
2163 newdir: Inode,
2164 newname: &CStr,
2165 flags: u32,
2166 ) -> io::Result<()> {
2167 cros_tracing::trace_simple_print!(
2168 "{}: rename: olddir={olddir}, oldname={:?}, newdir={newdir}, newname={:?}, flags={flags}",
2169 self.tag,
2170 oldname,
2171 newname
2172 );
2173
2174 let old_inode = self.find_inode(olddir)?;
2175 let new_inode = self.find_inode(newdir)?;
2176
2177 // Safe because this doesn't modify any memory and we check the return value.
2178 // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands
2179 // and we have glibc 2.28.
2180 syscall!(unsafe {
2181 libc::syscall(
2182 libc::SYS_renameat2,
2183 old_inode.as_raw_descriptor(),
2184 oldname.as_ptr(),
2185 new_inode.as_raw_descriptor(),
2186 newname.as_ptr(),
2187 flags,
2188 )
2189 })?;
2190 Ok(())
2191 }
2192
mknod( &self, ctx: Context, parent: Inode, name: &CStr, mode: u32, rdev: u32, umask: u32, ) -> io::Result<Entry>2193 fn mknod(
2194 &self,
2195 ctx: Context,
2196 parent: Inode,
2197 name: &CStr,
2198 mode: u32,
2199 rdev: u32,
2200 umask: u32,
2201 ) -> io::Result<Entry> {
2202 cros_tracing::trace_simple_print!(
2203 "{}: mknod: inode={parent}, name={:?}, mode={mode}, rdev={rdev}, umask={umask}",
2204 self.tag,
2205 name
2206 );
2207 let data = self.find_inode(parent)?;
2208
2209 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
2210
2211 {
2212 let _scoped_umask = ScopedUmask::new(umask);
2213
2214 // Safe because this doesn't modify any memory and we check the return value.
2215 syscall!(unsafe {
2216 libc::mknodat(
2217 data.as_raw_descriptor(),
2218 name.as_ptr(),
2219 mode as libc::mode_t,
2220 rdev as libc::dev_t,
2221 )
2222 })?;
2223 }
2224
2225 self.do_lookup(&data, name)
2226 }
2227
link( &self, _ctx: Context, inode: Inode, newparent: Inode, newname: &CStr, ) -> io::Result<Entry>2228 fn link(
2229 &self,
2230 _ctx: Context,
2231 inode: Inode,
2232 newparent: Inode,
2233 newname: &CStr,
2234 ) -> io::Result<Entry> {
2235 cros_tracing::trace_simple_print!(
2236 "{}: link: inode={inode}, newparent={newparent}, newmname={:?}",
2237 self.tag,
2238 newname
2239 );
2240 let data = self.find_inode(inode)?;
2241 let new_inode = self.find_inode(newparent)?;
2242
2243 let path = CString::new(format!("self/fd/{}", data.as_raw_descriptor()))
2244 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2245
2246 // Safe because this doesn't modify any memory and we check the return value.
2247 syscall!(unsafe {
2248 libc::linkat(
2249 self.proc.as_raw_descriptor(),
2250 path.as_ptr(),
2251 new_inode.as_raw_descriptor(),
2252 newname.as_ptr(),
2253 libc::AT_SYMLINK_FOLLOW,
2254 )
2255 })?;
2256
2257 self.do_lookup(&new_inode, newname)
2258 }
2259
symlink( &self, ctx: Context, linkname: &CStr, parent: Inode, name: &CStr, ) -> io::Result<Entry>2260 fn symlink(
2261 &self,
2262 ctx: Context,
2263 linkname: &CStr,
2264 parent: Inode,
2265 name: &CStr,
2266 ) -> io::Result<Entry> {
2267 cros_tracing::trace_simple_print!(
2268 "{}: symlink: inode={parent}, linkname={:?}, name={:?}",
2269 self.tag,
2270 linkname,
2271 name
2272 );
2273 let data = self.find_inode(parent)?;
2274
2275 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
2276
2277 // Safe because this doesn't modify any memory and we check the return value.
2278 syscall!(unsafe {
2279 libc::symlinkat(linkname.as_ptr(), data.as_raw_descriptor(), name.as_ptr())
2280 })?;
2281
2282 self.do_lookup(&data, name)
2283 }
2284
readlink(&self, _ctx: Context, inode: Inode) -> io::Result<Vec<u8>>2285 fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result<Vec<u8>> {
2286 cros_tracing::trace_simple_print!("{}: readlink: inode={inode}", self.tag);
2287 let data = self.find_inode(inode)?;
2288
2289 let mut buf = vec![0; libc::PATH_MAX as usize];
2290
2291 // Safe because this is a constant value and a valid C string.
2292 let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
2293
2294 // Safe because this will only modify the contents of `buf` and we check the return value.
2295 let res = syscall!(unsafe {
2296 libc::readlinkat(
2297 data.as_raw_descriptor(),
2298 empty.as_ptr(),
2299 buf.as_mut_ptr() as *mut libc::c_char,
2300 buf.len(),
2301 )
2302 })?;
2303
2304 buf.resize(res as usize, 0);
2305 Ok(buf)
2306 }
2307
flush( &self, _ctx: Context, inode: Inode, handle: Handle, _lock_owner: u64, ) -> io::Result<()>2308 fn flush(
2309 &self,
2310 _ctx: Context,
2311 inode: Inode,
2312 handle: Handle,
2313 _lock_owner: u64,
2314 ) -> io::Result<()> {
2315 cros_tracing::trace_simple_print!("{}: flush: inode={inode}, handle={handle}", self.tag);
2316 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
2317 self.find_inode(inode)?
2318 } else {
2319 self.find_handle(handle, inode)?
2320 };
2321
2322 // Since this method is called whenever an fd is closed in the client, we can emulate that
2323 // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe
2324 // because this doesn't modify any memory and we check the return values.
2325 unsafe {
2326 let newfd = syscall!(libc::fcntl(
2327 data.as_raw_descriptor(),
2328 libc::F_DUPFD_CLOEXEC,
2329 0
2330 ))?;
2331
2332 syscall!(libc::close(newfd))?;
2333 }
2334 Ok(())
2335 }
2336
fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()>2337 fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> {
2338 if self.zero_message_open.load(Ordering::Relaxed) {
2339 cros_tracing::trace_simple_print!(
2340 "{}: fsync (zero-message): inode={inode}, datasync={datasync}, handle={handle}",
2341 self.tag
2342 );
2343 let data = self.find_inode(inode)?;
2344 self.do_fsync(&*data, datasync)
2345 } else {
2346 cros_tracing::trace_simple_print!(
2347 "{}: fsync: inode={inode}, datasync={datasync}, handle={handle}",
2348 self.tag
2349 );
2350 let data = self.find_handle(handle, inode)?;
2351
2352 let file = data.file.lock();
2353 self.do_fsync(&*file, datasync)
2354 }
2355 }
2356
fsyncdir( &self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle, ) -> io::Result<()>2357 fn fsyncdir(
2358 &self,
2359 _ctx: Context,
2360 inode: Inode,
2361 datasync: bool,
2362 handle: Handle,
2363 ) -> io::Result<()> {
2364 if self.zero_message_opendir.load(Ordering::Relaxed) {
2365 cros_tracing::trace_simple_print!(
2366 "{}: fsyncdir (zero-message): inode={inode}, datasync={datasync}, handle={handle}",
2367 self.tag
2368 );
2369 let data = self.find_inode(inode)?;
2370 self.do_fsync(&*data, datasync)
2371 } else {
2372 cros_tracing::trace_simple_print!(
2373 "{}: fsyncdir: inode={inode}, datasync={datasync}, handle={handle}",
2374 self.tag
2375 );
2376 let data = self.find_handle(handle, inode)?;
2377
2378 let file = data.file.lock();
2379 self.do_fsync(&*file, datasync)
2380 }
2381 }
2382
access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()>2383 fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> {
2384 cros_tracing::trace_simple_print!("{}: access: inode={inode}, mask={mask}", self.tag);
2385 let data = self.find_inode(inode)?;
2386
2387 let st = stat(&*data)?;
2388 let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);
2389
2390 if mode == libc::F_OK {
2391 // The file exists since we were able to call `stat(2)` on it.
2392 return Ok(());
2393 }
2394
2395 if (mode & libc::R_OK) != 0 {
2396 if ctx.uid != 0
2397 && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0)
2398 && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0)
2399 && st.st_mode & 0o004 == 0
2400 {
2401 return Err(io::Error::from_raw_os_error(libc::EACCES));
2402 }
2403 }
2404
2405 if (mode & libc::W_OK) != 0 {
2406 if ctx.uid != 0
2407 && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0)
2408 && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0)
2409 && st.st_mode & 0o002 == 0
2410 {
2411 return Err(io::Error::from_raw_os_error(libc::EACCES));
2412 }
2413 }
2414
2415 // root can only execute something if it is executable by one of the owner, the group, or
2416 // everyone.
2417 if (mode & libc::X_OK) != 0 {
2418 if (ctx.uid != 0 || st.st_mode & 0o111 == 0)
2419 && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0)
2420 && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0)
2421 && st.st_mode & 0o001 == 0
2422 {
2423 return Err(io::Error::from_raw_os_error(libc::EACCES));
2424 }
2425 }
2426
2427 Ok(())
2428 }
2429
setxattr( &self, _ctx: Context, inode: Inode, name: &CStr, value: &[u8], flags: u32, ) -> io::Result<()>2430 fn setxattr(
2431 &self,
2432 _ctx: Context,
2433 inode: Inode,
2434 name: &CStr,
2435 value: &[u8],
2436 flags: u32,
2437 ) -> io::Result<()> {
2438 cros_tracing::trace_simple_print!(
2439 "{}: setxattr: inode={inode}, name={:?}, flags={flags}",
2440 self.tag,
2441 name
2442 );
2443 // We can't allow the VM to set this xattr because an unprivileged process may use it to set
2444 // a privileged xattr.
2445 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
2446 return Err(io::Error::from_raw_os_error(libc::EPERM));
2447 }
2448
2449 let data = self.find_inode(inode)?;
2450 let name = self.rewrite_xattr_name(name);
2451 let file = data.file.lock();
2452 let o_path_file = (file.1 & libc::O_PATH) != 0;
2453 if o_path_file {
2454 // For FDs opened with `O_PATH`, we cannot call `fsetxattr` normally. Instead we emulate
2455 // an _at syscall by changing the CWD to /proc, running the path based syscall, and then
2456 // setting the CWD back to the root directory.
2457 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
2458 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2459
2460 // Safe because this doesn't modify any memory and we check the return value.
2461 syscall!(self.with_proc_chdir(|| {
2462 unsafe {
2463 libc::setxattr(
2464 path.as_ptr(),
2465 name.as_ptr(),
2466 value.as_ptr() as *const libc::c_void,
2467 value.len() as libc::size_t,
2468 flags as c_int,
2469 )
2470 }
2471 }))?;
2472 } else {
2473 // For regular files and directories, we can just use fsetxattr. Safe because this
2474 // doesn't modify any memory and we check the return value.
2475 syscall!(unsafe {
2476 libc::fsetxattr(
2477 file.0.as_raw_descriptor(),
2478 name.as_ptr(),
2479 value.as_ptr() as *const libc::c_void,
2480 value.len() as libc::size_t,
2481 flags as c_int,
2482 )
2483 })?;
2484 }
2485
2486 Ok(())
2487 }
2488
getxattr( &self, _ctx: Context, inode: Inode, name: &CStr, size: u32, ) -> io::Result<GetxattrReply>2489 fn getxattr(
2490 &self,
2491 _ctx: Context,
2492 inode: Inode,
2493 name: &CStr,
2494 size: u32,
2495 ) -> io::Result<GetxattrReply> {
2496 cros_tracing::trace_simple_print!(
2497 "{}: getxattr: inode={inode}, name={:?}, size={size}",
2498 self.tag,
2499 name
2500 );
2501 // We don't allow the VM to set this xattr so we also pretend there is no value associated
2502 // with it.
2503 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
2504 return Err(io::Error::from_raw_os_error(libc::ENODATA));
2505 }
2506
2507 let data = self.find_inode(inode)?;
2508 let name = self.rewrite_xattr_name(name);
2509 let mut buf = vec![0u8; size as usize];
2510
2511 // Safe because this will only modify the contents of `buf`.
2512 let res = self.do_getxattr(&data, &name, &mut buf[..])?;
2513 if size == 0 {
2514 Ok(GetxattrReply::Count(res as u32))
2515 } else {
2516 buf.truncate(res as usize);
2517 Ok(GetxattrReply::Value(buf))
2518 }
2519 }
2520
listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result<ListxattrReply>2521 fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
2522 cros_tracing::trace_simple_print!("{}: listxattr: inode={inode}, size={size}", self.tag);
2523 let data = self.find_inode(inode)?;
2524
2525 let mut buf = vec![0u8; size as usize];
2526
2527 let file = data.file.lock();
2528 let o_path_file = (file.1 & libc::O_PATH) != 0;
2529 let res = if o_path_file {
2530 // For FDs opened with `O_PATH`, we cannot call `flistxattr` normally. Instead we
2531 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
2532 // and then setting the CWD back to the root directory.
2533 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
2534 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2535
2536 // Safe because this will only modify `buf` and we check the return value.
2537 syscall!(self.with_proc_chdir(|| unsafe {
2538 libc::listxattr(
2539 path.as_ptr(),
2540 buf.as_mut_ptr() as *mut libc::c_char,
2541 buf.len() as libc::size_t,
2542 )
2543 }))?
2544 } else {
2545 // For regular files and directories, we can just flistxattr. Safe because this will only
2546 // write to `buf` and we check the return value.
2547 syscall!(unsafe {
2548 libc::flistxattr(
2549 file.0.as_raw_descriptor(),
2550 buf.as_mut_ptr() as *mut libc::c_char,
2551 buf.len() as libc::size_t,
2552 )
2553 })?
2554 };
2555
2556 if size == 0 {
2557 Ok(ListxattrReply::Count(res as u32))
2558 } else {
2559 buf.truncate(res as usize);
2560
2561 if self.cfg.rewrite_security_xattrs {
2562 strip_xattr_prefix(&mut buf);
2563 }
2564 Ok(ListxattrReply::Names(buf))
2565 }
2566 }
2567
removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()>2568 fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> {
2569 cros_tracing::trace_simple_print!(
2570 "{}: removexattr: inode={inode}, name={:?}",
2571 self.tag,
2572 name
2573 );
2574 // We don't allow the VM to set this xattr so we also pretend there is no value associated
2575 // with it.
2576 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
2577 return Err(io::Error::from_raw_os_error(libc::ENODATA));
2578 }
2579
2580 let data = self.find_inode(inode)?;
2581 let name = self.rewrite_xattr_name(name);
2582
2583 let file = data.file.lock();
2584 let o_path_file = (file.1 & libc::O_PATH) != 0;
2585 if o_path_file {
2586 // For files opened with `O_PATH`, we cannot call `fremovexattr` normally. Instead we
2587 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
2588 // and then setting the CWD back to the root directory.
2589 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
2590 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2591
2592 // Safe because this doesn't modify any memory and we check the return value.
2593 syscall!(
2594 self.with_proc_chdir(|| unsafe { libc::removexattr(path.as_ptr(), name.as_ptr()) })
2595 )?;
2596 } else {
2597 // For regular files and directories, we can just use fremovexattr. Safe because this
2598 // doesn't modify any memory and we check the return value.
2599 syscall!(unsafe { libc::fremovexattr(file.0.as_raw_descriptor(), name.as_ptr()) })?;
2600 }
2601
2602 Ok(())
2603 }
2604
fallocate( &self, _ctx: Context, inode: Inode, handle: Handle, mode: u32, offset: u64, length: u64, ) -> io::Result<()>2605 fn fallocate(
2606 &self,
2607 _ctx: Context,
2608 inode: Inode,
2609 handle: Handle,
2610 mode: u32,
2611 offset: u64,
2612 length: u64,
2613 ) -> io::Result<()> {
2614 cros_tracing::trace_simple_print!(
2615 "{}: fallocate: inode={inode}, handle={handle}, mode={mode}, offset={offset}, lenght={length}",
2616 self.tag
2617 );
2618
2619 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
2620 let data = self.find_inode(inode)?;
2621
2622 {
2623 // fallocate needs a writable fd
2624 let mut file = data.file.lock();
2625 let mut flags = file.1;
2626 match flags & libc::O_ACCMODE {
2627 libc::O_RDONLY => {
2628 flags &= !libc::O_RDONLY;
2629 flags |= libc::O_RDWR;
2630
2631 // We need to get a writable handle for this file.
2632 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
2633 *file = (newfile, flags);
2634 }
2635 libc::O_WRONLY | libc::O_RDWR => {}
2636 _ => panic!("Unexpected flags: {:#x}", flags),
2637 }
2638 }
2639
2640 data
2641 } else {
2642 self.find_handle(handle, inode)?
2643 };
2644
2645 let fd = data.as_raw_descriptor();
2646 // Safe because this doesn't modify any memory and we check the return value.
2647 syscall!(unsafe {
2648 libc::fallocate64(
2649 fd,
2650 mode as libc::c_int,
2651 offset as libc::off64_t,
2652 length as libc::off64_t,
2653 )
2654 })?;
2655
2656 Ok(())
2657 }
2658
ioctl<R: io::Read>( &self, ctx: Context, inode: Inode, handle: Handle, _flags: IoctlFlags, cmd: u32, _arg: u64, in_size: u32, out_size: u32, r: R, ) -> io::Result<IoctlReply>2659 fn ioctl<R: io::Read>(
2660 &self,
2661 ctx: Context,
2662 inode: Inode,
2663 handle: Handle,
2664 _flags: IoctlFlags,
2665 cmd: u32,
2666 _arg: u64,
2667 in_size: u32,
2668 out_size: u32,
2669 r: R,
2670 ) -> io::Result<IoctlReply> {
2671 cros_tracing::trace_simple_print!(
2672 "{}: ioctl: inode={inode}, handle={handle}, cmd={cmd}, in_size={in_size}, out_size={out_size}",
2673 self.tag
2674 );
2675
2676 const GET_ENCRYPTION_POLICY_EX: u32 = FS_IOC_GET_ENCRYPTION_POLICY_EX() as u32;
2677 const GET_FSXATTR: u32 = FS_IOC_FSGETXATTR() as u32;
2678 const SET_FSXATTR: u32 = FS_IOC_FSSETXATTR() as u32;
2679 const GET_FLAGS32: u32 = FS_IOC32_GETFLAGS() as u32;
2680 const SET_FLAGS32: u32 = FS_IOC32_SETFLAGS() as u32;
2681 const GET_FLAGS64: u32 = FS_IOC64_GETFLAGS() as u32;
2682 const SET_FLAGS64: u32 = FS_IOC64_SETFLAGS() as u32;
2683 const ENABLE_VERITY: u32 = FS_IOC_ENABLE_VERITY() as u32;
2684 const MEASURE_VERITY: u32 = FS_IOC_MEASURE_VERITY() as u32;
2685
2686 match cmd {
2687 GET_ENCRYPTION_POLICY_EX => self.get_encryption_policy_ex(inode, handle, r),
2688 GET_FSXATTR => {
2689 if out_size < size_of::<fsxattr>() as u32 {
2690 Err(io::Error::from_raw_os_error(libc::ENOMEM))
2691 } else {
2692 self.get_fsxattr(inode, handle)
2693 }
2694 }
2695 SET_FSXATTR => {
2696 if in_size < size_of::<fsxattr>() as u32 {
2697 Err(io::Error::from_raw_os_error(libc::EINVAL))
2698 } else {
2699 self.set_fsxattr(ctx, inode, handle, r)
2700 }
2701 }
2702 GET_FLAGS32 | GET_FLAGS64 => {
2703 if out_size < size_of::<c_int>() as u32 {
2704 Err(io::Error::from_raw_os_error(libc::ENOMEM))
2705 } else {
2706 self.get_flags(inode, handle)
2707 }
2708 }
2709 SET_FLAGS32 | SET_FLAGS64 => {
2710 if in_size < size_of::<c_int>() as u32 {
2711 Err(io::Error::from_raw_os_error(libc::ENOMEM))
2712 } else {
2713 self.set_flags(ctx, inode, handle, r)
2714 }
2715 }
2716 ENABLE_VERITY => {
2717 if in_size < size_of::<fsverity_enable_arg>() as u32 {
2718 Err(io::Error::from_raw_os_error(libc::ENOMEM))
2719 } else {
2720 self.enable_verity(inode, handle, r)
2721 }
2722 }
2723 MEASURE_VERITY => {
2724 if in_size < size_of::<fsverity_digest>() as u32
2725 || out_size < size_of::<fsverity_digest>() as u32
2726 {
2727 Err(io::Error::from_raw_os_error(libc::ENOMEM))
2728 } else {
2729 self.measure_verity(inode, handle, r, out_size)
2730 }
2731 }
2732 _ => Err(io::Error::from_raw_os_error(libc::ENOTTY)),
2733 }
2734 }
2735
copy_file_range( &self, ctx: Context, inode_src: Inode, handle_src: Handle, offset_src: u64, inode_dst: Inode, handle_dst: Handle, offset_dst: u64, length: u64, flags: u64, ) -> io::Result<usize>2736 fn copy_file_range(
2737 &self,
2738 ctx: Context,
2739 inode_src: Inode,
2740 handle_src: Handle,
2741 offset_src: u64,
2742 inode_dst: Inode,
2743 handle_dst: Handle,
2744 offset_dst: u64,
2745 length: u64,
2746 flags: u64,
2747 ) -> io::Result<usize> {
2748 cros_tracing::trace_simple_print!(
2749 "{}: copy_file_range: src=({inode_src}, {handle_src}, {offset_src}), dst=({inode_dst}, {handle_dst}, {offset_dst}), length={length}, flags={flags}",
2750 self.tag
2751 );
2752 // We need to change credentials during a write so that the kernel will remove setuid or
2753 // setgid bits from the file if it was written to by someone other than the owner.
2754 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
2755 let (src_data, dst_data): (Arc<dyn AsRawDescriptor>, Arc<dyn AsRawDescriptor>) =
2756 if self.zero_message_open.load(Ordering::Relaxed) {
2757 (self.find_inode(inode_src)?, self.find_inode(inode_dst)?)
2758 } else {
2759 (
2760 self.find_handle(handle_src, inode_src)?,
2761 self.find_handle(handle_dst, inode_dst)?,
2762 )
2763 };
2764
2765 let src = src_data.as_raw_descriptor();
2766 let dst = dst_data.as_raw_descriptor();
2767
2768 Ok(syscall!(unsafe {
2769 libc::syscall(
2770 libc::SYS_copy_file_range,
2771 src,
2772 &offset_src,
2773 dst,
2774 &offset_dst,
2775 length,
2776 flags,
2777 )
2778 })? as usize)
2779 }
2780
set_up_mapping<M: Mapper>( &self, _ctx: Context, inode: Self::Inode, _handle: Self::Handle, file_offset: u64, mem_offset: u64, size: usize, prot: u32, mapper: M, ) -> io::Result<()>2781 fn set_up_mapping<M: Mapper>(
2782 &self,
2783 _ctx: Context,
2784 inode: Self::Inode,
2785 _handle: Self::Handle,
2786 file_offset: u64,
2787 mem_offset: u64,
2788 size: usize,
2789 prot: u32,
2790 mapper: M,
2791 ) -> io::Result<()> {
2792 cros_tracing::trace_simple_print!(
2793 "{}: set_up_mapping: inode={inode}, file_offset={file_offset}, mem_offset={mem_offset}, size={size}, prot={prot}",
2794 self.tag
2795 );
2796 if !self.cfg.use_dax {
2797 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
2798 }
2799
2800 let read = prot & libc::PROT_READ as u32 != 0;
2801 let write = prot & libc::PROT_WRITE as u32 != 0;
2802 let mmap_flags = match (read, write) {
2803 (true, true) => libc::O_RDWR,
2804 (true, false) => libc::O_RDONLY,
2805 (false, true) => libc::O_RDWR, // mmap always requires an fd opened for reading.
2806 (false, false) => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
2807 };
2808
2809 let data = self.find_inode(inode)?;
2810
2811 if self.zero_message_open.load(Ordering::Relaxed) {
2812 let mut file = data.file.lock();
2813 let mut open_flags = file.1;
2814 match (mmap_flags, open_flags & libc::O_ACCMODE) {
2815 (libc::O_RDONLY, libc::O_WRONLY)
2816 | (libc::O_RDWR, libc::O_RDONLY)
2817 | (libc::O_RDWR, libc::O_WRONLY) => {
2818 // We have a read-only or write-only fd and we need to upgrade it.
2819 open_flags &= !libc::O_ACCMODE;
2820 open_flags |= libc::O_RDWR;
2821
2822 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
2823 *file = (newfile, open_flags);
2824 }
2825 (libc::O_RDONLY, libc::O_RDONLY)
2826 | (libc::O_RDONLY, libc::O_RDWR)
2827 | (libc::O_RDWR, libc::O_RDWR) => {}
2828 (m, o) => panic!(
2829 "Unexpected combination of access flags: ({:#x}, {:#x})",
2830 m, o
2831 ),
2832 }
2833 mapper.map(mem_offset, size, &file.0, file_offset, prot)
2834 } else {
2835 let file = self.open_inode(&data, mmap_flags | libc::O_NONBLOCK)?;
2836 mapper.map(mem_offset, size, &file, file_offset, prot)
2837 }
2838 }
2839
remove_mapping<M: Mapper>(&self, msgs: &[RemoveMappingOne], mapper: M) -> io::Result<()>2840 fn remove_mapping<M: Mapper>(&self, msgs: &[RemoveMappingOne], mapper: M) -> io::Result<()> {
2841 cros_tracing::trace_simple_print!("{}: remove_mapping: msgs={:?}", self.tag, msgs);
2842 if !self.cfg.use_dax {
2843 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
2844 }
2845
2846 for RemoveMappingOne { moffset, len } in msgs {
2847 mapper.unmap(*moffset, *len)?;
2848 }
2849 Ok(())
2850 }
2851 }
2852
2853 #[cfg(test)]
2854 mod tests {
2855 use super::*;
2856
/// Checks which xattr names `rewrite_xattr_name` leaves untouched and which it rewrites when
/// `rewrite_security_xattrs` is enabled.
#[test]
fn rewrite_xattr_names() {
    /// Builds a `CStr` from a NUL-terminated byte literal using the checked, safe
    /// constructor, so a malformed literal fails the test instead of being undefined
    /// behavior.
    fn cstr(bytes: &'static [u8]) -> &'static CStr {
        CStr::from_bytes_with_nul(bytes).expect("literal must end with exactly one NUL byte")
    }

    let cfg = Config {
        rewrite_security_xattrs: true,
        ..Default::default()
    };

    let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");

    // Selinux shouldn't get overwritten.
    let selinux = cstr(b"security.selinux\0");
    assert_eq!(p.rewrite_xattr_name(selinux).to_bytes(), selinux.to_bytes());

    // user, trusted, and system should not be changed either.
    let user = cstr(b"user.foobar\0");
    assert_eq!(p.rewrite_xattr_name(user).to_bytes(), user.to_bytes());
    let trusted = cstr(b"trusted.foobar\0");
    assert_eq!(p.rewrite_xattr_name(trusted).to_bytes(), trusted.to_bytes());
    let system = cstr(b"system.foobar\0");
    assert_eq!(p.rewrite_xattr_name(system).to_bytes(), system.to_bytes());

    // sehash should be re-written.
    let sehash = cstr(b"security.sehash\0");
    assert_eq!(
        p.rewrite_xattr_name(sehash).to_bytes(),
        b"user.virtiofs.security.sehash"
    );
}
2885
/// Runs `strip_xattr_prefix` over a set of xattr name lists and checks the resulting bytes.
#[test]
fn strip_xattr_names() {
    /// Copies `input` into a fresh buffer, strips it in place and compares with `expected`.
    fn check(input: &[u8], expected: &[u8]) {
        let mut names = input.to_vec();
        strip_xattr_prefix(&mut names);
        assert_eq!(&names[..], expected);
    }

    // Inputs that must come back unchanged.
    let only_nuls = b"\0\0\0\0\0";
    check(only_nuls, only_nuls);

    let no_nuls = b"security.sehashuser.virtiofs";
    check(no_nuls, no_nuls);

    let empty = b"";
    check(empty, empty);

    let no_strippable_names = b"security.selinux\0user.foobar\0system.test\0";
    check(no_strippable_names, no_strippable_names);

    // Inputs where some or all names carry the prefix that gets stripped.
    check(
        b"user.virtiofs.security.sehash\0user.virtiofs.security.wat\0",
        b"security.sehash\0security.wat\0",
    );

    check(
        b"user.virtiofs.security.sehash\0security.selinux\0user.virtiofs.security.wat\0user.foobar\0",
        b"security.sehash\0security.selinux\0security.wat\0user.foobar\0",
    );

    // A prefixed name without a trailing NUL is still stripped.
    check(b"user.virtiofs.security.sehash", b"security.sehash");
}
2924 }
2925