• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::io;
6 use std::os::unix::io::AsRawFd;
7 use std::os::unix::net::UnixListener;
8 use std::path::{Path, PathBuf};
9 use std::sync::Arc;
10 
11 use anyhow::{anyhow, bail, Context};
12 use argh::FromArgs;
13 use base::{error, get_max_open_files, warn, Event, RawDescriptor, Tube, UnlinkUnixListener};
14 use cros_async::{EventAsync, Executor};
15 use data_model::{DataInit, Le32};
16 use fuse::Server;
17 use futures::future::{AbortHandle, Abortable};
18 use hypervisor::ProtectionType;
19 use minijail::{self, Minijail};
20 use sync::Mutex;
21 use vm_memory::GuestMemory;
22 use vmm_vhost::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
23 
24 use crate::virtio;
25 use crate::virtio::copy_config;
26 use crate::virtio::fs::passthrough::PassthroughFs;
27 use crate::virtio::fs::{process_fs_queue, virtio_fs_config, FS_MAX_TAG_LEN};
28 use crate::virtio::vhost::user::device::handler::{
29     DeviceRequestHandler, Doorbell, VhostUserBackend,
30 };
31 
handle_fs_queue( mut queue: virtio::Queue, mem: GuestMemory, doorbell: Arc<Mutex<Doorbell>>, kick_evt: EventAsync, server: Arc<fuse::Server<PassthroughFs>>, tube: Arc<Mutex<Tube>>, )32 async fn handle_fs_queue(
33     mut queue: virtio::Queue,
34     mem: GuestMemory,
35     doorbell: Arc<Mutex<Doorbell>>,
36     kick_evt: EventAsync,
37     server: Arc<fuse::Server<PassthroughFs>>,
38     tube: Arc<Mutex<Tube>>,
39 ) {
40     // Slot is always going to be 0 because we do not support DAX
41     let slot: u32 = 0;
42 
43     loop {
44         if let Err(e) = kick_evt.next_val().await {
45             error!("Failed to read kick event for fs queue: {}", e);
46             break;
47         }
48         if let Err(e) = process_fs_queue(&mem, &doorbell, &mut queue, &server, &tube, slot) {
49             error!("Process FS queue failed: {}", e);
50             break;
51         }
52     }
53 }
54 
default_uidmap() -> String55 fn default_uidmap() -> String {
56     let euid = unsafe { libc::geteuid() };
57     format!("{} {} 1", euid, euid)
58 }
59 
default_gidmap() -> String60 fn default_gidmap() -> String {
61     let egid = unsafe { libc::getegid() };
62     format!("{} {} 1", egid, egid)
63 }
64 
jail_and_fork( mut keep_rds: Vec<RawDescriptor>, dir_path: PathBuf, uid_map: Option<String>, gid_map: Option<String>, ) -> anyhow::Result<i32>65 fn jail_and_fork(
66     mut keep_rds: Vec<RawDescriptor>,
67     dir_path: PathBuf,
68     uid_map: Option<String>,
69     gid_map: Option<String>,
70 ) -> anyhow::Result<i32> {
71     // Create new minijail sandbox
72     let mut j = Minijail::new()?;
73 
74     j.namespace_pids();
75     j.namespace_user();
76     j.namespace_user_disable_setgroups();
77     j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
78     j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
79     j.run_as_init();
80 
81     j.namespace_vfs();
82     j.namespace_net();
83     j.no_new_privs();
84 
85     // Only pivot_root if we are not re-using the current root directory.
86     if dir_path != Path::new("/") {
87         // It's safe to call `namespace_vfs` multiple times.
88         j.namespace_vfs();
89         j.enter_pivot_root(&dir_path)?;
90     }
91     j.set_remount_mode(libc::MS_SLAVE);
92 
93     let limit = get_max_open_files().context("failed to get max open files")?;
94     j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
95 
96     // Make sure there are no duplicates in keep_rds
97     keep_rds.dedup();
98 
99     // fork on the jail here
100     let pid = unsafe { j.fork(Some(&keep_rds))? };
101 
102     if pid > 0 {
103         unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) };
104     }
105 
106     if pid < 0 {
107         bail!("Fork error! {}", std::io::Error::last_os_error());
108     }
109 
110     Ok(pid)
111 }
112 
113 struct FsBackend {
114     ex: Executor,
115     server: Arc<fuse::Server<PassthroughFs>>,
116     tag: [u8; FS_MAX_TAG_LEN],
117     avail_features: u64,
118     acked_features: u64,
119     acked_protocol_features: VhostUserProtocolFeatures,
120     workers: [Option<AbortHandle>; Self::MAX_QUEUE_NUM],
121     keep_rds: Vec<RawDescriptor>,
122 }
123 
124 impl FsBackend {
new(ex: &Executor, tag: &str) -> anyhow::Result<Self>125     pub fn new(ex: &Executor, tag: &str) -> anyhow::Result<Self> {
126         if tag.len() > FS_MAX_TAG_LEN {
127             bail!(
128                 "fs tag is too long: {} (max supported: {})",
129                 tag.len(),
130                 FS_MAX_TAG_LEN
131             );
132         }
133         let mut fs_tag = [0u8; FS_MAX_TAG_LEN];
134         fs_tag[..tag.len()].copy_from_slice(tag.as_bytes());
135 
136         let avail_features = virtio::base_features(ProtectionType::Unprotected)
137             | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
138 
139         // Use default passthroughfs config
140         let fs = PassthroughFs::new(Default::default())?;
141 
142         let mut keep_rds: Vec<RawDescriptor> = [0, 1, 2].to_vec();
143         keep_rds.append(&mut fs.keep_rds());
144 
145         let server = Arc::new(Server::new(fs));
146 
147         Ok(FsBackend {
148             ex: ex.clone(),
149             server,
150             tag: fs_tag,
151             avail_features,
152             acked_features: 0,
153             acked_protocol_features: VhostUserProtocolFeatures::empty(),
154             workers: Default::default(),
155             keep_rds,
156         })
157     }
158 }
159 
160 impl VhostUserBackend for FsBackend {
161     const MAX_QUEUE_NUM: usize = 2; /* worker queue and high priority queue */
162     const MAX_VRING_LEN: u16 = 1024;
163 
164     type Error = anyhow::Error;
165 
features(&self) -> u64166     fn features(&self) -> u64 {
167         self.avail_features
168     }
169 
ack_features(&mut self, value: u64) -> anyhow::Result<()>170     fn ack_features(&mut self, value: u64) -> anyhow::Result<()> {
171         let unrequested_features = value & !self.avail_features;
172         if unrequested_features != 0 {
173             bail!("invalid features are given: {:#x}", unrequested_features);
174         }
175 
176         self.acked_features |= value;
177 
178         Ok(())
179     }
180 
acked_features(&self) -> u64181     fn acked_features(&self) -> u64 {
182         self.acked_features
183     }
184 
protocol_features(&self) -> VhostUserProtocolFeatures185     fn protocol_features(&self) -> VhostUserProtocolFeatures {
186         VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::MQ
187     }
188 
ack_protocol_features(&mut self, features: u64) -> anyhow::Result<()>189     fn ack_protocol_features(&mut self, features: u64) -> anyhow::Result<()> {
190         let features = VhostUserProtocolFeatures::from_bits(features)
191             .ok_or_else(|| anyhow!("invalid protocol features are given: {:#x}", features))?;
192         let supported = self.protocol_features();
193         self.acked_protocol_features = features & supported;
194         Ok(())
195     }
196 
acked_protocol_features(&self) -> u64197     fn acked_protocol_features(&self) -> u64 {
198         self.acked_protocol_features.bits()
199     }
200 
read_config(&self, offset: u64, data: &mut [u8])201     fn read_config(&self, offset: u64, data: &mut [u8]) {
202         let config = virtio_fs_config {
203             tag: self.tag,
204             num_request_queues: Le32::from(1),
205         };
206         copy_config(data, 0, config.as_slice(), offset);
207     }
208 
reset(&mut self)209     fn reset(&mut self) {
210         for handle in self.workers.iter_mut().filter_map(Option::take) {
211             handle.abort();
212         }
213     }
214 
start_queue( &mut self, idx: usize, mut queue: virtio::Queue, mem: GuestMemory, doorbell: Arc<Mutex<Doorbell>>, kick_evt: Event, ) -> anyhow::Result<()>215     fn start_queue(
216         &mut self,
217         idx: usize,
218         mut queue: virtio::Queue,
219         mem: GuestMemory,
220         doorbell: Arc<Mutex<Doorbell>>,
221         kick_evt: Event,
222     ) -> anyhow::Result<()> {
223         if let Some(handle) = self.workers.get_mut(idx).and_then(Option::take) {
224             warn!("Starting new queue handler without stopping old handler");
225             handle.abort();
226         }
227 
228         // Enable any virtqueue features that were negotiated (like VIRTIO_RING_F_EVENT_IDX).
229         queue.ack_features(self.acked_features);
230 
231         let kick_evt = EventAsync::new(kick_evt.0, &self.ex)
232             .context("failed to create EventAsync for kick_evt")?;
233         let (handle, registration) = AbortHandle::new_pair();
234         let (_, fs_device_tube) = Tube::pair()?;
235 
236         self.ex
237             .spawn_local(Abortable::new(
238                 handle_fs_queue(
239                     queue,
240                     mem,
241                     doorbell,
242                     kick_evt,
243                     self.server.clone(),
244                     Arc::new(Mutex::new(fs_device_tube)),
245                 ),
246                 registration,
247             ))
248             .detach();
249 
250         self.workers[idx] = Some(handle);
251         Ok(())
252     }
253 
stop_queue(&mut self, idx: usize)254     fn stop_queue(&mut self, idx: usize) {
255         if let Some(handle) = self.workers.get_mut(idx).and_then(Option::take) {
256             handle.abort();
257         }
258     }
259 }
260 
261 #[derive(FromArgs)]
262 #[argh(description = "")]
263 struct Options {
264     #[argh(option, description = "path to a socket", arg_name = "PATH")]
265     socket: String,
266     #[argh(option, description = "the virtio-fs tag", arg_name = "TAG")]
267     tag: String,
268     #[argh(option, description = "path to a directory to share", arg_name = "DIR")]
269     shared_dir: PathBuf,
270     #[argh(option, description = "uid map to use", arg_name = "UIDMAP")]
271     uid_map: Option<String>,
272     #[argh(option, description = "gid map to use", arg_name = "GIDMAP")]
273     gid_map: Option<String>,
274 }
275 
276 /// Starts a vhost-user fs device.
277 /// Returns an error if the given `args` is invalid or the device fails to run.
run_fs_device(program_name: &str, args: &[&str]) -> anyhow::Result<()>278 pub fn run_fs_device(program_name: &str, args: &[&str]) -> anyhow::Result<()> {
279     let opts = match Options::from_args(&[program_name], args) {
280         Ok(opts) => opts,
281         Err(e) => {
282             if e.status.is_err() {
283                 bail!(e.output);
284             } else {
285                 println!("{}", e.output);
286             }
287             return Ok(());
288         }
289     };
290 
291     base::syslog::init().context("Failed to initialize syslog")?;
292 
293     let ex = Executor::new().context("Failed to create executor")?;
294     let fs_device = FsBackend::new(&ex, &opts.tag)?;
295 
296     // Create and bind unix socket
297     let listener = UnixListener::bind(opts.socket).map(UnlinkUnixListener)?;
298     let mut keep_rds = fs_device.keep_rds.clone();
299     keep_rds.push(listener.as_raw_fd());
300     base::syslog::push_descriptors(&mut keep_rds);
301 
302     let handler = DeviceRequestHandler::new(fs_device);
303 
304     let pid = jail_and_fork(keep_rds, opts.shared_dir, opts.uid_map, opts.gid_map)?;
305 
306     // Parent, nothing to do but wait and then exit
307     if pid != 0 {
308         unsafe { libc::waitpid(pid, std::ptr::null_mut(), 0) };
309         return Ok(());
310     }
311 
312     // We need to set the no setuid fixup secure bit so that we don't drop capabilities when
313     // changing the thread uid/gid. Without this, creating new entries can fail in some corner
314     // cases.
315     const SECBIT_NO_SETUID_FIXUP: i32 = 1 << 2;
316     // TODO(crbug.com/1199487): Remove this once libc provides the wrapper for all targets.
317     #[cfg(target_os = "linux")]
318     {
319         // Safe because this doesn't modify any memory and we check the return value.
320         let mut securebits = unsafe { libc::prctl(libc::PR_GET_SECUREBITS) };
321         if securebits < 0 {
322             bail!(io::Error::last_os_error());
323         }
324         securebits |= SECBIT_NO_SETUID_FIXUP;
325         // Safe because this doesn't modify any memory and we check the return value.
326         let ret = unsafe { libc::prctl(libc::PR_SET_SECUREBITS, securebits) };
327         if ret < 0 {
328             bail!(io::Error::last_os_error());
329         }
330     }
331 
332     // run_until() returns an Result<Result<..>> which the ? operator lets us flatten.
333     ex.run_until(handler.run_with_listener(listener, &ex))?
334 }
335