1 // Copyright 2021 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::io;
6 use std::os::unix::io::AsRawFd;
7 use std::os::unix::net::UnixListener;
8 use std::path::{Path, PathBuf};
9 use std::sync::Arc;
10
11 use anyhow::{anyhow, bail, Context};
12 use argh::FromArgs;
13 use base::{error, get_max_open_files, warn, Event, RawDescriptor, Tube, UnlinkUnixListener};
14 use cros_async::{EventAsync, Executor};
15 use data_model::{DataInit, Le32};
16 use fuse::Server;
17 use futures::future::{AbortHandle, Abortable};
18 use hypervisor::ProtectionType;
19 use minijail::{self, Minijail};
20 use sync::Mutex;
21 use vm_memory::GuestMemory;
22 use vmm_vhost::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
23
24 use crate::virtio;
25 use crate::virtio::copy_config;
26 use crate::virtio::fs::passthrough::PassthroughFs;
27 use crate::virtio::fs::{process_fs_queue, virtio_fs_config, FS_MAX_TAG_LEN};
28 use crate::virtio::vhost::user::device::handler::{
29 DeviceRequestHandler, Doorbell, VhostUserBackend,
30 };
31
handle_fs_queue( mut queue: virtio::Queue, mem: GuestMemory, doorbell: Arc<Mutex<Doorbell>>, kick_evt: EventAsync, server: Arc<fuse::Server<PassthroughFs>>, tube: Arc<Mutex<Tube>>, )32 async fn handle_fs_queue(
33 mut queue: virtio::Queue,
34 mem: GuestMemory,
35 doorbell: Arc<Mutex<Doorbell>>,
36 kick_evt: EventAsync,
37 server: Arc<fuse::Server<PassthroughFs>>,
38 tube: Arc<Mutex<Tube>>,
39 ) {
40 // Slot is always going to be 0 because we do not support DAX
41 let slot: u32 = 0;
42
43 loop {
44 if let Err(e) = kick_evt.next_val().await {
45 error!("Failed to read kick event for fs queue: {}", e);
46 break;
47 }
48 if let Err(e) = process_fs_queue(&mem, &doorbell, &mut queue, &server, &tube, slot) {
49 error!("Process FS queue failed: {}", e);
50 break;
51 }
52 }
53 }
54
default_uidmap() -> String55 fn default_uidmap() -> String {
56 let euid = unsafe { libc::geteuid() };
57 format!("{} {} 1", euid, euid)
58 }
59
default_gidmap() -> String60 fn default_gidmap() -> String {
61 let egid = unsafe { libc::getegid() };
62 format!("{} {} 1", egid, egid)
63 }
64
jail_and_fork( mut keep_rds: Vec<RawDescriptor>, dir_path: PathBuf, uid_map: Option<String>, gid_map: Option<String>, ) -> anyhow::Result<i32>65 fn jail_and_fork(
66 mut keep_rds: Vec<RawDescriptor>,
67 dir_path: PathBuf,
68 uid_map: Option<String>,
69 gid_map: Option<String>,
70 ) -> anyhow::Result<i32> {
71 // Create new minijail sandbox
72 let mut j = Minijail::new()?;
73
74 j.namespace_pids();
75 j.namespace_user();
76 j.namespace_user_disable_setgroups();
77 j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
78 j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
79 j.run_as_init();
80
81 j.namespace_vfs();
82 j.namespace_net();
83 j.no_new_privs();
84
85 // Only pivot_root if we are not re-using the current root directory.
86 if dir_path != Path::new("/") {
87 // It's safe to call `namespace_vfs` multiple times.
88 j.namespace_vfs();
89 j.enter_pivot_root(&dir_path)?;
90 }
91 j.set_remount_mode(libc::MS_SLAVE);
92
93 let limit = get_max_open_files().context("failed to get max open files")?;
94 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
95
96 // Make sure there are no duplicates in keep_rds
97 keep_rds.dedup();
98
99 // fork on the jail here
100 let pid = unsafe { j.fork(Some(&keep_rds))? };
101
102 if pid > 0 {
103 unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) };
104 }
105
106 if pid < 0 {
107 bail!("Fork error! {}", std::io::Error::last_os_error());
108 }
109
110 Ok(pid)
111 }
112
/// State of the vhost-user virtio-fs device backend.
struct FsBackend {
    /// Executor used to create the async kick events and to spawn the per-queue workers.
    ex: Executor,
    /// FUSE server wrapping the passthrough file system; a clone is handed to every queue worker.
    server: Arc<fuse::Server<PassthroughFs>>,
    /// File system tag, zero-padded to `FS_MAX_TAG_LEN` bytes, reported through the device config.
    tag: [u8; FS_MAX_TAG_LEN],
    /// Virtio feature bits this device offers.
    avail_features: u64,
    /// Virtio feature bits that have been acknowledged so far.
    acked_features: u64,
    /// Vhost-user protocol features that have been acknowledged (restricted to the supported set).
    acked_protocol_features: VhostUserProtocolFeatures,
    /// Abort handle of the running worker for each queue index, or `None` if the queue is stopped.
    workers: [Option<AbortHandle>; Self::MAX_QUEUE_NUM],
    /// Descriptors that must stay open when the process is jailed: 0, 1, 2 plus the ones the
    /// passthrough file system asks to keep.
    keep_rds: Vec<RawDescriptor>,
}
123
124 impl FsBackend {
new(ex: &Executor, tag: &str) -> anyhow::Result<Self>125 pub fn new(ex: &Executor, tag: &str) -> anyhow::Result<Self> {
126 if tag.len() > FS_MAX_TAG_LEN {
127 bail!(
128 "fs tag is too long: {} (max supported: {})",
129 tag.len(),
130 FS_MAX_TAG_LEN
131 );
132 }
133 let mut fs_tag = [0u8; FS_MAX_TAG_LEN];
134 fs_tag[..tag.len()].copy_from_slice(tag.as_bytes());
135
136 let avail_features = virtio::base_features(ProtectionType::Unprotected)
137 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
138
139 // Use default passthroughfs config
140 let fs = PassthroughFs::new(Default::default())?;
141
142 let mut keep_rds: Vec<RawDescriptor> = [0, 1, 2].to_vec();
143 keep_rds.append(&mut fs.keep_rds());
144
145 let server = Arc::new(Server::new(fs));
146
147 Ok(FsBackend {
148 ex: ex.clone(),
149 server,
150 tag: fs_tag,
151 avail_features,
152 acked_features: 0,
153 acked_protocol_features: VhostUserProtocolFeatures::empty(),
154 workers: Default::default(),
155 keep_rds,
156 })
157 }
158 }
159
160 impl VhostUserBackend for FsBackend {
161 const MAX_QUEUE_NUM: usize = 2; /* worker queue and high priority queue */
162 const MAX_VRING_LEN: u16 = 1024;
163
164 type Error = anyhow::Error;
165
features(&self) -> u64166 fn features(&self) -> u64 {
167 self.avail_features
168 }
169
ack_features(&mut self, value: u64) -> anyhow::Result<()>170 fn ack_features(&mut self, value: u64) -> anyhow::Result<()> {
171 let unrequested_features = value & !self.avail_features;
172 if unrequested_features != 0 {
173 bail!("invalid features are given: {:#x}", unrequested_features);
174 }
175
176 self.acked_features |= value;
177
178 Ok(())
179 }
180
acked_features(&self) -> u64181 fn acked_features(&self) -> u64 {
182 self.acked_features
183 }
184
protocol_features(&self) -> VhostUserProtocolFeatures185 fn protocol_features(&self) -> VhostUserProtocolFeatures {
186 VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::MQ
187 }
188
ack_protocol_features(&mut self, features: u64) -> anyhow::Result<()>189 fn ack_protocol_features(&mut self, features: u64) -> anyhow::Result<()> {
190 let features = VhostUserProtocolFeatures::from_bits(features)
191 .ok_or_else(|| anyhow!("invalid protocol features are given: {:#x}", features))?;
192 let supported = self.protocol_features();
193 self.acked_protocol_features = features & supported;
194 Ok(())
195 }
196
acked_protocol_features(&self) -> u64197 fn acked_protocol_features(&self) -> u64 {
198 self.acked_protocol_features.bits()
199 }
200
read_config(&self, offset: u64, data: &mut [u8])201 fn read_config(&self, offset: u64, data: &mut [u8]) {
202 let config = virtio_fs_config {
203 tag: self.tag,
204 num_request_queues: Le32::from(1),
205 };
206 copy_config(data, 0, config.as_slice(), offset);
207 }
208
reset(&mut self)209 fn reset(&mut self) {
210 for handle in self.workers.iter_mut().filter_map(Option::take) {
211 handle.abort();
212 }
213 }
214
start_queue( &mut self, idx: usize, mut queue: virtio::Queue, mem: GuestMemory, doorbell: Arc<Mutex<Doorbell>>, kick_evt: Event, ) -> anyhow::Result<()>215 fn start_queue(
216 &mut self,
217 idx: usize,
218 mut queue: virtio::Queue,
219 mem: GuestMemory,
220 doorbell: Arc<Mutex<Doorbell>>,
221 kick_evt: Event,
222 ) -> anyhow::Result<()> {
223 if let Some(handle) = self.workers.get_mut(idx).and_then(Option::take) {
224 warn!("Starting new queue handler without stopping old handler");
225 handle.abort();
226 }
227
228 // Enable any virtqueue features that were negotiated (like VIRTIO_RING_F_EVENT_IDX).
229 queue.ack_features(self.acked_features);
230
231 let kick_evt = EventAsync::new(kick_evt.0, &self.ex)
232 .context("failed to create EventAsync for kick_evt")?;
233 let (handle, registration) = AbortHandle::new_pair();
234 let (_, fs_device_tube) = Tube::pair()?;
235
236 self.ex
237 .spawn_local(Abortable::new(
238 handle_fs_queue(
239 queue,
240 mem,
241 doorbell,
242 kick_evt,
243 self.server.clone(),
244 Arc::new(Mutex::new(fs_device_tube)),
245 ),
246 registration,
247 ))
248 .detach();
249
250 self.workers[idx] = Some(handle);
251 Ok(())
252 }
253
stop_queue(&mut self, idx: usize)254 fn stop_queue(&mut self, idx: usize) {
255 if let Some(handle) = self.workers.get_mut(idx).and_then(Option::take) {
256 handle.abort();
257 }
258 }
259 }
260
// Command-line options of the vhost-user fs device, parsed with `argh`.
// Plain `//` comments are used here on purpose so that no extra `#[doc]` attributes are fed to
// the `FromArgs` derive.
#[derive(FromArgs)]
#[argh(description = "")]
struct Options {
    // Path at which the vhost-user unix socket is created and bound.
    #[argh(option, description = "path to a socket", arg_name = "PATH")]
    socket: String,
    // Tag of the file system; must fit in `FS_MAX_TAG_LEN` bytes.
    #[argh(option, description = "the virtio-fs tag", arg_name = "TAG")]
    tag: String,
    // Directory to share; it becomes the root of the jailed device process unless it is `/`.
    #[argh(option, description = "path to a directory to share", arg_name = "DIR")]
    shared_dir: PathBuf,
    // Optional uid map for the jail's user namespace; when absent, the current effective uid is
    // mapped onto itself.
    #[argh(option, description = "uid map to use", arg_name = "UIDMAP")]
    uid_map: Option<String>,
    // Optional gid map for the jail's user namespace; when absent, the current effective gid is
    // mapped onto itself.
    #[argh(option, description = "gid map to use", arg_name = "GIDMAP")]
    gid_map: Option<String>,
}
275
276 /// Starts a vhost-user fs device.
277 /// Returns an error if the given `args` is invalid or the device fails to run.
run_fs_device(program_name: &str, args: &[&str]) -> anyhow::Result<()>278 pub fn run_fs_device(program_name: &str, args: &[&str]) -> anyhow::Result<()> {
279 let opts = match Options::from_args(&[program_name], args) {
280 Ok(opts) => opts,
281 Err(e) => {
282 if e.status.is_err() {
283 bail!(e.output);
284 } else {
285 println!("{}", e.output);
286 }
287 return Ok(());
288 }
289 };
290
291 base::syslog::init().context("Failed to initialize syslog")?;
292
293 let ex = Executor::new().context("Failed to create executor")?;
294 let fs_device = FsBackend::new(&ex, &opts.tag)?;
295
296 // Create and bind unix socket
297 let listener = UnixListener::bind(opts.socket).map(UnlinkUnixListener)?;
298 let mut keep_rds = fs_device.keep_rds.clone();
299 keep_rds.push(listener.as_raw_fd());
300 base::syslog::push_descriptors(&mut keep_rds);
301
302 let handler = DeviceRequestHandler::new(fs_device);
303
304 let pid = jail_and_fork(keep_rds, opts.shared_dir, opts.uid_map, opts.gid_map)?;
305
306 // Parent, nothing to do but wait and then exit
307 if pid != 0 {
308 unsafe { libc::waitpid(pid, std::ptr::null_mut(), 0) };
309 return Ok(());
310 }
311
312 // We need to set the no setuid fixup secure bit so that we don't drop capabilities when
313 // changing the thread uid/gid. Without this, creating new entries can fail in some corner
314 // cases.
315 const SECBIT_NO_SETUID_FIXUP: i32 = 1 << 2;
316 // TODO(crbug.com/1199487): Remove this once libc provides the wrapper for all targets.
317 #[cfg(target_os = "linux")]
318 {
319 // Safe because this doesn't modify any memory and we check the return value.
320 let mut securebits = unsafe { libc::prctl(libc::PR_GET_SECUREBITS) };
321 if securebits < 0 {
322 bail!(io::Error::last_os_error());
323 }
324 securebits |= SECBIT_NO_SETUID_FIXUP;
325 // Safe because this doesn't modify any memory and we check the return value.
326 let ret = unsafe { libc::prctl(libc::PR_SET_SECUREBITS, securebits) };
327 if ret < 0 {
328 bail!(io::Error::last_os_error());
329 }
330 }
331
332 // run_until() returns an Result<Result<..>> which the ? operator lets us flatten.
333 ex.run_until(handler.run_with_listener(listener, &ex))?
334 }
335