1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::BTreeMap;
6 use std::fs::File;
7 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
8 use std::fs::OpenOptions;
9 use std::io::prelude::*;
10 use std::sync::mpsc;
11 use std::sync::Arc;
12 use std::sync::Barrier;
13 use std::thread;
14 use std::thread::JoinHandle;
15 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
16 use std::time::Duration;
17
18 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
19 use aarch64::AArch64 as Arch;
20 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
21 use aarch64::MsrHandlers;
22 use anyhow::Context;
23 use anyhow::Result;
24 use arch::CpuSet;
25 use arch::LinuxArch;
26 use arch::MsrConfig;
27 use base::*;
28 use devices::Bus;
29 use devices::IrqChip;
30 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
31 use devices::IrqChipAArch64 as IrqChipArch;
32 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
33 use devices::IrqChipX86_64 as IrqChipArch;
34 use devices::VcpuRunState;
35 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
36 use hypervisor::CpuConfigAArch64 as CpuConfigArch;
37 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
38 use hypervisor::CpuConfigX86_64 as CpuConfigArch;
39 use hypervisor::IoOperation;
40 use hypervisor::IoParams;
41 use hypervisor::Vcpu;
42 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
43 use hypervisor::VcpuAArch64 as VcpuArch;
44 use hypervisor::VcpuExit;
45 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
46 use hypervisor::VcpuInitAArch64 as VcpuInitArch;
47 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
48 use hypervisor::VcpuInitX86_64 as VcpuInitArch;
49 use hypervisor::VcpuRunHandle;
50 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
51 use hypervisor::VcpuX86_64 as VcpuArch;
52 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
53 use hypervisor::VmAArch64 as VmArch;
54 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
55 use hypervisor::VmX86_64 as VmArch;
56 use libc::c_int;
57 use sync::Condvar;
58 use sync::Mutex;
59 use vm_control::*;
60 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
61 use vm_memory::GuestMemory;
62 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
63 use x86_64::msr::MsrHandlers;
64 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
65 use x86_64::X8664arch as Arch;
66
67 use super::ExitState;
68 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
69 use crate::crosvm::ratelimit::Ratelimit;
70
setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()>71 pub fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
72 if use_hypervisor_signals {
73 unsafe {
74 extern "C" fn handle_signal(_: c_int) {}
75 // Our signal handler does nothing and is trivially async signal safe.
76 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
77 .context("error registering signal handler")?;
78 }
79 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
80 } else {
81 unsafe {
82 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
83 T::set_local_immediate_exit(true);
84 }
85 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
86 .context("error registering signal handler")?;
87 }
88 }
89 Ok(())
90 }
91
bus_io_handler(bus: &Bus) -> impl FnMut(IoParams) -> Option<[u8; 8]> + '_92 fn bus_io_handler(bus: &Bus) -> impl FnMut(IoParams) -> Option<[u8; 8]> + '_ {
93 |IoParams {
94 address,
95 mut size,
96 operation: direction,
97 }| match direction {
98 IoOperation::Read => {
99 let mut data = [0u8; 8];
100 if size > data.len() {
101 error!("unsupported Read size of {} bytes", size);
102 size = data.len();
103 }
104 // Ignore the return value of `read()`. If no device exists on the bus at the given
105 // location, return the initial value of data, which is all zeroes.
106 let _ = bus.read(address, &mut data[..size]);
107 Some(data)
108 }
109 IoOperation::Write { data } => {
110 if size > data.len() {
111 error!("unsupported Write size of {} bytes", size);
112 size = data.len()
113 }
114 let data = &data[..size];
115 bus.write(address, data);
116 None
117 }
118 }
119 }
120
121 /// Set the VCPU thread affinity and other per-thread scheduler properties.
122 /// This function will be called from each VCPU thread at startup.
set_vcpu_thread_scheduling( vcpu_affinity: CpuSet, core_scheduling: bool, enable_per_vm_core_scheduling: bool, vcpu_cgroup_tasks_file: Option<File>, run_rt: bool, ) -> anyhow::Result<()>123 pub fn set_vcpu_thread_scheduling(
124 vcpu_affinity: CpuSet,
125 core_scheduling: bool,
126 enable_per_vm_core_scheduling: bool,
127 vcpu_cgroup_tasks_file: Option<File>,
128 run_rt: bool,
129 ) -> anyhow::Result<()> {
130 if !vcpu_affinity.is_empty() {
131 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
132 error!("Failed to set CPU affinity: {}", e);
133 }
134 }
135
136 if core_scheduling && !enable_per_vm_core_scheduling {
137 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
138 if let Err(e) = enable_core_scheduling() {
139 error!("Failed to enable core scheduling: {}", e);
140 }
141 }
142
143 // Move vcpu thread to cgroup
144 if let Some(mut f) = vcpu_cgroup_tasks_file {
145 f.write_all(base::gettid().to_string().as_bytes())
146 .context("failed to write vcpu tid to cgroup tasks")?;
147 }
148
149 if run_rt {
150 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
151 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
152 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
153 {
154 warn!("Failed to set vcpu to real time: {}", e);
155 }
156 }
157
158 Ok(())
159 }
160
161 // Sets up a vcpu and converts it into a runnable vcpu.
runnable_vcpu<V>( cpu_id: usize, vcpu_id: usize, vcpu: Option<V>, vcpu_init: VcpuInitArch, vm: impl VmArch, irq_chip: &mut dyn IrqChipArch, vcpu_count: usize, has_bios: bool, use_hypervisor_signals: bool, cpu_config: Option<CpuConfigArch>, ) -> Result<(V, VcpuRunHandle)> where V: VcpuArch,162 pub fn runnable_vcpu<V>(
163 cpu_id: usize,
164 vcpu_id: usize,
165 vcpu: Option<V>,
166 vcpu_init: VcpuInitArch,
167 vm: impl VmArch,
168 irq_chip: &mut dyn IrqChipArch,
169 vcpu_count: usize,
170 has_bios: bool,
171 use_hypervisor_signals: bool,
172 cpu_config: Option<CpuConfigArch>,
173 ) -> Result<(V, VcpuRunHandle)>
174 where
175 V: VcpuArch,
176 {
177 let mut vcpu = match vcpu {
178 Some(v) => v,
179 None => {
180 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
181 // the vcpu thread.
182 match vm
183 .create_vcpu(vcpu_id)
184 .context("failed to create vcpu")?
185 .downcast::<V>()
186 {
187 Ok(v) => *v,
188 Err(_) => panic!("VM created wrong type of VCPU"),
189 }
190 }
191 };
192
193 irq_chip
194 .add_vcpu(cpu_id, &vcpu)
195 .context("failed to add vcpu to irq chip")?;
196
197 Arch::configure_vcpu(
198 &vm,
199 vm.get_hypervisor(),
200 irq_chip,
201 &mut vcpu,
202 vcpu_init,
203 cpu_id,
204 vcpu_count,
205 has_bios,
206 cpu_config,
207 )
208 .context("failed to configure vcpu")?;
209
210 if use_hypervisor_signals {
211 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
212 v.retain(|&x| x != SIGRTMIN() + 0);
213 vcpu.set_signal_mask(&v)
214 .context("failed to set the signal mask for vcpu")?;
215 }
216
217 let vcpu_run_handle = vcpu
218 .take_run_handle(Some(SIGRTMIN() + 0))
219 .context("failed to set thread id for vcpu")?;
220
221 Ok((vcpu, vcpu_run_handle))
222 }
223
/// Suspend-to-idle request handler for architectures other than x86/x86_64.
///
/// This variant has an empty body: both arguments are ignored and nothing happens. It exists so
/// that callers can invoke `handle_s2idle_request` regardless of the target architecture.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
fn handle_s2idle_request(
    _privileged_vm: bool,
    _guest_suspended_cvar: &Arc<(Mutex<bool>, Condvar)>,
) {
}
230
231 // Allow error! and early return anywhere in function
232 #[allow(clippy::needless_return)]
233 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
handle_s2idle_request(privileged_vm: bool, guest_suspended_cvar: &Arc<(Mutex<bool>, Condvar)>)234 fn handle_s2idle_request(privileged_vm: bool, guest_suspended_cvar: &Arc<(Mutex<bool>, Condvar)>) {
235 const POWER_STATE_FREEZE: &[u8] = b"freeze";
236
237 // For non privileged guests, wake up blocked thread on condvar, which is awaiting
238 // non-privileged guest suspension to finish.
239 if !privileged_vm {
240 let (lock, cvar) = &**guest_suspended_cvar;
241 let mut guest_suspended = lock.lock();
242 *guest_suspended = true;
243
244 cvar.notify_one();
245 info!("dbg: s2idle notified");
246
247 return;
248 }
249
250 // For privileged guests, proceed with the suspend request
251 let mut power_state = match OpenOptions::new().write(true).open("/sys/power/state") {
252 Ok(s) => s,
253 Err(err) => {
254 error!("Failed on open /sys/power/state: {}", err);
255 return;
256 }
257 };
258
259 if let Err(err) = power_state.write(POWER_STATE_FREEZE) {
260 error!("Failed on writing to /sys/power/state: {}", err);
261 return;
262 }
263 }
264
/// Runs the main loop of a single VCPU thread and returns the reason the loop ended.
///
/// Each iteration of the outer loop does the following:
/// 1. If the previous iteration was interrupted by a signal, or `run_mode` is not `Running`,
///    processes `VcpuControl` messages from `from_main_tube` (blocking on the channel while the
///    VM is not running).
/// 2. Waits until `irq_chip` reports the vcpu as runnable.
/// 3. Unless interrupted, runs the vcpu and handles the resulting `VcpuExit`.
/// 4. If interrupted, clears the pending kick signal (`use_hypervisor_signals`) or resets the
///    vcpu's immediate-exit flag.
/// 5. Asks `irq_chip` to inject interrupts into the vcpu.
///
/// Return values:
/// * `ExitState::Stop` - run mode changed to `Exiting`, or the vcpu reported `Shutdown`,
///   `SystemEventShutdown` or `SystemEventCrash`.
/// * `ExitState::Reset` - the vcpu reported `SystemEventReset`.
/// * `ExitState::Crash` - the control channel failed, the vcpu reported `FailEntry`, `run`
///   returned an error other than `EINTR`/`EAGAIN`, clearing the kick signal failed, or (with
///   the gdb feature) handling a debug exit failed.
fn vcpu_loop<V>(
    mut run_mode: VmRunMode,
    cpu_id: usize,
    mut vcpu: V,
    vcpu_run_handle: VcpuRunHandle,
    irq_chip: Box<dyn IrqChipArch + 'static>,
    run_rt: bool,
    delay_rt: bool,
    io_bus: Bus,
    mmio_bus: Bus,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    privileged_vm: bool,
    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
    to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
    guest_mem: GuestMemory,
    msr_handlers: MsrHandlers,
    guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
    bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>>,
) -> ExitState
where
    V: VcpuArch + 'static,
{
    // Set when `wait_until_runnable` reports `Interrupted` or `run` fails with EINTR; it makes
    // the next iteration check the control channel.
    let mut interrupted_by_signal = false;

    loop {
        // Start by checking for messages to process and the run state of the CPU.
        // An extra check here for Running so there isn't a need to call recv unless a
        // message is likely to be ready because a signal was sent.
        if interrupted_by_signal || run_mode != VmRunMode::Running {
            'state_loop: loop {
                // Tries to get a pending message without blocking first.
                let msg = match from_main_tube.try_recv() {
                    Ok(m) => m,
                    Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
                        // If the VM is running and no message is pending, the state won't
                        // change.
                        break 'state_loop;
                    }
                    Err(mpsc::TryRecvError::Empty) => {
                        // If the VM is not running, wait until a message is ready.
                        match from_main_tube.recv() {
                            Ok(m) => m,
                            Err(mpsc::RecvError) => {
                                error!("Failed to read from main tube in vcpu");
                                return ExitState::Crash;
                            }
                        }
                    }
                    Err(mpsc::TryRecvError::Disconnected) => {
                        error!("Failed to read from main tube in vcpu");
                        return ExitState::Crash;
                    }
                };

                // Collect all pending messages.
                let mut messages = vec![msg];
                messages.append(&mut from_main_tube.try_iter().collect());

                for msg in messages {
                    match msg {
                        VcpuControl::RunState(new_mode) => {
                            run_mode = new_mode;
                            match run_mode {
                                // NOTE(review): leaving 'state_loop here also abandons the
                                // `for` loop, so messages queued after this one in `messages`
                                // are dropped without being handled.
                                VmRunMode::Running => break 'state_loop,
                                VmRunMode::Suspending => {
                                    // On KVM implementations that use a paravirtualized
                                    // clock (e.g. x86), a flag must be set to indicate to
                                    // the guest kernel that a vCPU was suspended. The guest
                                    // kernel will use this flag to prevent the soft lockup
                                    // detection from triggering when this vCPU resumes,
                                    // which could happen days later in realtime.
                                    if requires_pvclock_ctrl {
                                        if let Err(e) = vcpu.pvclock_ctrl() {
                                            error!(
                                                "failed to tell hypervisor vcpu {} is suspending: {}",
                                                cpu_id, e
                                            );
                                        }
                                    }
                                }
                                // Nothing to do here; the state loop keeps waiting for the
                                // next message because `run_mode` is not `Running`.
                                VmRunMode::Breakpoint => {}
                                VmRunMode::Exiting => return ExitState::Stop,
                            }
                        }
                        #[cfg(all(
                            any(target_arch = "x86_64", target_arch = "aarch64"),
                            feature = "gdb"
                        ))]
                        VcpuControl::Debug(d) => {
                            if let Err(e) = crate::crosvm::gdb::vcpu_control_debug(
                                cpu_id,
                                &vcpu,
                                &guest_mem,
                                d,
                                to_gdb_tube.as_ref(),
                            ) {
                                error!("Failed to handle VcpuControl::Debug message: {:#}", e);
                            }
                        }
                        VcpuControl::MakeRT => {
                            // Only acts when real-time scheduling was requested but delayed.
                            if run_rt && delay_rt {
                                info!("Making vcpu {} RT\n", cpu_id);
                                const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
                                if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
                                    .and_then(|_| {
                                        set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL))
                                    })
                                {
                                    warn!("Failed to set vcpu to real time: {}", e);
                                }
                            }
                        }
                        VcpuControl::GetStates(response_chan) => {
                            // Report the current run mode back to the requester.
                            if let Err(e) = response_chan.send(run_mode) {
                                error!("Failed to send GetState: {}", e);
                            };
                        }
                        VcpuControl::Snapshot(response_chan) => {
                            // The snapshot result (success or error) is sent back as is.
                            let resp = vcpu
                                .snapshot()
                                .with_context(|| format!("Failed to snapshot Vcpu #{}", vcpu.id()));
                            if let Err(e) = response_chan.send(resp) {
                                error!("Failed to send snapshot response: {}", e);
                            }
                        }
                        VcpuControl::Restore(response_chan, vcpu_data) => {
                            // The restore result (success or error) is sent back as is.
                            let resp = vcpu
                                .restore(&vcpu_data)
                                .with_context(|| format!("Failed to restore Vcpu #{}", vcpu.id()));
                            if let Err(e) = response_chan.send(resp) {
                                error!("Failed to send restore response: {}", e);
                            }
                        }
                    }
                }
            }
        }

        interrupted_by_signal = false;

        // Vcpus may have run a HLT instruction, which puts them into a state other than
        // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
        // until either the irqchip receives an interrupt for this vcpu, or until the main
        // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
        // implementations HLT instructions do not make it to crosvm, and thus this is a
        // no-op that always returns VcpuRunState::Runnable.
        match irq_chip.wait_until_runnable(&vcpu) {
            Ok(VcpuRunState::Runnable) => {}
            Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
            // The error is only logged; the loop goes on to run the vcpu.
            Err(e) => error!(
                "error waiting for vcpu {} to become runnable: {}",
                cpu_id, e
            ),
        }

        if !interrupted_by_signal {
            match vcpu.run(&vcpu_run_handle) {
                Ok(VcpuExit::Io) => {
                    if let Err(e) = vcpu.handle_io(&mut bus_io_handler(&io_bus)) {
                        error!("failed to handle io: {}", e)
                    }
                }
                Ok(VcpuExit::Mmio) => {
                    if let Err(e) = vcpu.handle_mmio(&mut bus_io_handler(&mmio_bus)) {
                        error!("failed to handle mmio: {}", e);
                    }
                }
                Ok(VcpuExit::RdMsr { index }) => {
                    // Only MSR reads that a handler answers are completed; the result of
                    // `handle_rdmsr` is ignored.
                    if let Some(data) = msr_handlers.read(index) {
                        let _ = vcpu.handle_rdmsr(data);
                    }
                }
                Ok(VcpuExit::WrMsr { index, data }) => {
                    // Only MSR writes that a handler accepts are completed.
                    if msr_handlers.write(index, data).is_some() {
                        vcpu.handle_wrmsr();
                    }
                }
                Ok(VcpuExit::IoapicEoi { vector }) => {
                    if let Err(e) = irq_chip.broadcast_eoi(vector) {
                        error!(
                            "failed to broadcast eoi {} on vcpu {}: {}",
                            vector, cpu_id, e
                        );
                    }
                }
                Ok(VcpuExit::IrqWindowOpen) => {}
                Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
                Ok(VcpuExit::Shutdown) => return ExitState::Stop,
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                }) => {
                    error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                    return ExitState::Crash;
                }
                Ok(VcpuExit::SystemEventShutdown) => {
                    info!("system shutdown event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventReset) => {
                    info!("system reset event");
                    return ExitState::Reset;
                }
                // Note: a guest crash event ends the loop with `ExitState::Stop`, not
                // `ExitState::Crash`.
                Ok(VcpuExit::SystemEventCrash) => {
                    info!("system crash event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventS2Idle) => {
                    handle_s2idle_request(privileged_vm, &guest_suspended_cvar);
                }
                Ok(VcpuExit::Debug) => {
                    #[cfg(all(
                        any(target_arch = "x86_64", target_arch = "aarch64"),
                        feature = "gdb"
                    ))]
                    if let Err(e) =
                        crate::crosvm::gdb::vcpu_exit_debug(cpu_id, to_gdb_tube.as_ref())
                    {
                        error!("Failed to handle VcpuExit::Debug: {:#}", e);
                        return ExitState::Crash;
                    }

                    // Switching to Breakpoint makes the next iteration enter the state loop
                    // and wait for a control message.
                    run_mode = VmRunMode::Breakpoint;
                }
                #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
                Ok(VcpuExit::BusLock) => {
                    // Sleep for the delay computed by the shared rate limiter.
                    let delay_ns: u64 = bus_lock_ratelimit_ctrl.lock().ratelimit_calculate_delay(1);
                    thread::sleep(Duration::from_nanos(delay_ns));
                }
                Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                Err(e) => match e.errno() {
                    // EINTR: remember the interruption so control messages get checked.
                    libc::EINTR => interrupted_by_signal = true,
                    // EAGAIN: nothing to do, just go around the loop again.
                    libc::EAGAIN => {}
                    _ => {
                        error!("vcpu hit unknown error: {}", e);
                        return ExitState::Crash;
                    }
                },
            }
        }

        if interrupted_by_signal {
            if use_hypervisor_signals {
                // Try to clear the signal that we use to kick VCPU if it is pending before
                // attempting to handle pause requests.
                if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                    error!("failed to clear pending signal: {}", e);
                    return ExitState::Crash;
                }
            } else {
                // Reset the immediate-exit request on the vcpu.
                vcpu.set_immediate_exit(false);
            }
        }

        // Injection failures are logged and the loop continues.
        if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
            error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
        }
    }
}
529
run_vcpu<V>( cpu_id: usize, vcpu_id: usize, vcpu: Option<V>, vcpu_init: VcpuInitArch, vm: impl VmArch + 'static, mut irq_chip: Box<dyn IrqChipArch + 'static>, vcpu_count: usize, run_rt: bool, vcpu_affinity: CpuSet, delay_rt: bool, start_barrier: Arc<Barrier>, has_bios: bool, mut io_bus: Bus, mut mmio_bus: Bus, vm_evt_wrtube: SendTube, requires_pvclock_ctrl: bool, from_main_tube: mpsc::Receiver<VcpuControl>, use_hypervisor_signals: bool, #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))] to_gdb_tube: Option< mpsc::Sender<VcpuDebugStatusMessage>, >, enable_core_scheduling: bool, enable_per_vm_core_scheduling: bool, cpu_config: Option<CpuConfigArch>, privileged_vm: bool, vcpu_cgroup_tasks_file: Option<File>, userspace_msr: BTreeMap<u32, MsrConfig>, guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>, #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))] bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>>, run_mode: VmRunMode, ) -> Result<JoinHandle<()>> where V: VcpuArch + 'static,530 pub fn run_vcpu<V>(
531 cpu_id: usize,
532 vcpu_id: usize,
533 vcpu: Option<V>,
534 vcpu_init: VcpuInitArch,
535 vm: impl VmArch + 'static,
536 mut irq_chip: Box<dyn IrqChipArch + 'static>,
537 vcpu_count: usize,
538 run_rt: bool,
539 vcpu_affinity: CpuSet,
540 delay_rt: bool,
541 start_barrier: Arc<Barrier>,
542 has_bios: bool,
543 mut io_bus: Bus,
544 mut mmio_bus: Bus,
545 vm_evt_wrtube: SendTube,
546 requires_pvclock_ctrl: bool,
547 from_main_tube: mpsc::Receiver<VcpuControl>,
548 use_hypervisor_signals: bool,
549 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))] to_gdb_tube: Option<
550 mpsc::Sender<VcpuDebugStatusMessage>,
551 >,
552 enable_core_scheduling: bool,
553 enable_per_vm_core_scheduling: bool,
554 cpu_config: Option<CpuConfigArch>,
555 privileged_vm: bool,
556 vcpu_cgroup_tasks_file: Option<File>,
557 userspace_msr: BTreeMap<u32, MsrConfig>,
558 guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
559 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
560 bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>>,
561 run_mode: VmRunMode,
562 ) -> Result<JoinHandle<()>>
563 where
564 V: VcpuArch + 'static,
565 {
566 thread::Builder::new()
567 .name(format!("crosvm_vcpu{}", cpu_id))
568 .spawn(move || {
569 // Having a closure returning ExitState guarentees that we
570 // send a VmEventType on all code paths after the closure
571 // returns.
572 let vcpu_fn = || -> ExitState {
573 if let Err(e) = set_vcpu_thread_scheduling(
574 vcpu_affinity,
575 enable_core_scheduling,
576 enable_per_vm_core_scheduling,
577 vcpu_cgroup_tasks_file,
578 run_rt && !delay_rt,
579 ) {
580 error!("vcpu thread setup failed: {:#}", e);
581 return ExitState::Stop;
582 }
583
584 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
585 let guest_mem = vm.get_memory().clone();
586
587 let runnable_vcpu = runnable_vcpu(
588 cpu_id,
589 vcpu_id,
590 vcpu,
591 vcpu_init,
592 vm,
593 irq_chip.as_mut(),
594 vcpu_count,
595 has_bios,
596 use_hypervisor_signals,
597 cpu_config,
598 );
599
600 // Add MSR handlers after CPU affinity setting.
601 // This avoids redundant MSR file fd creation.
602 let mut msr_handlers = MsrHandlers::new();
603 if !userspace_msr.is_empty() {
604 userspace_msr.iter().for_each(|(index, msr_config)| {
605 if let Err(e) = msr_handlers.add_handler(*index, msr_config.clone(), cpu_id)
606 {
607 error!("failed to add msr handler {}: {:#}", cpu_id, e);
608 };
609 });
610 }
611
612 start_barrier.wait();
613
614 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
615 Ok(v) => v,
616 Err(e) => {
617 error!("failed to start vcpu {}: {:#}", cpu_id, e);
618 return ExitState::Stop;
619 }
620 };
621
622 mmio_bus.set_access_id(cpu_id);
623 io_bus.set_access_id(cpu_id);
624
625 vcpu_loop(
626 run_mode,
627 cpu_id,
628 vcpu,
629 vcpu_run_handle,
630 irq_chip,
631 run_rt,
632 delay_rt,
633 io_bus,
634 mmio_bus,
635 requires_pvclock_ctrl,
636 from_main_tube,
637 use_hypervisor_signals,
638 privileged_vm,
639 #[cfg(all(
640 any(target_arch = "x86_64", target_arch = "aarch64"),
641 feature = "gdb"
642 ))]
643 to_gdb_tube,
644 #[cfg(all(
645 any(target_arch = "x86_64", target_arch = "aarch64"),
646 feature = "gdb"
647 ))]
648 guest_mem,
649 msr_handlers,
650 guest_suspended_cvar,
651 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
652 bus_lock_ratelimit_ctrl,
653 )
654 };
655
656 let final_event_data = match vcpu_fn() {
657 ExitState::Stop => VmEventType::Exit,
658 ExitState::Reset => VmEventType::Reset,
659 ExitState::Crash => VmEventType::Crash,
660 // vcpu_loop doesn't exit with GuestPanic.
661 ExitState::GuestPanic => unreachable!(),
662 ExitState::WatchdogReset => VmEventType::WatchdogReset,
663 };
664 if let Err(e) = vm_evt_wrtube.send::<VmEventType>(&final_event_data) {
665 error!(
666 "failed to send final event {:?} on vcpu {}: {}",
667 final_event_data, cpu_id, e
668 )
669 }
670 })
671 .context("failed to spawn VCPU thread")
672 }
673
674 /// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
675 /// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
676 /// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
677 /// loop.
kick_all_vcpus( vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)], irq_chip: &dyn IrqChip, message: VcpuControl, )678 pub fn kick_all_vcpus(
679 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
680 irq_chip: &dyn IrqChip,
681 message: VcpuControl,
682 ) {
683 for (handle, tube) in vcpu_handles {
684 if let Err(e) = tube.send(message.clone()) {
685 error!("failed to send VcpuControl: {}", e);
686 }
687 let _ = handle.kill(SIGRTMIN() + 0);
688 }
689 irq_chip.kick_halted_vcpus();
690 }
691
692 /// Signals specific running VCPUs to vmexit, sends VcpuControl message to the VCPU tube, and tells
693 /// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
694 /// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
695 /// loop.
kick_vcpu( vcpu_handle: &Option<&(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)>, irq_chip: &dyn IrqChip, message: VcpuControl, )696 pub fn kick_vcpu(
697 vcpu_handle: &Option<&(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)>,
698 irq_chip: &dyn IrqChip,
699 message: VcpuControl,
700 ) {
701 if let Some((handle, tube)) = vcpu_handle {
702 if let Err(e) = tube.send(message) {
703 error!("failed to send VcpuControl: {}", e);
704 }
705 let _ = handle.kill(SIGRTMIN() + 0);
706 }
707 irq_chip.kick_halted_vcpus();
708 }
709