1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::{BTreeMap, BTreeSet};
6 use std::fs::{File, OpenOptions};
7 use std::io::prelude::*;
8 use std::os::unix::fs::FileExt;
9 use std::rc::Rc;
10 use std::sync::{mpsc, Arc, Barrier};
11
12 use std::thread;
13 use std::thread::JoinHandle;
14
15 use libc::{self, c_int};
16
17 use anyhow::{Context, Result};
18 use base::*;
19 use devices::{self, IrqChip, VcpuRunState};
20 use hypervisor::{Vcpu, VcpuExit, VcpuRunHandle};
21 use vm_control::*;
22 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
23 use vm_memory::GuestMemory;
24
25 use arch::{self, LinuxArch};
26
27 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
28 use {
29 aarch64::AArch64 as Arch,
30 devices::IrqChipAArch64 as IrqChipArch,
31 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
32 };
33 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
34 use {
35 devices::IrqChipX86_64 as IrqChipArch,
36 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
37 x86_64::X8664arch as Arch,
38 };
39
40 use super::ExitState;
41
setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()>42 pub fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
43 if use_hypervisor_signals {
44 unsafe {
45 extern "C" fn handle_signal(_: c_int) {}
46 // Our signal handler does nothing and is trivially async signal safe.
47 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
48 .context("error registering signal handler")?;
49 }
50 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
51 } else {
52 unsafe {
53 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
54 T::set_local_immediate_exit(true);
55 }
56 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
57 .context("error registering signal handler")?;
58 }
59 }
60 Ok(())
61 }
62
/// Sets up a vcpu and converts it into a runnable vcpu.
///
/// Creates the vcpu if one was not supplied (some arch/hypervisor combinations
/// require `create_vcpu` to run on the vcpu thread itself), registers it with
/// the irq chip, applies CPU affinity / core-scheduling / cgroup / realtime
/// settings, configures the architectural register state, and finally takes
/// the run handle that binds the vcpu to this thread.
///
/// Affinity, core-scheduling, and RT-priority failures are logged and
/// tolerated; all other failures abort with an error.
pub fn runnable_vcpu<V>(
    cpu_id: usize,
    kvm_vcpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch,
    irq_chip: &mut dyn IrqChipArch,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    no_smt: bool,
    has_bios: bool,
    use_hypervisor_signals: bool,
    enable_per_vm_core_scheduling: bool,
    host_cpu_topology: bool,
    vcpu_cgroup_tasks_file: Option<File>,
) -> Result<(V, VcpuRunHandle)>
where
    V: VcpuArch,
{
    let mut vcpu = match vcpu {
        Some(v) => v,
        None => {
            // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
            // the vcpu thread.
            match vm
                .create_vcpu(kvm_vcpu_id)
                .context("failed to create vcpu")?
                .downcast::<V>()
            {
                Ok(v) => *v,
                // A type mismatch here is a programming error, not a runtime
                // condition, hence panic rather than Err.
                Err(_) => panic!("VM created wrong type of VCPU"),
            }
        }
    };

    irq_chip
        .add_vcpu(cpu_id, &vcpu)
        .context("failed to add vcpu to irq chip")?;

    // Best effort: an affinity failure should not prevent the vcpu from running.
    if !vcpu_affinity.is_empty() {
        if let Err(e) = set_cpu_affinity(vcpu_affinity) {
            error!("Failed to set CPU affinity: {}", e);
        }
    }

    Arch::configure_vcpu(
        &vm,
        vm.get_hypervisor(),
        irq_chip,
        &mut vcpu,
        cpu_id,
        vcpu_count,
        has_bios,
        no_smt,
        host_cpu_topology,
    )
    .context("failed to configure vcpu")?;

    if !enable_per_vm_core_scheduling {
        // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
        if let Err(e) = enable_core_scheduling() {
            error!("Failed to enable core scheduling: {}", e);
        }
    }

    // Move vcpu thread to cgroup
    if let Some(mut f) = vcpu_cgroup_tasks_file {
        f.write_all(base::gettid().to_string().as_bytes())
            .context("failed to write vcpu tid to cgroup tasks")?;
    }

    if run_rt {
        const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
        if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
            .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
        {
            warn!("Failed to set vcpu to real time: {}", e);
        }
    }

    if use_hypervisor_signals {
        // Build this vcpu's signal mask from the currently blocked signals,
        // minus SIGRTMIN+0 — the kick signal registered in
        // setup_vcpu_signal_handler.
        let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
        v.retain(|&x| x != SIGRTMIN() + 0);
        vcpu.set_signal_mask(&v)
            .context("failed to set the signal mask for vcpu")?;
    }

    let vcpu_run_handle = vcpu
        .take_run_handle(Some(SIGRTMIN() + 0))
        .context("failed to set thread id for vcpu")?;

    Ok((vcpu, vcpu_run_handle))
}
157
/// Services one debug command from the GDB stub on this vcpu's thread and
/// reports the outcome back over `reply_tube`.
///
/// Returns an error if executing the command fails (except `ReadMem`, which
/// degrades to an empty region) or if the reply cannot be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Every reply shares the same envelope; build and send it in one place
    // instead of repeating the struct literal in each match arm.
    let send_reply = |msg: VcpuDebugStatus| {
        reply_tube
            .send(VcpuDebugStatusMessage { cpu: cpu_id, msg })
            .context("failed to send a debug status to GDB thread")
    };

    match d {
        VcpuDebug::ReadRegs => send_reply(VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).context("failed to handle a gdb ReadRegs command")?,
        )),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            send_reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::ReadMem(vaddr, len) => send_reply(VcpuDebugStatus::MemoryRegion(
            // A failed guest-memory read reports an empty region instead of
            // propagating an error to the GDB thread.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        )),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            send_reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            send_reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            send_reply(VcpuDebugStatus::CommandComplete)
        }
    }
}
236
// Guest suspend-to-idle requests are only acted upon on x86; on other
// architectures the request is silently ignored.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
fn handle_s2idle_request(_privileged_vm: bool) {}
239
240 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
handle_s2idle_request(privileged_vm: bool)241 fn handle_s2idle_request(privileged_vm: bool) {
242 const POWER_STATE_FREEZE: &[u8] = b"freeze";
243
244 // For non privileged guests, we silently ignore the suspend request
245 if !privileged_vm {
246 return;
247 }
248
249 let mut power_state = match OpenOptions::new().write(true).open("/sys/power/state") {
250 Ok(s) => s,
251 Err(err) => {
252 error!("Failed on open /sys/power/state: {}", err);
253 return;
254 }
255 };
256
257 if let Err(err) = power_state.write(POWER_STATE_FREEZE) {
258 error!("Failed on writing to /sys/power/state: {}", err);
259 return;
260 }
261 }
262
/// Runs a vcpu until the VM exits.
///
/// Each iteration: (1) if kicked or not Running, drains control messages from
/// `from_main_tube` and updates `run_mode`; (2) waits for the vcpu to be
/// runnable; (3) runs the vcpu and dispatches the resulting `VcpuExit` to the
/// I/O buses, irq chip, or MSR handlers; (4) clears the kick signal and
/// injects pending interrupts.
///
/// Returns the reason the loop ended (never `ExitState::GuestPanic`).
fn vcpu_loop<V>(
    mut run_mode: VmRunMode,
    cpu_id: usize,
    vcpu: V,
    vcpu_run_handle: VcpuRunHandle,
    irq_chip: Box<dyn IrqChipArch + 'static>,
    run_rt: bool,
    delay_rt: bool,
    io_bus: devices::Bus,
    mmio_bus: devices::Bus,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    privileged_vm: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] guest_mem: GuestMemory,
    msr_handlers: MsrHandlers,
) -> ExitState
where
    V: VcpuArch + 'static,
{
    let mut interrupted_by_signal = false;

    loop {
        // Start by checking for messages to process and the run state of the CPU.
        // An extra check here for Running so there isn't a need to call recv unless a
        // message is likely to be ready because a signal was sent.
        if interrupted_by_signal || run_mode != VmRunMode::Running {
            'state_loop: loop {
                // Tries to get a pending message without blocking first.
                let msg = match from_main_tube.try_recv() {
                    Ok(m) => m,
                    Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
                        // If the VM is running and no message is pending, the state won't
                        // change.
                        break 'state_loop;
                    }
                    Err(mpsc::TryRecvError::Empty) => {
                        // If the VM is not running, wait until a message is ready.
                        match from_main_tube.recv() {
                            Ok(m) => m,
                            Err(mpsc::RecvError) => {
                                error!("Failed to read from main tube in vcpu");
                                return ExitState::Crash;
                            }
                        }
                    }
                    Err(mpsc::TryRecvError::Disconnected) => {
                        // Sender dropped: the main thread is gone, so treat it
                        // as a crash.
                        error!("Failed to read from main tube in vcpu");
                        return ExitState::Crash;
                    }
                };

                // Collect all pending messages.
                let mut messages = vec![msg];
                messages.append(&mut from_main_tube.try_iter().collect());

                for msg in messages {
                    match msg {
                        VcpuControl::RunState(new_mode) => {
                            run_mode = new_mode;
                            match run_mode {
                                VmRunMode::Running => break 'state_loop,
                                VmRunMode::Suspending => {
                                    // On KVM implementations that use a paravirtualized
                                    // clock (e.g. x86), a flag must be set to indicate to
                                    // the guest kernel that a vCPU was suspended. The guest
                                    // kernel will use this flag to prevent the soft lockup
                                    // detection from triggering when this vCPU resumes,
                                    // which could happen days later in realtime.
                                    if requires_pvclock_ctrl {
                                        if let Err(e) = vcpu.pvclock_ctrl() {
                                            error!(
                                                "failed to tell hypervisor vcpu {} is suspending: {}",
                                                cpu_id, e
                                            );
                                        }
                                    }
                                }
                                VmRunMode::Breakpoint => {}
                                VmRunMode::Exiting => return ExitState::Stop,
                            }
                        }
                        #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                        VcpuControl::Debug(d) => match &to_gdb_tube {
                            Some(ref ch) => {
                                if let Err(e) = handle_debug_msg(cpu_id, &vcpu, &guest_mem, d, ch) {
                                    error!("Failed to handle gdb message: {}", e);
                                }
                            }
                            None => {
                                error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
                            }
                        },
                        VcpuControl::MakeRT => {
                            // Only honored when RT was requested at startup but
                            // deliberately delayed (run_rt && delay_rt).
                            if run_rt && delay_rt {
                                info!("Making vcpu {} RT\n", cpu_id);
                                const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
                                if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
                                    .and_then(|_| {
                                        set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL))
                                    })
                                {
                                    warn!("Failed to set vcpu to real time: {}", e);
                                }
                            }
                        }
                    }
                }
            }
        }

        interrupted_by_signal = false;

        // Vcpus may have run a HLT instruction, which puts them into a state other than
        // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
        // until either the irqchip receives an interrupt for this vcpu, or until the main
        // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
        // implementations HLT instructions do not make it to crosvm, and thus this is a
        // no-op that always returns VcpuRunState::Runnable.
        match irq_chip.wait_until_runnable(&vcpu) {
            Ok(VcpuRunState::Runnable) => {}
            Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
            Err(e) => error!(
                "error waiting for vcpu {} to become runnable: {}",
                cpu_id, e
            ),
        }

        if !interrupted_by_signal {
            match vcpu.run(&vcpu_run_handle) {
                Ok(VcpuExit::IoIn { port, mut size }) => {
                    // Port I/O accesses are at most 8 bytes; larger requests
                    // are clamped (and logged) rather than rejected.
                    let mut data = [0; 8];
                    if size > data.len() {
                        error!(
                            "unsupported IoIn size of {} bytes at port {:#x}",
                            size, port
                        );
                        size = data.len();
                    }
                    io_bus.read(port as u64, &mut data[..size]);
                    if let Err(e) = vcpu.set_data(&data[..size]) {
                        error!(
                            "failed to set return data for IoIn at port {:#x}: {}",
                            port, e
                        );
                    }
                }
                Ok(VcpuExit::IoOut {
                    port,
                    mut size,
                    data,
                }) => {
                    if size > data.len() {
                        error!(
                            "unsupported IoOut size of {} bytes at port {:#x}",
                            size, port
                        );
                        size = data.len();
                    }
                    io_bus.write(port as u64, &data[..size]);
                }
                Ok(VcpuExit::MmioRead { address, size }) => {
                    let mut data = [0; 8];
                    mmio_bus.read(address, &mut data[..size]);
                    // Setting data for mmio can not fail.
                    let _ = vcpu.set_data(&data[..size]);
                }
                Ok(VcpuExit::MmioWrite {
                    address,
                    size,
                    data,
                }) => {
                    mmio_bus.write(address, &data[..size]);
                }
                Ok(VcpuExit::RdMsr { index }) => {
                    // If no userspace handler is registered (or the read
                    // fails), no data is set for the vcpu.
                    if let Some(data) = msr_handlers.read(index) {
                        let _ = vcpu.set_data(&data.to_ne_bytes());
                    }
                }
                Ok(VcpuExit::WrMsr { .. }) => {
                    // TODO(b/215297064): implement MSR write
                }
                Ok(VcpuExit::IoapicEoi { vector }) => {
                    if let Err(e) = irq_chip.broadcast_eoi(vector) {
                        error!(
                            "failed to broadcast eoi {} on vcpu {}: {}",
                            vector, cpu_id, e
                        );
                    }
                }
                Ok(VcpuExit::IrqWindowOpen) => {}
                Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
                Ok(VcpuExit::Shutdown) => return ExitState::Stop,
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                }) => {
                    error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                    return ExitState::Crash;
                }
                Ok(VcpuExit::SystemEventShutdown) => {
                    info!("system shutdown event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventReset) => {
                    info!("system reset event");
                    return ExitState::Reset;
                }
                Ok(VcpuExit::SystemEventCrash) => {
                    info!("system crash event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventS2Idle) => {
                    handle_s2idle_request(privileged_vm);
                }
                #[rustfmt::skip] Ok(VcpuExit::Debug { .. }) => {
                    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                    {
                        let msg = VcpuDebugStatusMessage {
                            cpu: cpu_id as usize,
                            msg: VcpuDebugStatus::HitBreakPoint,
                        };
                        if let Some(ref ch) = to_gdb_tube {
                            if let Err(e) = ch.send(msg) {
                                error!("failed to notify breakpoint to GDB thread: {}", e);
                                return ExitState::Crash;
                            }
                        }
                        // Stay stopped until the GDB client resumes us.
                        run_mode = VmRunMode::Breakpoint;
                    }
                }
                Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                Err(e) => match e.errno() {
                    // EINTR/EAGAIN are expected interruptions of vcpu.run(),
                    // not errors.
                    libc::EINTR => interrupted_by_signal = true,
                    libc::EAGAIN => {}
                    _ => {
                        error!("vcpu hit unknown error: {}", e);
                        return ExitState::Crash;
                    }
                },
            }
        }

        if interrupted_by_signal {
            if use_hypervisor_signals {
                // Try to clear the signal that we use to kick VCPU if it is pending before
                // attempting to handle pause requests.
                if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                    error!("failed to clear pending signal: {}", e);
                    return ExitState::Crash;
                }
            } else {
                vcpu.set_immediate_exit(false);
            }
        }

        if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
            error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
        }
    }
}
526
/// Handles MSR (model-specific register) accesses that the hypervisor exits
/// to userspace.
trait MsrHandling {
    /// Reads the current value of the MSR at `index`.
    fn read(&self, index: u32) -> Result<u64>;
    /// Writes `data` to the MSR at `index`.
    fn write(&self, index: u32, data: u64) -> Result<()>;
}
531
/// MSR handler that passes reads straight through to the host via the
/// /dev/cpu/0/msr device (see `ReadPassthrough::new`). Writes are not
/// implemented.
struct ReadPassthrough {
    // Open handle to the host MSR device.
    dev_msr: std::fs::File,
}
535
536 impl MsrHandling for ReadPassthrough {
read(&self, index: u32) -> Result<u64>537 fn read(&self, index: u32) -> Result<u64> {
538 let mut data = [0; 8];
539 self.dev_msr.read_exact_at(&mut data, index.into())?;
540 Ok(u64::from_ne_bytes(data))
541 }
542
write(&self, _index: u32, _data: u64) -> Result<()>543 fn write(&self, _index: u32, _data: u64) -> Result<()> {
544 // TODO(b/215297064): implement MSR write
545 unimplemented!();
546 }
547 }
548
549 impl ReadPassthrough {
new() -> Result<Self>550 fn new() -> Result<Self> {
551 // TODO(b/215297064): Support reading from other CPUs than 0, should match running CPU.
552 let filename = "/dev/cpu/0/msr";
553 let dev_msr = OpenOptions::new()
554 .read(true)
555 .open(&filename)
556 .context("Cannot open /dev/cpu/0/msr, are you root?")?;
557 Ok(ReadPassthrough { dev_msr })
558 }
559 }
560
/// MSR handler configuration. Per-cpu.
struct MsrHandlers {
    // Maps MSR index -> handler. A single handler instance may be registered
    // for many indices, hence the shared Rc.
    handler: BTreeMap<u32, Rc<Box<dyn MsrHandling>>>,
}
565
566 impl MsrHandlers {
new() -> Self567 fn new() -> Self {
568 MsrHandlers {
569 handler: BTreeMap::new(),
570 }
571 }
572
read(&self, index: u32) -> Option<u64>573 fn read(&self, index: u32) -> Option<u64> {
574 if let Some(handler) = self.handler.get(&index) {
575 match handler.read(index) {
576 Ok(data) => Some(data),
577 Err(e) => {
578 error!("MSR host read failed {:#x} {:?}", index, e);
579 None
580 }
581 }
582 } else {
583 None
584 }
585 }
586 }
587
/// Spawns the thread ("crosvm_vcpu{cpu_id}") that owns and runs one vcpu.
///
/// The thread sets up MSR handlers, builds a runnable vcpu, rendezvouses on
/// `start_barrier` with the other vcpus, then enters `vcpu_loop`. In every
/// path it signals exactly one of `exit_evt` / `reset_evt` / `crash_evt`:
/// early-return setup failures trigger `exit_evt` via the `ScopedEvent`
/// guard's Drop.
///
/// Returns the spawned thread's join handle.
pub fn run_vcpu<V>(
    cpu_id: usize,
    kvm_vcpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch + 'static,
    mut irq_chip: Box<dyn IrqChipArch + 'static>,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    delay_rt: bool,
    no_smt: bool,
    start_barrier: Arc<Barrier>,
    has_bios: bool,
    mut io_bus: devices::Bus,
    mut mmio_bus: devices::Bus,
    exit_evt: Event,
    reset_evt: Event,
    crash_evt: Event,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    enable_per_vm_core_scheduling: bool,
    host_cpu_topology: bool,
    privileged_vm: bool,
    vcpu_cgroup_tasks_file: Option<File>,
    userspace_msr: BTreeSet<u32>,
) -> Result<JoinHandle<()>>
where
    V: VcpuArch + 'static,
{
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // The VCPU thread must trigger either `exit_evt` or `reset_event` in all paths. A
            // `ScopedEvent`'s Drop implementation ensures that the `exit_evt` will be sent if
            // anything happens before we get to writing the final event.
            let scoped_exit_evt = ScopedEvent::from(exit_evt);

            // Register a read-passthrough handler for every MSR index the
            // caller wants serviced in userspace; all indices share one
            // handler instance.
            let mut msr_handlers = MsrHandlers::new();
            if !userspace_msr.is_empty() {
                let read_passthrough: Rc<Box<dyn MsrHandling>> = match ReadPassthrough::new() {
                    Ok(r) => Rc::new(Box::new(r)),
                    Err(e) => {
                        error!(
                            "failed to create MSR read passthrough handler for vcpu {}: {:#}",
                            cpu_id, e
                        );
                        return;
                    }
                };

                userspace_msr.iter().for_each(|&index| {
                    msr_handlers.handler.insert(index, read_passthrough.clone());
                });
            }

            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            let guest_mem = vm.get_memory().clone();
            // RT priority is withheld here when delay_rt is set; it is applied
            // later when a VcpuControl::MakeRT message arrives.
            let runnable_vcpu = runnable_vcpu(
                cpu_id,
                kvm_vcpu_id,
                vcpu,
                vm,
                irq_chip.as_mut(),
                vcpu_count,
                run_rt && !delay_rt,
                vcpu_affinity,
                no_smt,
                has_bios,
                use_hypervisor_signals,
                enable_per_vm_core_scheduling,
                host_cpu_topology,
                vcpu_cgroup_tasks_file,
            );

            // Wait for all other vcpu threads (and the main thread) before
            // reporting setup results or starting to run.
            start_barrier.wait();

            let (vcpu, vcpu_run_handle) = match runnable_vcpu {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to start vcpu {}: {:#}", cpu_id, e);
                    return;
                }
            };

            #[allow(unused_mut)]
            let mut run_mode = VmRunMode::Running;
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            if to_gdb_tube.is_some() {
                // Wait until a GDB client attaches
                run_mode = VmRunMode::Breakpoint;
            }

            // NOTE(review): presumably tags this vcpu's bus accesses with its
            // id — see devices::Bus::set_access_id for the exact semantics.
            mmio_bus.set_access_id(cpu_id);
            io_bus.set_access_id(cpu_id);

            let exit_reason = vcpu_loop(
                run_mode,
                cpu_id,
                vcpu,
                vcpu_run_handle,
                irq_chip,
                run_rt,
                delay_rt,
                io_bus,
                mmio_bus,
                requires_pvclock_ctrl,
                from_main_tube,
                use_hypervisor_signals,
                privileged_vm,
                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                to_gdb_tube,
                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                guest_mem,
                msr_handlers,
            );

            // Reclaim exit_evt from the scope guard so the event matching the
            // actual exit reason can be signaled instead.
            let exit_evt = scoped_exit_evt.into();
            let final_event = match exit_reason {
                ExitState::Stop => Some(exit_evt),
                ExitState::Reset => Some(reset_evt),
                ExitState::Crash => Some(crash_evt),
                // vcpu_loop doesn't exit with GuestPanic.
                ExitState::GuestPanic => None,
            };
            if let Some(final_event) = final_event {
                if let Err(e) = final_event.write(1) {
                    error!(
                        "failed to send final event {:?} on vcpu {}: {}",
                        final_event, cpu_id, e
                    )
                }
            }
        })
        .context("failed to spawn VCPU thread")
}
727
728 /// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
729 /// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
730 /// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
731 /// loop.
kick_all_vcpus( vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)], irq_chip: &dyn IrqChip, message: VcpuControl, )732 pub fn kick_all_vcpus(
733 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
734 irq_chip: &dyn IrqChip,
735 message: VcpuControl,
736 ) {
737 for (handle, tube) in vcpu_handles {
738 if let Err(e) = tube.send(message.clone()) {
739 error!("failed to send VcpuControl: {}", e);
740 }
741 let _ = handle.kill(SIGRTMIN() + 0);
742 }
743 irq_chip.kick_halted_vcpus();
744 }
745