• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::{BTreeMap, BTreeSet};
6 use std::fs::{File, OpenOptions};
7 use std::io::prelude::*;
8 use std::os::unix::fs::FileExt;
9 use std::rc::Rc;
10 use std::sync::{mpsc, Arc, Barrier};
11 
12 use std::thread;
13 use std::thread::JoinHandle;
14 
15 use libc::{self, c_int};
16 
17 use anyhow::{Context, Result};
18 use base::*;
19 use devices::{self, IrqChip, VcpuRunState};
20 use hypervisor::{Vcpu, VcpuExit, VcpuRunHandle};
21 use vm_control::*;
22 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
23 use vm_memory::GuestMemory;
24 
25 use arch::{self, LinuxArch};
26 
27 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
28 use {
29     aarch64::AArch64 as Arch,
30     devices::IrqChipAArch64 as IrqChipArch,
31     hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
32 };
33 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
34 use {
35     devices::IrqChipX86_64 as IrqChipArch,
36     hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
37     x86_64::X8664arch as Arch,
38 };
39 
40 use super::ExitState;
41 
setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()>42 pub fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
43     if use_hypervisor_signals {
44         unsafe {
45             extern "C" fn handle_signal(_: c_int) {}
46             // Our signal handler does nothing and is trivially async signal safe.
47             register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
48                 .context("error registering signal handler")?;
49         }
50         block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
51     } else {
52         unsafe {
53             extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
54                 T::set_local_immediate_exit(true);
55             }
56             register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
57                 .context("error registering signal handler")?;
58         }
59     }
60     Ok(())
61 }
62 
/// Sets up a vcpu and converts it into a runnable vcpu.
///
/// Creates the vcpu on this thread if `vcpu` is `None` (some hypervisors require creation on
/// the vcpu thread), registers it with the irq chip, applies affinity/core-scheduling/cgroup/
/// real-time settings, performs the arch-specific configuration, and finally takes the run
/// handle that binds the vcpu to the current thread.
///
/// Scheduling-related failures (affinity, core scheduling, RT priority) are logged but
/// non-fatal; configuration and handle acquisition failures abort with an error.
pub fn runnable_vcpu<V>(
    cpu_id: usize,
    kvm_vcpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch,
    irq_chip: &mut dyn IrqChipArch,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    no_smt: bool,
    has_bios: bool,
    use_hypervisor_signals: bool,
    enable_per_vm_core_scheduling: bool,
    host_cpu_topology: bool,
    vcpu_cgroup_tasks_file: Option<File>,
) -> Result<(V, VcpuRunHandle)>
where
    V: VcpuArch,
{
    let mut vcpu = match vcpu {
        Some(v) => v,
        None => {
            // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
            // the vcpu thread.
            match vm
                .create_vcpu(kvm_vcpu_id)
                .context("failed to create vcpu")?
                .downcast::<V>()
            {
                Ok(v) => *v,
                // A downcast failure here is a programming error, not a runtime condition.
                Err(_) => panic!("VM created wrong type of VCPU"),
            }
        }
    };

    irq_chip
        .add_vcpu(cpu_id, &vcpu)
        .context("failed to add vcpu to irq chip")?;

    // Affinity is best-effort: failure degrades placement but the vcpu can still run.
    if !vcpu_affinity.is_empty() {
        if let Err(e) = set_cpu_affinity(vcpu_affinity) {
            error!("Failed to set CPU affinity: {}", e);
        }
    }

    Arch::configure_vcpu(
        &vm,
        vm.get_hypervisor(),
        irq_chip,
        &mut vcpu,
        cpu_id,
        vcpu_count,
        has_bios,
        no_smt,
        host_cpu_topology,
    )
    .context("failed to configure vcpu")?;

    if !enable_per_vm_core_scheduling {
        // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
        if let Err(e) = enable_core_scheduling() {
            error!("Failed to enable core scheduling: {}", e);
        }
    }

    // Move vcpu thread to cgroup
    if let Some(mut f) = vcpu_cgroup_tasks_file {
        f.write_all(base::gettid().to_string().as_bytes())
            .context("failed to write vcpu tid to cgroup tasks")?;
    }

    if run_rt {
        const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
        if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
            .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
        {
            warn!("Failed to set vcpu to real time: {}", e);
        }
    }

    if use_hypervisor_signals {
        // Unblock the kick signal only inside the hypervisor: remove it from the mask the
        // vcpu runs with so the signal interrupts guest execution but not host code.
        let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
        v.retain(|&x| x != SIGRTMIN() + 0);
        vcpu.set_signal_mask(&v)
            .context("failed to set the signal mask for vcpu")?;
    }

    let vcpu_run_handle = vcpu
        .take_run_handle(Some(SIGRTMIN() + 0))
        .context("failed to set thread id for vcpu")?;

    Ok((vcpu, vcpu_run_handle))
}
157 
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
/// Handles one `VcpuDebug` command from the GDB stub for the vcpu `cpu_id`.
///
/// Executes the requested register/memory/single-step/breakpoint operation via the arch
/// layer and sends the resulting `VcpuDebugStatusMessage` back on `reply_tube`.
///
/// Returns an error if the arch operation fails or the reply channel is closed. A failed
/// `ReadMem` is not an error: it replies with an empty memory region instead.
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // All replies share the same envelope; build and send it in one place.
    let reply = |msg: VcpuDebugStatus| -> Result<()> {
        reply_tube
            .send(VcpuDebugStatusMessage { cpu: cpu_id, msg })
            .context("failed to send a debug status to GDB thread")
    };

    match d {
        VcpuDebug::ReadRegs => reply(VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?,
        )),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::ReadMem(vaddr, len) => reply(VcpuDebugStatus::MemoryRegion(
            // A failed read reports an empty region rather than aborting the session.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        )),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            reply(VcpuDebugStatus::CommandComplete)
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            reply(VcpuDebugStatus::CommandComplete)
        }
    }
}
236 
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
/// S2idle forwarding to the host is only implemented on x86/x86_64; on other
/// architectures the guest's suspend request is silently ignored.
fn handle_s2idle_request(_privileged_vm: bool) {}
239 
240 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
handle_s2idle_request(privileged_vm: bool)241 fn handle_s2idle_request(privileged_vm: bool) {
242     const POWER_STATE_FREEZE: &[u8] = b"freeze";
243 
244     // For non privileged guests, we silently ignore the suspend request
245     if !privileged_vm {
246         return;
247     }
248 
249     let mut power_state = match OpenOptions::new().write(true).open("/sys/power/state") {
250         Ok(s) => s,
251         Err(err) => {
252             error!("Failed on open /sys/power/state: {}", err);
253             return;
254         }
255     };
256 
257     if let Err(err) = power_state.write(POWER_STATE_FREEZE) {
258         error!("Failed on writing to /sys/power/state: {}", err);
259         return;
260     }
261 }
262 
/// Runs the execution loop for a single vcpu until the VM stops, resets, or crashes.
///
/// Each iteration: (1) if kicked by a signal or not in `Running` mode, drains control
/// messages from `from_main_tube` (blocking while paused); (2) waits for the irq chip to
/// report the vcpu runnable; (3) enters the guest via `vcpu.run()` and dispatches the exit
/// reason to the io/mmio buses, MSR handlers, irq chip, or GDB stub; (4) clears the pending
/// kick; (5) injects pending interrupts. Returns the reason the loop terminated.
fn vcpu_loop<V>(
    mut run_mode: VmRunMode,
    cpu_id: usize,
    vcpu: V,
    vcpu_run_handle: VcpuRunHandle,
    irq_chip: Box<dyn IrqChipArch + 'static>,
    run_rt: bool,
    delay_rt: bool,
    io_bus: devices::Bus,
    mmio_bus: devices::Bus,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    privileged_vm: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] guest_mem: GuestMemory,
    msr_handlers: MsrHandlers,
) -> ExitState
where
    V: VcpuArch + 'static,
{
    let mut interrupted_by_signal = false;

    loop {
        // Start by checking for messages to process and the run state of the CPU.
        // An extra check here for Running so there isn't a need to call recv unless a
        // message is likely to be ready because a signal was sent.
        if interrupted_by_signal || run_mode != VmRunMode::Running {
            'state_loop: loop {
                // Tries to get a pending message without blocking first.
                let msg = match from_main_tube.try_recv() {
                    Ok(m) => m,
                    Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
                        // If the VM is running and no message is pending, the state won't
                        // change.
                        break 'state_loop;
                    }
                    Err(mpsc::TryRecvError::Empty) => {
                        // If the VM is not running, wait until a message is ready.
                        match from_main_tube.recv() {
                            Ok(m) => m,
                            Err(mpsc::RecvError) => {
                                error!("Failed to read from main tube in vcpu");
                                return ExitState::Crash;
                            }
                        }
                    }
                    Err(mpsc::TryRecvError::Disconnected) => {
                        error!("Failed to read from main tube in vcpu");
                        return ExitState::Crash;
                    }
                };

                // Collect all pending messages.
                let mut messages = vec![msg];
                messages.append(&mut from_main_tube.try_iter().collect());

                for msg in messages {
                    match msg {
                        VcpuControl::RunState(new_mode) => {
                            run_mode = new_mode;
                            match run_mode {
                                VmRunMode::Running => break 'state_loop,
                                VmRunMode::Suspending => {
                                    // On KVM implementations that use a paravirtualized
                                    // clock (e.g. x86), a flag must be set to indicate to
                                    // the guest kernel that a vCPU was suspended. The guest
                                    // kernel will use this flag to prevent the soft lockup
                                    // detection from triggering when this vCPU resumes,
                                    // which could happen days later in realtime.
                                    if requires_pvclock_ctrl {
                                        if let Err(e) = vcpu.pvclock_ctrl() {
                                            error!(
                                                "failed to tell hypervisor vcpu {} is suspending: {}",
                                                cpu_id, e
                                            );
                                        }
                                    }
                                }
                                VmRunMode::Breakpoint => {}
                                VmRunMode::Exiting => return ExitState::Stop,
                            }
                        }
                        #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                        VcpuControl::Debug(d) => match &to_gdb_tube {
                            Some(ref ch) => {
                                if let Err(e) = handle_debug_msg(cpu_id, &vcpu, &guest_mem, d, ch) {
                                    error!("Failed to handle gdb message: {}", e);
                                }
                            }
                            None => {
                                error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
                            }
                        },
                        VcpuControl::MakeRT => {
                            // Promote to real-time only if RT was requested but deferred at
                            // startup (delay_rt); otherwise the message is a no-op.
                            if run_rt && delay_rt {
                                info!("Making vcpu {} RT\n", cpu_id);
                                const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
                                if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
                                    .and_then(|_| {
                                        set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL))
                                    })
                                {
                                    warn!("Failed to set vcpu to real time: {}", e);
                                }
                            }
                        }
                    }
                }
            }
        }

        interrupted_by_signal = false;

        // Vcpus may have run a HLT instruction, which puts them into a state other than
        // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
        // until either the irqchip receives an interrupt for this vcpu, or until the main
        // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
        // implementations HLT instructions do not make it to crosvm, and thus this is a
        // no-op that always returns VcpuRunState::Runnable.
        match irq_chip.wait_until_runnable(&vcpu) {
            Ok(VcpuRunState::Runnable) => {}
            Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
            Err(e) => error!(
                "error waiting for vcpu {} to become runnable: {}",
                cpu_id, e
            ),
        }

        if !interrupted_by_signal {
            match vcpu.run(&vcpu_run_handle) {
                Ok(VcpuExit::IoIn { port, mut size }) => {
                    let mut data = [0; 8];
                    // Clamp oversized accesses to the 8-byte scratch buffer rather than
                    // panicking on the slice index below.
                    if size > data.len() {
                        error!(
                            "unsupported IoIn size of {} bytes at port {:#x}",
                            size, port
                        );
                        size = data.len();
                    }
                    io_bus.read(port as u64, &mut data[..size]);
                    if let Err(e) = vcpu.set_data(&data[..size]) {
                        error!(
                            "failed to set return data for IoIn at port {:#x}: {}",
                            port, e
                        );
                    }
                }
                Ok(VcpuExit::IoOut {
                    port,
                    mut size,
                    data,
                }) => {
                    if size > data.len() {
                        error!(
                            "unsupported IoOut size of {} bytes at port {:#x}",
                            size, port
                        );
                        size = data.len();
                    }
                    io_bus.write(port as u64, &data[..size]);
                }
                Ok(VcpuExit::MmioRead { address, size }) => {
                    let mut data = [0; 8];
                    mmio_bus.read(address, &mut data[..size]);
                    // Setting data for mmio can not fail.
                    let _ = vcpu.set_data(&data[..size]);
                }
                Ok(VcpuExit::MmioWrite {
                    address,
                    size,
                    data,
                }) => {
                    mmio_bus.write(address, &data[..size]);
                }
                Ok(VcpuExit::RdMsr { index }) => {
                    // Unhandled MSR reads are silently dropped; the vcpu data is left as-is.
                    if let Some(data) = msr_handlers.read(index) {
                        let _ = vcpu.set_data(&data.to_ne_bytes());
                    }
                }
                Ok(VcpuExit::WrMsr { .. }) => {
                    // TODO(b/215297064): implement MSR write
                }
                Ok(VcpuExit::IoapicEoi { vector }) => {
                    if let Err(e) = irq_chip.broadcast_eoi(vector) {
                        error!(
                            "failed to broadcast eoi {} on vcpu {}: {}",
                            vector, cpu_id, e
                        );
                    }
                }
                Ok(VcpuExit::IrqWindowOpen) => {}
                Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
                Ok(VcpuExit::Shutdown) => return ExitState::Stop,
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                }) => {
                    error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                    return ExitState::Crash;
                }
                Ok(VcpuExit::SystemEventShutdown) => {
                    info!("system shutdown event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventReset) => {
                    info!("system reset event");
                    return ExitState::Reset;
                }
                Ok(VcpuExit::SystemEventCrash) => {
                    info!("system crash event on vcpu {}", cpu_id);
                    return ExitState::Stop;
                }
                Ok(VcpuExit::SystemEventS2Idle) => {
                    handle_s2idle_request(privileged_vm);
                }
                #[rustfmt::skip] Ok(VcpuExit::Debug { .. }) => {
                    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                    {
                        let msg = VcpuDebugStatusMessage {
                            cpu: cpu_id as usize,
                            msg: VcpuDebugStatus::HitBreakPoint,
                        };
                        if let Some(ref ch) = to_gdb_tube {
                            if let Err(e) = ch.send(msg) {
                                error!("failed to notify breakpoint to GDB thread: {}", e);
                                return ExitState::Crash;
                            }
                        }
                        run_mode = VmRunMode::Breakpoint;
                    }
                }
                Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                Err(e) => match e.errno() {
                    // EINTR: the kick signal landed while in the run ioctl; handle it below.
                    libc::EINTR => interrupted_by_signal = true,
                    libc::EAGAIN => {}
                    _ => {
                        error!("vcpu hit unknown error: {}", e);
                        return ExitState::Crash;
                    }
                },
            }
        }

        if interrupted_by_signal {
            if use_hypervisor_signals {
                // Try to clear the signal that we use to kick VCPU if it is pending before
                // attempting to handle pause requests.
                if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                    error!("failed to clear pending signal: {}", e);
                    return ExitState::Crash;
                }
            } else {
                vcpu.set_immediate_exit(false);
            }
        }

        if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
            error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
        }
    }
}
526 
/// Interface for servicing guest MSR accesses in userspace.
trait MsrHandling {
    /// Reads the current value of the MSR at `index`.
    fn read(&self, index: u32) -> Result<u64>;
    /// Writes `data` to the MSR at `index`.
    fn write(&self, index: u32, data: u64) -> Result<()>;
}
531 
/// MSR handler that passes guest reads straight through to the host MSR device.
struct ReadPassthrough {
    // Open handle to the host msr device; the MSR index is used as the read offset.
    dev_msr: std::fs::File,
}
535 
536 impl MsrHandling for ReadPassthrough {
read(&self, index: u32) -> Result<u64>537     fn read(&self, index: u32) -> Result<u64> {
538         let mut data = [0; 8];
539         self.dev_msr.read_exact_at(&mut data, index.into())?;
540         Ok(u64::from_ne_bytes(data))
541     }
542 
write(&self, _index: u32, _data: u64) -> Result<()>543     fn write(&self, _index: u32, _data: u64) -> Result<()> {
544         // TODO(b/215297064): implement MSR write
545         unimplemented!();
546     }
547 }
548 
549 impl ReadPassthrough {
new() -> Result<Self>550     fn new() -> Result<Self> {
551         // TODO(b/215297064): Support reading from other CPUs than 0, should match running CPU.
552         let filename = "/dev/cpu/0/msr";
553         let dev_msr = OpenOptions::new()
554             .read(true)
555             .open(&filename)
556             .context("Cannot open /dev/cpu/0/msr, are you root?")?;
557         Ok(ReadPassthrough { dev_msr })
558     }
559 }
560 
/// MSR handler configuration. Per-cpu.
struct MsrHandlers {
    // Maps MSR index -> handler. Rc lets a single handler instance serve many indices.
    handler: BTreeMap<u32, Rc<Box<dyn MsrHandling>>>,
}
565 
566 impl MsrHandlers {
new() -> Self567     fn new() -> Self {
568         MsrHandlers {
569             handler: BTreeMap::new(),
570         }
571     }
572 
read(&self, index: u32) -> Option<u64>573     fn read(&self, index: u32) -> Option<u64> {
574         if let Some(handler) = self.handler.get(&index) {
575             match handler.read(index) {
576                 Ok(data) => Some(data),
577                 Err(e) => {
578                     error!("MSR host read failed {:#x} {:?}", index, e);
579                     None
580                 }
581             }
582         } else {
583             None
584         }
585     }
586 }
587 
/// Spawns the dedicated thread for one vcpu and returns its `JoinHandle`.
///
/// The thread installs userspace MSR handlers for `userspace_msr`, builds a runnable vcpu,
/// waits on `start_barrier` so all vcpus begin together, and then runs `vcpu_loop`. On exit
/// it signals exactly one of `exit_evt` / `reset_evt` / `crash_evt`; the `ScopedEvent`
/// wrapper guarantees `exit_evt` still fires if the thread bails out early or panics.
pub fn run_vcpu<V>(
    cpu_id: usize,
    kvm_vcpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch + 'static,
    mut irq_chip: Box<dyn IrqChipArch + 'static>,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    delay_rt: bool,
    no_smt: bool,
    start_barrier: Arc<Barrier>,
    has_bios: bool,
    mut io_bus: devices::Bus,
    mut mmio_bus: devices::Bus,
    exit_evt: Event,
    reset_evt: Event,
    crash_evt: Event,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    enable_per_vm_core_scheduling: bool,
    host_cpu_topology: bool,
    privileged_vm: bool,
    vcpu_cgroup_tasks_file: Option<File>,
    userspace_msr: BTreeSet<u32>,
) -> Result<JoinHandle<()>>
where
    V: VcpuArch + 'static,
{
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // The VCPU thread must trigger either `exit_evt` or `reset_event` in all paths. A
            // `ScopedEvent`'s Drop implementation ensures that the `exit_evt` will be sent if
            // anything happens before we get to writing the final event.
            let scoped_exit_evt = ScopedEvent::from(exit_evt);

            let mut msr_handlers = MsrHandlers::new();
            if !userspace_msr.is_empty() {
                // One shared passthrough handler serves every requested MSR index.
                let read_passthrough: Rc<Box<dyn MsrHandling>> = match ReadPassthrough::new() {
                    Ok(r) => Rc::new(Box::new(r)),
                    Err(e) => {
                        error!(
                            "failed to create MSR read passthrough handler for vcpu {}: {:#}",
                            cpu_id, e
                        );
                        // Early return: scoped_exit_evt's Drop signals exit_evt for us.
                        return;
                    }
                };

                userspace_msr.iter().for_each(|&index| {
                    msr_handlers.handler.insert(index, read_passthrough.clone());
                });
            }

            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            let guest_mem = vm.get_memory().clone();
            let runnable_vcpu = runnable_vcpu(
                cpu_id,
                kvm_vcpu_id,
                vcpu,
                vm,
                irq_chip.as_mut(),
                vcpu_count,
                run_rt && !delay_rt,
                vcpu_affinity,
                no_smt,
                has_bios,
                use_hypervisor_signals,
                enable_per_vm_core_scheduling,
                host_cpu_topology,
                vcpu_cgroup_tasks_file,
            );

            // All vcpu threads rendezvous here before any of them starts running guest code.
            start_barrier.wait();

            let (vcpu, vcpu_run_handle) = match runnable_vcpu {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to start vcpu {}: {:#}", cpu_id, e);
                    // Early return: scoped_exit_evt's Drop signals exit_evt for us.
                    return;
                }
            };

            #[allow(unused_mut)]
            let mut run_mode = VmRunMode::Running;
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            if to_gdb_tube.is_some() {
                // Wait until a GDB client attaches
                run_mode = VmRunMode::Breakpoint;
            }

            mmio_bus.set_access_id(cpu_id);
            io_bus.set_access_id(cpu_id);

            let exit_reason = vcpu_loop(
                run_mode,
                cpu_id,
                vcpu,
                vcpu_run_handle,
                irq_chip,
                run_rt,
                delay_rt,
                io_bus,
                mmio_bus,
                requires_pvclock_ctrl,
                from_main_tube,
                use_hypervisor_signals,
                privileged_vm,
                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                to_gdb_tube,
                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                guest_mem,
                msr_handlers,
            );

            // Reclaim the raw event so Drop no longer fires it, then signal the one event
            // matching the loop's exit reason.
            let exit_evt = scoped_exit_evt.into();
            let final_event = match exit_reason {
                ExitState::Stop => Some(exit_evt),
                ExitState::Reset => Some(reset_evt),
                ExitState::Crash => Some(crash_evt),
                // vcpu_loop doesn't exit with GuestPanic.
                ExitState::GuestPanic => None,
            };
            if let Some(final_event) = final_event {
                if let Err(e) = final_event.write(1) {
                    error!(
                        "failed to send final event {:?} on vcpu {}: {}",
                        final_event, cpu_id, e
                    )
                }
            }
        })
        .context("failed to spawn VCPU thread")
}
727 
728 /// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
729 /// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
730 /// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
731 /// loop.
kick_all_vcpus( vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)], irq_chip: &dyn IrqChip, message: VcpuControl, )732 pub fn kick_all_vcpus(
733     vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
734     irq_chip: &dyn IrqChip,
735     message: VcpuControl,
736 ) {
737     for (handle, tube) in vcpu_handles {
738         if let Err(e) = tube.send(message.clone()) {
739             error!("failed to send VcpuControl: {}", e);
740         }
741         let _ = handle.kill(SIGRTMIN() + 0);
742     }
743     irq_chip.kick_halted_vcpus();
744 }
745