• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use core::ffi::c_void;
6 use std::arch::x86_64::CpuidResult;
7 use std::collections::BTreeMap;
8 use std::convert::TryInto;
9 use std::mem::size_of;
10 use std::mem::size_of_val;
11 use std::sync::Arc;
12 
13 use base::Error;
14 use base::Result;
15 use libc::EINVAL;
16 use libc::EIO;
17 use libc::ENOENT;
18 use libc::ENXIO;
19 use snapshot::AnySnapshot;
20 use vm_memory::GuestAddress;
21 use winapi::shared::winerror::E_UNEXPECTED;
22 use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;
23 
24 use super::types::*;
25 use super::*;
26 use crate::CpuId;
27 use crate::CpuIdEntry;
28 use crate::DebugRegs;
29 use crate::Fpu;
30 use crate::IoOperation;
31 use crate::IoParams;
32 use crate::Regs;
33 use crate::Sregs;
34 use crate::Vcpu;
35 use crate::VcpuExit;
36 use crate::VcpuX86_64;
37 use crate::Xsave;
38 
// Values of the `Direction` field of WHV_EMULATOR_MEMORY_ACCESS_INFO and
// WHV_EMULATOR_IO_ACCESS_INFO, as matched in `memory_cb` and `io_port_cb` below:
// 0 means the guest is reading (MMIO read / port IN), 1 means it is writing
// (MMIO write / port OUT).
const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;
43 
/// This is the whpx instruction emulator, useful for deconstructing
/// io & memory port instructions. Whpx does not do this automatically.
struct SafeInstructionEmulator {
    // Raw handle returned by WHvEmulatorCreateEmulator; released in Drop via
    // WHvEmulatorDestroyEmulator.
    handle: WHV_EMULATOR_HANDLE,
}
49 
impl SafeInstructionEmulator {
    /// Creates a WHPX instruction emulator wired up to the callbacks implemented
    /// on `SafeInstructionEmulator` (port io, mmio, register get/set, and GVA
    /// translation).
    ///
    /// Returns an error if `WHvEmulatorCreateEmulator` fails.
    fn new() -> Result<SafeInstructionEmulator> {
        // Callback table handed to the emulator. `Size` reports the byte size of
        // this struct to the API.
        const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
            Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
            Reserved: 0,
            WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
            WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
            WHvEmulatorGetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::get_virtual_processor_registers_cb,
            ),
            WHvEmulatorSetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::set_virtual_processor_registers_cb,
            ),
            WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
        };
        let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
        // SAFETY: safe because we pass in valid callbacks and an emulator handle
        // out-pointer for the kernel to place the allocated handle into.
        check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;

        Ok(SafeInstructionEmulator { handle })
    }
}
73 
/// Host-side callbacks the WHPX instruction emulator invokes while emulating a
/// single faulting instruction: performing the actual port/memory access,
/// reading and writing vcpu registers, and translating guest virtual addresses.
/// All callbacks use the "stdcall" ABI because they are called by the
/// WinHvEmulation library.
trait InstructionEmulatorCallbacks {
    /// Performs the port io access described by `io_access`. `context` is the
    /// opaque pointer supplied to the WHvEmulatorTry*Emulation call.
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT;
    /// Performs the mmio access described by `memory_access`.
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT;
    /// Reads `register_count` registers named by `register_names` into
    /// `register_values`.
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT;
    /// Writes `register_count` registers named by `register_names` from
    /// `register_values`.
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT;
    /// Translates guest virtual address `gva` to a guest physical address,
    /// writing the result code and the translated address through the provided
    /// out-pointers.
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT;
}
103 
/// Context passed into the instruction emulator when trying io or mmio emulation.
/// Since we need this for set/get registers and memory translation,
/// a single context is used that captures all necessary contextual information for the operation.
struct InstructionEmulatorContext<'a> {
    // Partition the faulting vcpu belongs to; kept alive for the duration of the
    // emulation callbacks.
    vm_partition: Arc<SafePartition>,
    // Index of the vcpu whose instruction is being emulated.
    index: u32,
    // Populated only for mmio emulation; performs the actual read/write.
    handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Result<()>>,
    // Populated only for port io emulation; performs the actual in/out.
    handle_io: Option<&'a mut dyn FnMut(IoParams)>,
}
113 
114 impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
io_port_cb( context: *mut ::std::os::raw::c_void, io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO, ) -> HRESULT115     extern "stdcall" fn io_port_cb(
116         context: *mut ::std::os::raw::c_void,
117         io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
118     ) -> HRESULT {
119         // unsafe because windows could decide to call this at any time.
120         // However, we trust the kernel to call this while the vm/vcpu is valid.
121         let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
122         let Some(handle_io) = &mut ctx.handle_io else {
123             return E_UNEXPECTED;
124         };
125 
126         // safe because we trust the kernel to fill in the io_access
127         let io_access_info = unsafe { &mut *io_access };
128         let address = io_access_info.Port.into();
129         let size = io_access_info.AccessSize as usize;
130         // SAFETY: We trust the kernel to fill in the io_access
131         let data: &mut [u8] = unsafe {
132             assert!(size <= size_of_val(&io_access_info.Data));
133             std::slice::from_raw_parts_mut(&mut io_access_info.Data as *mut u32 as *mut u8, size)
134         };
135         match io_access_info.Direction {
136             WHPX_EXIT_DIRECTION_PIO_IN => {
137                 handle_io(IoParams {
138                     address,
139                     operation: IoOperation::Read(data),
140                 });
141                 S_OK
142             }
143             WHPX_EXIT_DIRECTION_PIO_OUT => {
144                 handle_io(IoParams {
145                     address,
146                     operation: IoOperation::Write(data),
147                 });
148                 S_OK
149             }
150             _ => E_UNEXPECTED,
151         }
152     }
memory_cb( context: *mut ::std::os::raw::c_void, memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO, ) -> HRESULT153     extern "stdcall" fn memory_cb(
154         context: *mut ::std::os::raw::c_void,
155         memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
156     ) -> HRESULT {
157         // unsafe because windows could decide to call this at any time.
158         // However, we trust the kernel to call this while the vm/vcpu is valid.
159         let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
160         let Some(handle_mmio) = &mut ctx.handle_mmio else {
161             return E_UNEXPECTED;
162         };
163 
164         // safe because we trust the kernel to fill in the memory_access
165         let memory_access_info = unsafe { &mut *memory_access };
166         let address = memory_access_info.GpaAddress;
167         let size = memory_access_info.AccessSize as usize;
168         let data = &mut memory_access_info.Data[..size];
169 
170         match memory_access_info.Direction {
171             WHPX_EXIT_DIRECTION_MMIO_READ => {
172                 if let Err(e) = handle_mmio(IoParams {
173                     address,
174                     operation: IoOperation::Read(data),
175                 }) {
176                     error!("handle_mmio failed with {e}");
177                     E_UNEXPECTED
178                 } else {
179                     S_OK
180                 }
181             }
182             WHPX_EXIT_DIRECTION_MMIO_WRITE => {
183                 if let Err(e) = handle_mmio(IoParams {
184                     address,
185                     operation: IoOperation::Write(data),
186                 }) {
187                     error!("handle_mmio write with {e}");
188                     E_UNEXPECTED
189                 } else {
190                     S_OK
191                 }
192             }
193             _ => E_UNEXPECTED,
194         }
195     }
get_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *mut WHV_REGISTER_VALUE, ) -> HRESULT196     extern "stdcall" fn get_virtual_processor_registers_cb(
197         context: *mut ::std::os::raw::c_void,
198         register_names: *const WHV_REGISTER_NAME,
199         register_count: UINT32,
200         register_values: *mut WHV_REGISTER_VALUE,
201     ) -> HRESULT {
202         // unsafe because windows could decide to call this at any time.
203         // However, we trust the kernel to call this while the vm/vcpu is valid.
204         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
205         // safe because the ctx has a weak reference to the vm partition, which should be
206         // alive longer than the ctx
207         unsafe {
208             WHvGetVirtualProcessorRegisters(
209                 ctx.vm_partition.partition,
210                 ctx.index,
211                 register_names,
212                 register_count,
213                 register_values,
214             )
215         }
216     }
set_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *const WHV_REGISTER_VALUE, ) -> HRESULT217     extern "stdcall" fn set_virtual_processor_registers_cb(
218         context: *mut ::std::os::raw::c_void,
219         register_names: *const WHV_REGISTER_NAME,
220         register_count: UINT32,
221         register_values: *const WHV_REGISTER_VALUE,
222     ) -> HRESULT {
223         // unsafe because windows could decide to call this at any time.
224         // However, we trust the kernel to call this while the vm/vcpu is valid.
225         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
226         // safe because the ctx has a weak reference to the vm partition, which should be
227         // alive longer than the ctx
228         unsafe {
229             WHvSetVirtualProcessorRegisters(
230                 ctx.vm_partition.partition,
231                 ctx.index,
232                 register_names,
233                 register_count,
234                 register_values,
235             )
236         }
237     }
translate_gva_page_cb( context: *mut ::std::os::raw::c_void, gva: WHV_GUEST_VIRTUAL_ADDRESS, translate_flags: WHV_TRANSLATE_GVA_FLAGS, translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE, gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS, ) -> HRESULT238     extern "stdcall" fn translate_gva_page_cb(
239         context: *mut ::std::os::raw::c_void,
240         gva: WHV_GUEST_VIRTUAL_ADDRESS,
241         translate_flags: WHV_TRANSLATE_GVA_FLAGS,
242         translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
243         gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
244     ) -> HRESULT {
245         // unsafe because windows could decide to call this at any time.
246         // However, we trust the kernel to call this while the vm/vcpu is valid.
247         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
248         let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
249         // safe because the ctx has a weak reference to the vm partition, which should be
250         // alive longer than the ctx
251         let ret = unsafe {
252             WHvTranslateGva(
253                 ctx.vm_partition.partition,
254                 ctx.index,
255                 gva,
256                 translate_flags,
257                 &mut translation_result,
258                 gpa,
259             )
260         };
261         if ret == S_OK {
262             // safe assuming the kernel passed in a valid result_code ptr
263             unsafe {
264                 *translation_result_code = translation_result.ResultCode;
265             }
266         }
267         ret
268     }
269 }
270 
impl Drop for SafeInstructionEmulator {
    fn drop(&mut self) {
        // SAFETY: safe because we own the instruction emulator handle. The unwrap
        // treats a failed destroy as an unrecoverable programming error.
        check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
    }
}
277 
// SAFETY: we can send and share the instruction emulator over threads safely
// even though it wraps a raw void* handle; the handle is only ever passed back
// to the WinHvEmulation API (assumed thread-safe — inherited from the original
// author's judgment).
unsafe impl Send for SafeInstructionEmulator {}
unsafe impl Sync for SafeInstructionEmulator {}
281 
/// Owns one hypervisor virtual processor within a partition; created by `new`
/// and deleted on drop.
struct SafeVirtualProcessor {
    // Keeps the partition alive at least as long as this vcpu exists.
    vm_partition: Arc<SafePartition>,
    // Index of the vcpu within the partition.
    index: u32,
}
286 
287 impl SafeVirtualProcessor {
new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor>288     fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
289         // safe since the vm partition should be valid.
290         check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
291         Ok(SafeVirtualProcessor {
292             vm_partition,
293             index,
294         })
295     }
296 }
297 
298 impl Drop for SafeVirtualProcessor {
drop(&mut self)299     fn drop(&mut self) {
300         // safe because we are the owner of this windows virtual processor.
301         check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index,) })
302             .unwrap();
303     }
304 }
305 
pub struct WhpxVcpu {
    // Index of this vcpu within the partition.
    index: u32,
    // Owns the hypervisor-side vcpu; shared so clones keep it alive.
    safe_virtual_processor: Arc<SafeVirtualProcessor>,
    vm_partition: Arc<SafePartition>,
    // Exit context written by WHvRunVirtualProcessor on each vmexit; read by the
    // handle_* methods.
    last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
    // must be arc, since we cannot "dupe" an instruction emulator similar to a handle.
    instruction_emulator: Arc<SafeInstructionEmulator>,
    // TSC frequency reported via HV_X64_MSR_TSC_FREQUENCY, if known.
    tsc_frequency: Option<u64>,
    // APIC frequency reported via HV_X64_MSR_APIC_FREQUENCY, once set.
    apic_frequency: Option<u32>,
}
316 
317 impl WhpxVcpu {
318     /// The SafePartition passed in is weak, so that there is no circular references.
319     /// However, the SafePartition should be valid as long as this VCPU is alive. The index
320     /// is the index for this vcpu.
new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu>321     pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
322         let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
323         let instruction_emulator = SafeInstructionEmulator::new()?;
324         Ok(WhpxVcpu {
325             index,
326             safe_virtual_processor: Arc::new(safe_virtual_processor),
327             vm_partition,
328             last_exit_context: Arc::new(Default::default()),
329             instruction_emulator: Arc::new(instruction_emulator),
330             tsc_frequency: None,
331             apic_frequency: None,
332         })
333     }
334 
set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32)335     pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
336         self.tsc_frequency = tsc_frequency;
337         self.apic_frequency = Some(lapic_frequency);
338     }
339 
340     /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_read(&mut self, id: u32) -> Result<()>341     fn handle_msr_read(&mut self, id: u32) -> Result<()> {
342         // Verify that we're only being called in a situation where the last exit reason was
343         // ExitReasonX64MsrAccess
344         if self.last_exit_context.ExitReason
345             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
346         {
347             return Err(Error::new(EINVAL));
348         }
349 
350         let value = match id {
351             HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
352             HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
353             _ => None,
354         };
355 
356         if let Some(value) = value {
357             // Get the next rip from the exit context
358             let rip = self.last_exit_context.VpContext.Rip
359                 + self.last_exit_context.VpContext.InstructionLength() as u64;
360 
361             const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
362                 WHV_REGISTER_NAME_WHvX64RegisterRip,
363                 WHV_REGISTER_NAME_WHvX64RegisterRax,
364                 WHV_REGISTER_NAME_WHvX64RegisterRdx,
365             ];
366 
367             let values = vec![
368                 WHV_REGISTER_VALUE { Reg64: rip },
369                 // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
370                 WHV_REGISTER_VALUE {
371                     Reg64: (value & 0xffffffff),
372                 },
373                 WHV_REGISTER_VALUE {
374                     Reg64: (value >> 32),
375                 },
376             ];
377 
378             // safe because we have enough space for all the registers
379             check_whpx!(unsafe {
380                 WHvSetVirtualProcessorRegisters(
381                     self.vm_partition.partition,
382                     self.index,
383                     &REG_NAMES as *const WHV_REGISTER_NAME,
384                     REG_NAMES.len() as u32,
385                     values.as_ptr() as *const WHV_REGISTER_VALUE,
386                 )
387             })
388         } else {
389             self.inject_gp_fault()
390         }
391     }
392 
393     /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()>394     fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
395         // Verify that we're only being called in a situation where the last exit reason was
396         // ExitReasonX64MsrAccess
397         if self.last_exit_context.ExitReason
398             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
399         {
400             return Err(Error::new(EINVAL));
401         }
402 
403         // Do nothing, we assume TSC is always invariant
404         let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);
405 
406         if !success {
407             return self.inject_gp_fault();
408         }
409 
410         // Get the next rip from the exit context
411         let rip = self.last_exit_context.VpContext.Rip
412             + self.last_exit_context.VpContext.InstructionLength() as u64;
413 
414         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];
415 
416         let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];
417 
418         // safe because we have enough space for all the registers
419         check_whpx!(unsafe {
420             WHvSetVirtualProcessorRegisters(
421                 self.vm_partition.partition,
422                 self.index,
423                 &REG_NAMES as *const WHV_REGISTER_NAME,
424                 REG_NAMES.len() as u32,
425                 values.as_ptr() as *const WHV_REGISTER_VALUE,
426             )
427         })
428     }
429 
inject_gp_fault(&self) -> Result<()>430     fn inject_gp_fault(&self) -> Result<()> {
431         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];
432 
433         let mut event = WHV_REGISTER_VALUE {
434             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
435                 __bindgen_anon_1: Default::default(),
436             },
437         };
438         // safe because we have enough space for all the registers
439         check_whpx!(unsafe {
440             WHvGetVirtualProcessorRegisters(
441                 self.vm_partition.partition,
442                 self.index,
443                 &REG_NAMES as *const WHV_REGISTER_NAME,
444                 REG_NAMES.len() as u32,
445                 &mut event as *mut WHV_REGISTER_VALUE,
446             )
447         })?;
448 
449         if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
450             error!("Unable to inject gp fault because pending exception exists");
451             return Err(Error::new(EINVAL));
452         }
453 
454         let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };
455 
456         pending_exception.set_EventPending(1);
457         // GP faults set error code
458         pending_exception.set_DeliverErrorCode(1);
459         // GP fault error code is 0 unless the fault is segment related
460         pending_exception.ErrorCode = 0;
461         // This must be set to WHvX64PendingEventException
462         pending_exception
463             .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
464         // GP fault vector is 13
465         const GP_VECTOR: u32 = 13;
466         pending_exception.set_Vector(GP_VECTOR);
467 
468         let event = WHV_REGISTER_VALUE {
469             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
470                 __bindgen_anon_1: pending_exception,
471             },
472         };
473 
474         // safe because we have enough space for all the registers
475         check_whpx!(unsafe {
476             WHvSetVirtualProcessorRegisters(
477                 self.vm_partition.partition,
478                 self.index,
479                 &REG_NAMES as *const WHV_REGISTER_NAME,
480                 REG_NAMES.len() as u32,
481                 &event as *const WHV_REGISTER_VALUE,
482             )
483         })
484     }
485 }
486 
487 impl Vcpu for WhpxVcpu {
488     /// Makes a shallow clone of this `Vcpu`.
try_clone(&self) -> Result<Self>489     fn try_clone(&self) -> Result<Self> {
490         Ok(WhpxVcpu {
491             index: self.index,
492             safe_virtual_processor: self.safe_virtual_processor.clone(),
493             vm_partition: self.vm_partition.clone(),
494             last_exit_context: self.last_exit_context.clone(),
495             instruction_emulator: self.instruction_emulator.clone(),
496             tsc_frequency: self.tsc_frequency,
497             apic_frequency: self.apic_frequency,
498         })
499     }
500 
    /// Returns `self` as a `&dyn Vcpu` trait object.
    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }
504 
505     /// Returns the vcpu id.
id(&self) -> usize506     fn id(&self) -> usize {
507         self.index.try_into().unwrap()
508     }
509 
510     /// Exits the vcpu immediately if exit is true
set_immediate_exit(&self, exit: bool)511     fn set_immediate_exit(&self, exit: bool) {
512         if exit {
513             // safe because we own this whpx virtual processor index, and assume the vm partition is
514             // still valid
515             unsafe {
516                 WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
517             }
518         }
519     }
520 
    /// Signals to the hypervisor that this guest is being paused by userspace. On some hypervisors,
    /// this is used to control the pvclock. On WHPX, we handle it separately with virtio-pvclock.
    /// So the correct implementation here is to do nothing.
    fn on_suspend(&self) -> Result<()> {
        // Intentionally a no-op; see the doc comment above.
        Ok(())
    }
527 
    /// Enables a hypervisor-specific extension on this Vcpu.  `cap` is a constant defined by the
    /// hypervisor API (e.g., kvm.h).  `args` are the arguments for enabling the feature, if any.
    ///
    /// # Safety
    /// `unsafe` per the `Vcpu` trait signature; this WHPX implementation performs
    /// no unsafe operations and unconditionally fails.
    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        // Whpx does not support raw capability on the vcpu.
        Err(Error::new(ENXIO))
    }
534 
    /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
    ///
    /// Once called, it will determine whether a mmio read or mmio write was the reason for the mmio
    /// exit, call `handle_fn` with the respective IoOperation to perform the mmio read or
    /// write, and set the return data in the vcpu so that the vcpu can resume running.
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        // Only handle_mmio is populated; the emulator callbacks reject port io
        // accesses during an mmio emulation attempt.
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: Some(handle_fn),
            handle_io: None,
        };
        // SAFETY: safe as long as all callbacks occur before this fn returns,
        // since `ctx` borrows `handle_fn` and lives only on this stack frame.
        check_whpx!(unsafe {
            WHvEmulatorTryMmioEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
                &mut status,
            )
        })?;
        // SAFETY: safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // Emulation failure is surfaced to the guest as a #GP fault, and to
            // the caller as the raw emulator status bits.
            self.inject_gp_fault()?;
            // SAFETY: safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }
568 
    /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
    ///
    /// Once called, it will determine whether an io in or io out was the reason for the io exit,
    /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
    /// and set the return data in the vcpu so that the vcpu can resume running.
    ///
    /// Note: unlike `handle_mmio`, a failed io emulation does NOT inject a #GP
    /// fault into the guest; the raw status is only returned to the caller.
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        // Only handle_io is populated; the emulator callbacks reject mmio
        // accesses during a port io emulation attempt.
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: None,
            handle_io: Some(handle_fn),
        };
        // SAFETY: safe as long as all callbacks occur before this fn returns,
        // since `ctx` borrows `handle_fn` and lives only on this stack frame.
        check_whpx!(unsafe {
            WHvEmulatorTryIoEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
                &mut status,
            )
        })?;
        // SAFETY: safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // SAFETY: safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }
600 
601     #[allow(non_upper_case_globals)]
run(&mut self) -> Result<VcpuExit>602     fn run(&mut self) -> Result<VcpuExit> {
603         // safe because we own this whpx virtual processor index, and assume the vm partition is
604         // still valid
605         let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
606         check_whpx!(unsafe {
607             WHvRunVirtualProcessor(
608                 self.vm_partition.partition,
609                 self.index,
610                 exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
611                 size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
612             )
613         })?;
614 
615         match self.last_exit_context.ExitReason {
616             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
617             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
618             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
619                 Ok(VcpuExit::UnrecoverableException)
620             }
621             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
622                 Ok(VcpuExit::InvalidVpRegister)
623             }
624             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
625                 Ok(VcpuExit::UnsupportedFeature)
626             }
627             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
628                 Ok(VcpuExit::IrqWindowOpen)
629             }
630             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
631             // additional exits that are configurable
632             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
633                 // safe because we trust the kernel to fill in the union field properly.
634                 let vector = unsafe {
635                     self.last_exit_context
636                         .__bindgen_anon_1
637                         .ApicEoi
638                         .InterruptVector as u8
639                 };
640                 Ok(VcpuExit::IoapicEoi { vector })
641             }
642             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
643                 // Safe because we know this was an MSR access exit.
644                 let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };
645 
646                 // Safe because we know this was an MSR access exit
647                 let is_write = unsafe {
648                     self.last_exit_context
649                         .__bindgen_anon_1
650                         .MsrAccess
651                         .AccessInfo
652                         .__bindgen_anon_1
653                         .IsWrite()
654                         == 1
655                 };
656                 if is_write {
657                     // Safe because we know this was an MSR access exit
658                     let value = unsafe {
659                         // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
660                         // specific register
661                         (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
662                             | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
663                     };
664                     self.handle_msr_write(id, value)?;
665                 } else {
666                     self.handle_msr_read(id)?;
667                 }
668                 Ok(VcpuExit::MsrAccess)
669             }
670             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
671                 // Safe because we know this was a CPUID exit.
672                 let entry = unsafe {
673                     CpuIdEntry {
674                         function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
675                         index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
676                         flags: 0,
677                         cpuid: CpuidResult {
678                             eax: self
679                                 .last_exit_context
680                                 .__bindgen_anon_1
681                                 .CpuidAccess
682                                 .DefaultResultRax as u32,
683                             ebx: self
684                                 .last_exit_context
685                                 .__bindgen_anon_1
686                                 .CpuidAccess
687                                 .DefaultResultRbx as u32,
688                             ecx: self
689                                 .last_exit_context
690                                 .__bindgen_anon_1
691                                 .CpuidAccess
692                                 .DefaultResultRcx as u32,
693                             edx: self
694                                 .last_exit_context
695                                 .__bindgen_anon_1
696                                 .CpuidAccess
697                                 .DefaultResultRdx as u32,
698                         },
699                     }
700                 };
701                 Ok(VcpuExit::Cpuid { entry })
702             }
703             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
704             // undocumented exit calls from the header file, WinHvPlatformDefs.h.
705             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
706             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
707             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
708             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
709                 Ok(VcpuExit::ApicInitSipiTrap)
710             }
711             // exit caused by host cancellation thorugh WHvCancelRunVirtualProcessor,
712             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
713             r => panic!("unknown exit reason: {}", r),
714         }
715     }
716 }
717 
718 impl VcpuX86_64 for WhpxVcpu {
719     /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
720     /// interrupts into the guest.
set_interrupt_window_requested(&self, requested: bool)721     fn set_interrupt_window_requested(&self, requested: bool) {
722         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
723             [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
724         let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
725             Default::default();
726         notifications.set_InterruptNotification(if requested { 1 } else { 0 });
727         let notify_register = WHV_REGISTER_VALUE {
728             DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
729                 __bindgen_anon_1: notifications,
730             },
731         };
732         // safe because we have enough space for all the registers
733         check_whpx!(unsafe {
734             WHvSetVirtualProcessorRegisters(
735                 self.vm_partition.partition,
736                 self.index,
737                 &REG_NAMES as *const WHV_REGISTER_NAME,
738                 REG_NAMES.len() as u32,
739                 &notify_register as *const WHV_REGISTER_VALUE,
740             )
741         })
742         .unwrap();
743     }
744 
745     /// Checks if we can inject an interrupt into the VCPU.
ready_for_interrupt(&self) -> bool746     fn ready_for_interrupt(&self) -> bool {
747         // safe because InterruptionPending bit is always valid in ExecutionState struct
748         let pending = unsafe {
749             self.last_exit_context
750                 .VpContext
751                 .ExecutionState
752                 .__bindgen_anon_1
753                 .InterruptionPending()
754         };
755         // safe because InterruptShadow bit is always valid in ExecutionState struct
756         let shadow = unsafe {
757             self.last_exit_context
758                 .VpContext
759                 .ExecutionState
760                 .__bindgen_anon_1
761                 .InterruptShadow()
762         };
763 
764         let eflags = self.last_exit_context.VpContext.Rflags;
765         const IF_MASK: u64 = 0x00000200;
766 
767         // can't inject an interrupt if InterruptShadow or InterruptPending bits are set, or if
768         // the IF flag is clear
769         shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
770     }
771 
772     /// Injects interrupt vector `irq` into the VCPU.
interrupt(&self, irq: u8) -> Result<()>773     fn interrupt(&self, irq: u8) -> Result<()> {
774         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
775             [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
776         let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
777             Default::default();
778         pending_interrupt.set_InterruptionPending(1);
779         pending_interrupt
780             .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
781         pending_interrupt.set_InterruptionVector(irq.into());
782         let interrupt = WHV_REGISTER_VALUE {
783             PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
784                 __bindgen_anon_1: pending_interrupt,
785             },
786         };
787         // safe because we have enough space for all the registers
788         check_whpx!(unsafe {
789             WHvSetVirtualProcessorRegisters(
790                 self.vm_partition.partition,
791                 self.index,
792                 &REG_NAMES as *const WHV_REGISTER_NAME,
793                 REG_NAMES.len() as u32,
794                 &interrupt as *const WHV_REGISTER_VALUE,
795             )
796         })
797     }
798 
799     /// Injects a non-maskable interrupt into the VCPU.
inject_nmi(&self) -> Result<()>800     fn inject_nmi(&self) -> Result<()> {
801         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
802             [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
803         let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
804             Default::default();
805         pending_interrupt.set_InterruptionPending(1);
806         pending_interrupt
807             .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
808         const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
809         pending_interrupt.set_InterruptionVector(NMI_VECTOR);
810         let interrupt = WHV_REGISTER_VALUE {
811             PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
812                 __bindgen_anon_1: pending_interrupt,
813             },
814         };
815         // safe because we have enough space for all the registers
816         check_whpx!(unsafe {
817             WHvSetVirtualProcessorRegisters(
818                 self.vm_partition.partition,
819                 self.index,
820                 &REG_NAMES as *const WHV_REGISTER_NAME,
821                 REG_NAMES.len() as u32,
822                 &interrupt as *const WHV_REGISTER_VALUE,
823             )
824         })
825     }
826 
827     /// Gets the VCPU general purpose registers.
get_regs(&self) -> Result<Regs>828     fn get_regs(&self) -> Result<Regs> {
829         let mut whpx_regs: WhpxRegs = Default::default();
830         let reg_names = WhpxRegs::get_register_names();
831         // safe because we have enough space for all the registers
832         check_whpx!(unsafe {
833             WHvGetVirtualProcessorRegisters(
834                 self.vm_partition.partition,
835                 self.index,
836                 reg_names as *const WHV_REGISTER_NAME,
837                 reg_names.len() as u32,
838                 whpx_regs.as_mut_ptr(),
839             )
840         })?;
841         Ok(Regs::from(&whpx_regs))
842     }
843 
844     /// Sets the VCPU general purpose registers.
set_regs(&self, regs: &Regs) -> Result<()>845     fn set_regs(&self, regs: &Regs) -> Result<()> {
846         let whpx_regs = WhpxRegs::from(regs);
847         let reg_names = WhpxRegs::get_register_names();
848         // safe because we have enough space for all the registers
849         check_whpx!(unsafe {
850             WHvSetVirtualProcessorRegisters(
851                 self.vm_partition.partition,
852                 self.index,
853                 reg_names as *const WHV_REGISTER_NAME,
854                 reg_names.len() as u32,
855                 whpx_regs.as_ptr(),
856             )
857         })
858     }
859 
860     /// Gets the VCPU special registers.
get_sregs(&self) -> Result<Sregs>861     fn get_sregs(&self) -> Result<Sregs> {
862         let mut whpx_sregs: WhpxSregs = Default::default();
863         let reg_names = WhpxSregs::get_register_names();
864         // safe because we have enough space for all the registers
865         check_whpx!(unsafe {
866             WHvGetVirtualProcessorRegisters(
867                 self.vm_partition.partition,
868                 self.index,
869                 reg_names as *const WHV_REGISTER_NAME,
870                 reg_names.len() as u32,
871                 whpx_sregs.as_mut_ptr(),
872             )
873         })?;
874         Ok(Sregs::from(&whpx_sregs))
875     }
876 
877     /// Sets the VCPU special registers.
set_sregs(&self, sregs: &Sregs) -> Result<()>878     fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
879         let whpx_sregs = WhpxSregs::from(sregs);
880         let reg_names = WhpxSregs::get_register_names();
881         // safe because we have enough space for all the registers
882         check_whpx!(unsafe {
883             WHvSetVirtualProcessorRegisters(
884                 self.vm_partition.partition,
885                 self.index,
886                 reg_names as *const WHV_REGISTER_NAME,
887                 reg_names.len() as u32,
888                 whpx_sregs.as_ptr(),
889             )
890         })
891     }
892 
893     /// Gets the VCPU FPU registers.
get_fpu(&self) -> Result<Fpu>894     fn get_fpu(&self) -> Result<Fpu> {
895         let mut whpx_fpu: WhpxFpu = Default::default();
896         let reg_names = WhpxFpu::get_register_names();
897         // safe because we have enough space for all the registers
898         check_whpx!(unsafe {
899             WHvGetVirtualProcessorRegisters(
900                 self.vm_partition.partition,
901                 self.index,
902                 reg_names as *const WHV_REGISTER_NAME,
903                 reg_names.len() as u32,
904                 whpx_fpu.as_mut_ptr(),
905             )
906         })?;
907         Ok(Fpu::from(&whpx_fpu))
908     }
909 
910     /// Sets the VCPU FPU registers.
set_fpu(&self, fpu: &Fpu) -> Result<()>911     fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
912         let whpx_fpu = WhpxFpu::from(fpu);
913         let reg_names = WhpxFpu::get_register_names();
914         // safe because we have enough space for all the registers
915         check_whpx!(unsafe {
916             WHvSetVirtualProcessorRegisters(
917                 self.vm_partition.partition,
918                 self.index,
919                 reg_names as *const WHV_REGISTER_NAME,
920                 reg_names.len() as u32,
921                 whpx_fpu.as_ptr(),
922             )
923         })
924     }
925 
    /// Gets the VCPU XSAVE.
    ///
    /// This is a two-phase call: WHPX is first probed with an effectively empty buffer so it
    /// reports the required size, then the real read is performed into a buffer of that size.
    fn get_xsave(&self) -> Result<Xsave> {
        let mut empty_buffer = [0u8; 1];
        let mut needed_buf_size: u32 = 0;

        // Find out how much space is needed for XSAVEs.
        // SAFETY: the probe passes a buffer length of 0, so the hypervisor writes nothing into
        // empty_buffer; needed_buf_size is valid for the duration of the call.
        let res = unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                empty_buffer.as_mut_ptr() as *mut _,
                0,
                &mut needed_buf_size,
            )
        };
        // The probe is expected to "fail" with WHV_E_INSUFFICIENT_BUFFER while filling in
        // needed_buf_size; any other result means the size query itself failed.
        if res != WHV_E_INSUFFICIENT_BUFFER.0 {
            // This should always work, so if it doesn't, we'll return unsupported.
            error!("failed to get size of vcpu xsave");
            return Err(Error::new(EIO));
        }

        let mut xsave = Xsave::new(needed_buf_size as usize);
        // SAFETY: xsave_data is valid for the duration of the FFI call, and we pass its length in
        // bytes so writes are bounded within the buffer.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_mut_ptr(),
                xsave.len() as u32,
                &mut needed_buf_size,
            )
        })?;
        Ok(xsave)
    }
961 
962     /// Sets the VCPU XSAVE.
set_xsave(&self, xsave: &Xsave) -> Result<()>963     fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
964         // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
965         // length in bytes so reads are bounded within the buffer.
966         check_whpx!(unsafe {
967             WHvSetVirtualProcessorXsaveState(
968                 self.vm_partition.partition,
969                 self.index,
970                 xsave.as_ptr(),
971                 xsave.len() as u32,
972             )
973         })
974     }
975 
get_interrupt_state(&self) -> Result<AnySnapshot>976     fn get_interrupt_state(&self) -> Result<AnySnapshot> {
977         let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
978         let reg_names = WhpxInterruptRegs::get_register_names();
979         // SAFETY: we have enough space for all the registers & the memory lives for the duration
980         // of the FFI call.
981         check_whpx!(unsafe {
982             WHvGetVirtualProcessorRegisters(
983                 self.vm_partition.partition,
984                 self.index,
985                 reg_names as *const WHV_REGISTER_NAME,
986                 reg_names.len() as u32,
987                 whpx_interrupt_regs.as_mut_ptr(),
988             )
989         })?;
990 
991         AnySnapshot::to_any(whpx_interrupt_regs.into_serializable()).map_err(|e| {
992             error!("failed to serialize interrupt state: {:?}", e);
993             Error::new(EIO)
994         })
995     }
996 
set_interrupt_state(&self, data: AnySnapshot) -> Result<()>997     fn set_interrupt_state(&self, data: AnySnapshot) -> Result<()> {
998         let whpx_interrupt_regs =
999             WhpxInterruptRegs::from_serializable(AnySnapshot::from_any(data).map_err(|e| {
1000                 error!("failed to serialize interrupt state: {:?}", e);
1001                 Error::new(EIO)
1002             })?);
1003         let reg_names = WhpxInterruptRegs::get_register_names();
1004         // SAFETY: we have enough space for all the registers & the memory lives for the duration
1005         // of the FFI call.
1006         check_whpx!(unsafe {
1007             WHvSetVirtualProcessorRegisters(
1008                 self.vm_partition.partition,
1009                 self.index,
1010                 reg_names as *const WHV_REGISTER_NAME,
1011                 reg_names.len() as u32,
1012                 whpx_interrupt_regs.as_ptr(),
1013             )
1014         })
1015     }
1016 
1017     /// Gets the VCPU debug registers.
get_debugregs(&self) -> Result<DebugRegs>1018     fn get_debugregs(&self) -> Result<DebugRegs> {
1019         let mut whpx_debugregs: WhpxDebugRegs = Default::default();
1020         let reg_names = WhpxDebugRegs::get_register_names();
1021         // safe because we have enough space for all the registers
1022         check_whpx!(unsafe {
1023             WHvGetVirtualProcessorRegisters(
1024                 self.vm_partition.partition,
1025                 self.index,
1026                 reg_names as *const WHV_REGISTER_NAME,
1027                 reg_names.len() as u32,
1028                 whpx_debugregs.as_mut_ptr(),
1029             )
1030         })?;
1031         Ok(DebugRegs::from(&whpx_debugregs))
1032     }
1033 
1034     /// Sets the VCPU debug registers.
set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>1035     fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
1036         let whpx_debugregs = WhpxDebugRegs::from(debugregs);
1037         let reg_names = WhpxDebugRegs::get_register_names();
1038         // safe because we have enough space for all the registers
1039         check_whpx!(unsafe {
1040             WHvSetVirtualProcessorRegisters(
1041                 self.vm_partition.partition,
1042                 self.index,
1043                 reg_names as *const WHV_REGISTER_NAME,
1044                 reg_names.len() as u32,
1045                 whpx_debugregs.as_ptr(),
1046             )
1047         })
1048     }
1049 
1050     /// Gets the VCPU extended control registers.
get_xcrs(&self) -> Result<BTreeMap<u32, u64>>1051     fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
1052         const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
1053         let mut reg_value = WHV_REGISTER_VALUE::default();
1054         // safe because we have enough space for all the registers in whpx_regs
1055         check_whpx!(unsafe {
1056             WHvGetVirtualProcessorRegisters(
1057                 self.vm_partition.partition,
1058                 self.index,
1059                 &REG_NAME,
1060                 /* RegisterCount */ 1,
1061                 &mut reg_value,
1062             )
1063         })?;
1064 
1065         // safe because the union value, reg64, is safe to pull out assuming
1066         // kernel filled in the xcrs properly.
1067         let xcr0 = unsafe { reg_value.Reg64 };
1068 
1069         // whpx only supports xcr0
1070         let xcrs = BTreeMap::from([(0, xcr0)]);
1071         Ok(xcrs)
1072     }
1073 
1074     /// Sets a VCPU extended control register.
set_xcr(&self, xcr_index: u32, value: u64) -> Result<()>1075     fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
1076         if xcr_index != 0 {
1077             // invalid xcr register provided
1078             return Err(Error::new(EINVAL));
1079         }
1080 
1081         const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
1082         let reg_value = WHV_REGISTER_VALUE { Reg64: value };
1083         // safe because we have enough space for all the registers in whpx_xcrs
1084         check_whpx!(unsafe {
1085             WHvSetVirtualProcessorRegisters(
1086                 self.vm_partition.partition,
1087                 self.index,
1088                 &REG_NAME,
1089                 /* RegisterCount */ 1,
1090                 &reg_value,
1091             )
1092         })
1093     }
1094 
1095     /// Gets the value of a single model-specific register.
get_msr(&self, msr_index: u32) -> Result<u64>1096     fn get_msr(&self, msr_index: u32) -> Result<u64> {
1097         let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
1098         let mut msr_value = WHV_REGISTER_VALUE::default();
1099         // safe because we have enough space for all the registers in whpx_regs
1100         check_whpx!(unsafe {
1101             WHvGetVirtualProcessorRegisters(
1102                 self.vm_partition.partition,
1103                 self.index,
1104                 &msr_name,
1105                 /* RegisterCount */ 1,
1106                 &mut msr_value,
1107             )
1108         })?;
1109 
1110         // safe because Reg64 will be a valid union value
1111         let value = unsafe { msr_value.Reg64 };
1112         Ok(value)
1113     }
1114 
get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>1115     fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
1116         // Note that some members of VALID_MSRS cannot be fetched from WHPX with
1117         // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of
1118         // permitted MSRs here.
1119         //
1120         // We intentionally exclude WHvRegisterPendingInterruption and
1121         // WHvRegisterInterruptState because they are included in
1122         // get_interrupt_state.
1123         //
1124         // We intentionally exclude MSR_TSC because in snapshotting it is
1125         // handled by the generic x86_64 VCPU snapshot/restore. Non snapshot
1126         // consumers should use get/set_tsc_adjust to access the adjust register
1127         // if needed.
1128         const MSRS_TO_SAVE: &[u32] = &[
1129             MSR_EFER,
1130             MSR_KERNEL_GS_BASE,
1131             MSR_APIC_BASE,
1132             MSR_SYSENTER_CS,
1133             MSR_SYSENTER_EIP,
1134             MSR_SYSENTER_ESP,
1135             MSR_STAR,
1136             MSR_LSTAR,
1137             MSR_CSTAR,
1138             MSR_SFMASK,
1139         ];
1140 
1141         let registers = MSRS_TO_SAVE
1142             .iter()
1143             .map(|msr_index| {
1144                 let value = self.get_msr(*msr_index)?;
1145                 Ok((*msr_index, value))
1146             })
1147             .collect::<Result<BTreeMap<u32, u64>>>()?;
1148 
1149         Ok(registers)
1150     }
1151 
1152     /// Sets the value of a single model-specific register.
set_msr(&self, msr_index: u32, value: u64) -> Result<()>1153     fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
1154         match get_msr_name(msr_index) {
1155             Some(msr_name) => {
1156                 let msr_value = WHV_REGISTER_VALUE { Reg64: value };
1157                 check_whpx!(unsafe {
1158                     WHvSetVirtualProcessorRegisters(
1159                         self.vm_partition.partition,
1160                         self.index,
1161                         &msr_name,
1162                         /* RegisterCount */ 1,
1163                         &msr_value,
1164                     )
1165                 })
1166             }
1167             None => {
1168                 warn!("msr 0x{msr_index:X} write unsupported by WHPX, dropping");
1169                 Ok(())
1170             }
1171         }
1172     }
1173 
    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu, and needs to be setup on the vm.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        // Per-vcpu CPUID configuration is unsupported on WHPX; callers must configure CPUID on
        // the partition (VM) instead, so this always fails with ENXIO.
        Err(Error::new(ENXIO))
    }
1179 
1180     /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
1181     /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
1182     /// will then set the appropriate registers on the vcpu.
handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>1183     fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
1184         // Verify that we're only being called in a situation where the last exit reason was
1185         // ExitReasonX64Cpuid
1186         if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
1187             return Err(Error::new(EINVAL));
1188         }
1189 
1190         // Get the next rip from the exit context
1191         let rip = self.last_exit_context.VpContext.Rip
1192             + self.last_exit_context.VpContext.InstructionLength() as u64;
1193 
1194         const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
1195             WHV_REGISTER_NAME_WHvX64RegisterRip,
1196             WHV_REGISTER_NAME_WHvX64RegisterRax,
1197             WHV_REGISTER_NAME_WHvX64RegisterRbx,
1198             WHV_REGISTER_NAME_WHvX64RegisterRcx,
1199             WHV_REGISTER_NAME_WHvX64RegisterRdx,
1200         ];
1201 
1202         let values = vec![
1203             WHV_REGISTER_VALUE { Reg64: rip },
1204             WHV_REGISTER_VALUE {
1205                 Reg64: entry.cpuid.eax as u64,
1206             },
1207             WHV_REGISTER_VALUE {
1208                 Reg64: entry.cpuid.ebx as u64,
1209             },
1210             WHV_REGISTER_VALUE {
1211                 Reg64: entry.cpuid.ecx as u64,
1212             },
1213             WHV_REGISTER_VALUE {
1214                 Reg64: entry.cpuid.edx as u64,
1215             },
1216         ];
1217 
1218         // safe because we have enough space for all the registers
1219         check_whpx!(unsafe {
1220             WHvSetVirtualProcessorRegisters(
1221                 self.vm_partition.partition,
1222                 self.index,
1223                 &REG_NAMES as *const WHV_REGISTER_NAME,
1224                 REG_NAMES.len() as u32,
1225                 values.as_ptr() as *const WHV_REGISTER_VALUE,
1226             )
1227         })
1228     }
1229 
    /// Sets up debug registers and configure vcpu for handling guest debug events.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // Guest debugging is not implemented for WHPX; callers always receive ENOENT.
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }
1235 
restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()>1236     fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
1237         // Set the guest TSC such that it has the same TSC_OFFSET as it did at
1238         // the moment it was snapshotted. This is required for virtio-pvclock
1239         // to function correctly. (virtio-pvclock assumes the offset is fixed,
1240         // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
1241         // from CLOCK_MONOTONIC by setting the timebase.)
1242         self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
1243     }
1244 }
1245 
get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME>1246 fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
1247     VALID_MSRS.get(&msr_index).copied()
1248 }
1249 
1250 // run calls are tested with the integration tests since the full vcpu needs to be setup for it.
1251 #[cfg(test)]
1252 mod tests {
1253     use vm_memory::GuestAddress;
1254     use vm_memory::GuestMemory;
1255 
1256     use super::*;
1257     use crate::VmX86_64;
1258 
new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm1259     fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
1260         let whpx = Whpx::new().expect("failed to instantiate whpx");
1261         let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
1262             .expect("failed to get whpx features");
1263         WhpxVm::new(
1264             &whpx,
1265             cpu_count,
1266             mem,
1267             CpuId::new(0),
1268             local_apic_supported,
1269             None,
1270         )
1271         .expect("failed to create whpx vm")
1272     }
1273 
1274     #[test]
try_clone()1275     fn try_clone() {
1276         if !Whpx::is_enabled() {
1277             return;
1278         }
1279         let cpu_count = 1;
1280         let mem =
1281             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1282         let vm = new_vm(cpu_count, mem);
1283         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1284         let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1285         let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
1286     }
1287 
1288     #[test]
index()1289     fn index() {
1290         if !Whpx::is_enabled() {
1291             return;
1292         }
1293         let cpu_count = 2;
1294         let mem =
1295             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1296         let vm = new_vm(cpu_count, mem);
1297         let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1298         let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1299         assert_eq!(vcpu0.index, 0);
1300         vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
1301         let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1302         assert_eq!(vcpu1.index, 1);
1303     }
1304 
1305     #[test]
get_regs()1306     fn get_regs() {
1307         if !Whpx::is_enabled() {
1308             return;
1309         }
1310         let cpu_count = 1;
1311         let mem =
1312             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1313         let vm = new_vm(cpu_count, mem);
1314         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1315 
1316         vcpu.get_regs().expect("failed to get regs");
1317     }
1318 
1319     #[test]
set_regs()1320     fn set_regs() {
1321         if !Whpx::is_enabled() {
1322             return;
1323         }
1324         let cpu_count = 1;
1325         let mem =
1326             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1327         let vm = new_vm(cpu_count, mem);
1328         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1329 
1330         let mut regs = vcpu.get_regs().expect("failed to get regs");
1331         let new_val = regs.rax + 2;
1332         regs.rax = new_val;
1333 
1334         vcpu.set_regs(&regs).expect("failed to set regs");
1335         let new_regs = vcpu.get_regs().expect("failed to get regs");
1336         assert_eq!(new_regs.rax, new_val);
1337     }
1338 
1339     #[test]
debugregs()1340     fn debugregs() {
1341         if !Whpx::is_enabled() {
1342             return;
1343         }
1344         let cpu_count = 1;
1345         let mem =
1346             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1347         let vm = new_vm(cpu_count, mem);
1348         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1349 
1350         let mut dregs = vcpu.get_debugregs().unwrap();
1351         dregs.dr7 += 13;
1352         vcpu.set_debugregs(&dregs).unwrap();
1353         let dregs2 = vcpu.get_debugregs().unwrap();
1354         assert_eq!(dregs.dr7, dregs2.dr7);
1355     }
1356 
1357     #[test]
sregs()1358     fn sregs() {
1359         if !Whpx::is_enabled() {
1360             return;
1361         }
1362         let cpu_count = 1;
1363         let mem =
1364             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1365         let vm = new_vm(cpu_count, mem);
1366         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1367 
1368         let mut sregs = vcpu.get_sregs().unwrap();
1369         sregs.cs.base += 7;
1370         vcpu.set_sregs(&sregs).unwrap();
1371         let sregs2 = vcpu.get_sregs().unwrap();
1372         assert_eq!(sregs.cs.base, sregs2.cs.base);
1373     }
1374 
1375     #[test]
fpu()1376     fn fpu() {
1377         if !Whpx::is_enabled() {
1378             return;
1379         }
1380         let cpu_count = 1;
1381         let mem =
1382             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1383         let vm = new_vm(cpu_count, mem);
1384         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1385 
1386         let mut fpu = vcpu.get_fpu().unwrap();
1387         fpu.fpr[0].significand += 3;
1388         vcpu.set_fpu(&fpu).unwrap();
1389         let fpu2 = vcpu.get_fpu().unwrap();
1390         assert_eq!(fpu.fpr, fpu2.fpr);
1391     }
1392 
1393     #[test]
xcrs()1394     fn xcrs() {
1395         if !Whpx::is_enabled() {
1396             return;
1397         }
1398         let whpx = Whpx::new().expect("failed to instantiate whpx");
1399         let cpu_count = 1;
1400         let mem =
1401             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1402         let vm = new_vm(cpu_count, mem);
1403         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1404         // check xsave support
1405         if !whpx.check_capability(HypervisorCap::Xcrs) {
1406             return;
1407         }
1408 
1409         vcpu.set_xcr(0, 1).unwrap();
1410         let xcrs = vcpu.get_xcrs().unwrap();
1411         let xcr0 = xcrs.get(&0).unwrap();
1412         assert_eq!(*xcr0, 1);
1413     }
1414 
1415     #[test]
set_msr()1416     fn set_msr() {
1417         if !Whpx::is_enabled() {
1418             return;
1419         }
1420         let cpu_count = 1;
1421         let mem =
1422             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1423         let vm = new_vm(cpu_count, mem);
1424         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1425 
1426         vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();
1427 
1428         let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
1429         assert_eq!(gs_base, 42);
1430     }
1431 
1432     #[test]
get_msr()1433     fn get_msr() {
1434         if !Whpx::is_enabled() {
1435             return;
1436         }
1437         let cpu_count = 1;
1438         let mem =
1439             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1440         let vm = new_vm(cpu_count, mem);
1441         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1442 
1443         // This one should succeed
1444         let _value = vcpu.get_msr(MSR_TSC).unwrap();
1445 
1446         // This one will fail to fetch
1447         vcpu.get_msr(MSR_TSC + 1)
1448             .expect_err("invalid MSR index should fail");
1449     }
1450 
1451     #[test]
set_efer()1452     fn set_efer() {
1453         if !Whpx::is_enabled() {
1454             return;
1455         }
1456         // EFER Bits
1457         const EFER_SCE: u64 = 0x00000001;
1458         const EFER_LME: u64 = 0x00000100;
1459         const EFER_LMA: u64 = 0x00000400;
1460         const X86_CR0_PE: u64 = 0x1;
1461         const X86_CR0_PG: u64 = 0x80000000;
1462         const X86_CR4_PAE: u64 = 0x20;
1463 
1464         let cpu_count = 1;
1465         let mem =
1466             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1467         let vm = new_vm(cpu_count, mem);
1468         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1469 
1470         let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
1471         // Initial value should be 0
1472         assert_eq!(sregs.efer, 0);
1473 
1474         // Enable and activate long mode
1475         sregs.cr0 |= X86_CR0_PE; // enable protected mode
1476         sregs.cr0 |= X86_CR0_PG; // enable paging
1477         sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
1478         sregs.efer = EFER_LMA | EFER_LME;
1479         vcpu.set_sregs(&sregs).expect("failed to set sregs");
1480 
1481         // Verify that setting stuck
1482         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1483         assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
1484         assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
1485         assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
1486         assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);
1487 
1488         let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1489         assert_eq!(efer, EFER_LMA | EFER_LME);
1490 
1491         // Enable SCE via set_msrs
1492         vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
1493             .expect("failed to set msr");
1494 
1495         // Verify that setting stuck
1496         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1497         assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
1498         let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1499         assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
1500     }
1501 
1502     #[test]
get_and_set_xsave_smoke()1503     fn get_and_set_xsave_smoke() {
1504         if !Whpx::is_enabled() {
1505             return;
1506         }
1507         let cpu_count = 1;
1508         let mem =
1509             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1510         let vm = new_vm(cpu_count, mem);
1511         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1512 
1513         // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
1514         // succeed.
1515         let xsave = vcpu.get_xsave().unwrap();
1516         vcpu.set_xsave(&xsave).unwrap();
1517     }
1518 
1519     #[test]
get_and_set_interrupt_state_smoke()1520     fn get_and_set_interrupt_state_smoke() {
1521         if !Whpx::is_enabled() {
1522             return;
1523         }
1524         let cpu_count = 1;
1525         let mem =
1526             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1527         let vm = new_vm(cpu_count, mem);
1528         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1529 
1530         // For the sake of snapshotting, interrupt state is essentially opaque. We just want to make
1531         // sure our syscalls succeed.
1532         let interrupt_state = vcpu.get_interrupt_state().unwrap();
1533         vcpu.set_interrupt_state(interrupt_state).unwrap();
1534     }
1535 
1536     #[test]
get_all_msrs()1537     fn get_all_msrs() {
1538         if !Whpx::is_enabled() {
1539             return;
1540         }
1541         let cpu_count = 1;
1542         let mem =
1543             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1544         let vm = new_vm(cpu_count, mem);
1545         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1546 
1547         let all_msrs = vcpu.get_all_msrs().unwrap();
1548 
1549         // Our MSR buffer is init'ed to zeros in the registers. The APIC base will be non-zero, so
1550         // by asserting that we know the MSR fetch actually did get us data.
1551         let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
1552         assert_ne!(*apic_base, 0);
1553     }
1554 }
1555