• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use core::ffi::c_void;
6 use std::arch::x86_64::CpuidResult;
7 use std::collections::BTreeMap;
8 use std::convert::TryInto;
9 use std::mem::size_of;
10 use std::sync::Arc;
11 
12 use base::Error;
13 use base::Result;
14 use libc::EINVAL;
15 use libc::EIO;
16 use libc::ENOENT;
17 use libc::ENXIO;
18 use vm_memory::GuestAddress;
19 use winapi::shared::winerror::E_UNEXPECTED;
20 use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;
21 
22 use super::types::*;
23 use super::*;
24 use crate::CpuId;
25 use crate::CpuIdEntry;
26 use crate::DebugRegs;
27 use crate::Fpu;
28 use crate::HypervHypercall;
29 use crate::IoOperation;
30 use crate::IoParams;
31 use crate::Regs;
32 use crate::Sregs;
33 use crate::Vcpu;
34 use crate::VcpuExit;
35 use crate::VcpuX86_64;
36 use crate::Xsave;
37 
// Values of the `Direction` field of WHV_EMULATOR_MEMORY_ACCESS_INFO and
// WHV_EMULATOR_IO_ACCESS_INFO, matched against in the emulator callbacks below
// to distinguish reads/ins (0) from writes/outs (1).
const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;
42 
/// This is the whpx instruction emulator, useful for deconstructing
/// io & memory port instructions. Whpx does not do this automatically.
struct SafeInstructionEmulator {
    // Raw emulator handle returned by WHvEmulatorCreateEmulator; destroyed in Drop.
    handle: WHV_EMULATOR_HANDLE,
}
48 
impl SafeInstructionEmulator {
    /// Creates a WHPX instruction emulator wired to the callback
    /// implementations in `InstructionEmulatorCallbacks` below.
    ///
    /// # Errors
    /// Returns the WHPX error if `WHvEmulatorCreateEmulator` fails.
    fn new() -> Result<SafeInstructionEmulator> {
        // Callback table handed to WHPX. `Size` and `Reserved` follow the
        // WHV_EMULATOR_CALLBACKS ABI contract.
        const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
            Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
            Reserved: 0,
            WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
            WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
            WHvEmulatorGetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::get_virtual_processor_registers_cb,
            ),
            WHvEmulatorSetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::set_virtual_processor_registers_cb,
            ),
            WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
        };
        let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
        // SAFETY: safe because we pass in valid callbacks and an emulator handle
        // out-pointer for the kernel to place the allocated handle into.
        check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;

        Ok(SafeInstructionEmulator { handle })
    }
}
72 
/// Callback surface required by the WHPX instruction emulator
/// (mirrors the function pointers in `WHV_EMULATOR_CALLBACKS`).
/// All callbacks are `extern "stdcall"` because Windows invokes them directly,
/// passing back the opaque `context` pointer supplied at emulation time.
trait InstructionEmulatorCallbacks {
    /// Performs a port I/O access on behalf of the emulator.
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT;
    /// Performs a guest-physical memory (MMIO) access on behalf of the emulator.
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT;
    /// Reads `register_count` vcpu registers into `register_values`.
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT;
    /// Writes `register_count` vcpu registers from `register_values`.
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT;
    /// Translates a guest virtual address to a guest physical address.
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT;
}
102 
/// Context passed into the instruction emulator when trying io or mmio emulation.
/// Since we need this for set/get registers and memory translation,
/// a single context is used that captures all necessary contextual information for the operation.
struct InstructionEmulatorContext<'a> {
    vm_partition: Arc<SafePartition>,
    // Index of the vcpu on whose behalf emulation runs.
    index: u32,
    // Set when emulating an MMIO exit; `None` during PIO emulation.
    handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Option<[u8; 8]>>,
    // Set when emulating a PIO exit; `None` during MMIO emulation.
    handle_io: Option<&'a mut dyn FnMut(IoParams) -> Option<[u8; 8]>>,
}
112 
impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT {
        // SAFETY: unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid,
        // with the InstructionEmulatorContext pointer we supplied.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        // SAFETY: safe because we trust the kernel to fill in the io_access
        let io_access_info = unsafe { &mut *io_access };
        let address = io_access_info.Port.into();
        let size = io_access_info.AccessSize as usize;
        match io_access_info.Direction {
            WHPX_EXIT_DIRECTION_PIO_IN => {
                if let Some(handle_io) = &mut ctx.handle_io {
                    if let Some(data) = handle_io(IoParams {
                        address,
                        size,
                        operation: IoOperation::Read,
                    }) {
                        // SAFETY: safe because we know this is an io_access_info field of u32,
                        // so casting as a &mut [u8] of len 4 is safe.
                        let buffer = unsafe {
                            std::slice::from_raw_parts_mut(
                                &mut io_access_info.Data as *mut u32 as *mut u8,
                                4,
                            )
                        };
                        // Copy only the bytes the guest actually asked for.
                        buffer[..size].copy_from_slice(&data[..size]);
                    }
                    S_OK
                } else {
                    // A PIO exit arrived but no PIO handler was installed.
                    E_UNEXPECTED
                }
            }
            WHPX_EXIT_DIRECTION_PIO_OUT => {
                if let Some(handle_io) = &mut ctx.handle_io {
                    handle_io(IoParams {
                        address,
                        size,
                        operation: IoOperation::Write {
                            // Widen the u32 port data into the fixed 8-byte payload.
                            data: (io_access_info.Data as u64).to_ne_bytes(),
                        },
                    });
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT {
        // SAFETY: unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        // SAFETY: safe because we trust the kernel to fill in the memory_access
        let memory_access_info = unsafe { &mut *memory_access };
        let address = memory_access_info.GpaAddress;
        let size = memory_access_info.AccessSize as usize;
        match memory_access_info.Direction {
            WHPX_EXIT_DIRECTION_MMIO_READ => {
                if let Some(handle_mmio) = &mut ctx.handle_mmio {
                    if let Some(data) = handle_mmio(IoParams {
                        address,
                        size,
                        operation: IoOperation::Read,
                    }) {
                        memory_access_info.Data = data;
                    }
                    S_OK
                } else {
                    // An MMIO exit arrived but no MMIO handler was installed.
                    E_UNEXPECTED
                }
            }
            WHPX_EXIT_DIRECTION_MMIO_WRITE => {
                if let Some(handle_mmio) = &mut ctx.handle_mmio {
                    handle_mmio(IoParams {
                        address,
                        size,
                        operation: IoOperation::Write {
                            data: memory_access_info.Data,
                        },
                    });
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // SAFETY: unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // SAFETY: safe because the ctx has a weak reference to the vm partition,
        // which should be alive longer than the ctx
        unsafe {
            WHvGetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // SAFETY: unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // SAFETY: safe because the ctx has a weak reference to the vm partition,
        // which should be alive longer than the ctx
        unsafe {
            WHvSetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT {
        // SAFETY: unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
        // SAFETY: safe because the ctx has a weak reference to the vm partition,
        // which should be alive longer than the ctx
        let ret = unsafe {
            WHvTranslateGva(
                ctx.vm_partition.partition,
                ctx.index,
                gva,
                translate_flags,
                &mut translation_result,
                gpa,
            )
        };
        // Only propagate the result code on success; on failure the out-param
        // may not have been filled in.
        if ret == S_OK {
            // SAFETY: safe assuming the kernel passed in a valid result_code ptr
            unsafe {
                *translation_result_code = translation_result.ResultCode;
            }
        }
        ret
    }
}
282 
impl Drop for SafeInstructionEmulator {
    fn drop(&mut self) {
        // SAFETY: safe because we own the instruction emulator.
        // unwrap: destruction failing indicates a broken invariant, not a
        // recoverable condition.
        check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
    }
}
289 
// SAFETY: we can send and share the instruction emulator over threads safely
// even though it is void*.
unsafe impl Send for SafeInstructionEmulator {}
unsafe impl Sync for SafeInstructionEmulator {}
293 
/// RAII wrapper for a WHPX virtual processor: created in `new`, deleted in `Drop`.
struct SafeVirtualProcessor {
    // Keeps the partition alive for at least as long as the vcpu exists.
    vm_partition: Arc<SafePartition>,
    // Index of this virtual processor within the partition.
    index: u32,
}
298 
impl SafeVirtualProcessor {
    /// Creates virtual processor `index` on `vm_partition` via
    /// `WHvCreateVirtualProcessor`.
    ///
    /// # Errors
    /// Returns the WHPX error if creation fails.
    fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
        // SAFETY: safe since the vm partition should be valid.
        check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
        Ok(SafeVirtualProcessor {
            vm_partition,
            index,
        })
    }
}
309 
impl Drop for SafeVirtualProcessor {
    fn drop(&mut self) {
        // SAFETY: safe because we are the owner of this windows virtual processor.
        check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index,) })
            .unwrap();
    }
}
317 
/// A single WHPX vcpu. Cloning (`try_clone`) is shallow: the Arc-held state is
/// shared between clones.
pub struct WhpxVcpu {
    // Index of this vcpu within its partition.
    index: u32,
    safe_virtual_processor: Arc<SafeVirtualProcessor>,
    vm_partition: Arc<SafePartition>,
    // Exit context filled in by WHvRunVirtualProcessor on each `run`; read by
    // the handle_* methods afterwards.
    last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
    // must be arc, since we cannot "dupe" an instruction emulator similar to a handle.
    instruction_emulator: Arc<SafeInstructionEmulator>,
    // Reported via HV_X64_MSR_TSC_FREQUENCY; `None` until `set_frequencies`.
    tsc_frequency: Option<u64>,
    // Reported via HV_X64_MSR_APIC_FREQUENCY; `None` until `set_frequencies`.
    apic_frequency: Option<u32>,
}
328 
329 impl WhpxVcpu {
330     /// The SafePartition passed in is weak, so that there is no circular references.
331     /// However, the SafePartition should be valid as long as this VCPU is alive. The index
332     /// is the index for this vcpu.
new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu>333     pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
334         let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
335         let instruction_emulator = SafeInstructionEmulator::new()?;
336         Ok(WhpxVcpu {
337             index,
338             safe_virtual_processor: Arc::new(safe_virtual_processor),
339             vm_partition,
340             last_exit_context: Arc::new(Default::default()),
341             instruction_emulator: Arc::new(instruction_emulator),
342             tsc_frequency: None,
343             apic_frequency: None,
344         })
345     }
346 
set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32)347     pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
348         self.tsc_frequency = tsc_frequency;
349         self.apic_frequency = Some(lapic_frequency);
350     }
351 
352     /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_read(&mut self, id: u32) -> Result<()>353     fn handle_msr_read(&mut self, id: u32) -> Result<()> {
354         // Verify that we're only being called in a situation where the last exit reason was
355         // ExitReasonX64MsrAccess
356         if self.last_exit_context.ExitReason
357             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
358         {
359             return Err(Error::new(EINVAL));
360         }
361 
362         let value = match id {
363             HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
364             HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
365             _ => None,
366         };
367 
368         if let Some(value) = value {
369             // Get the next rip from the exit context
370             let rip = self.last_exit_context.VpContext.Rip
371                 + self.last_exit_context.VpContext.InstructionLength() as u64;
372 
373             const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
374                 WHV_REGISTER_NAME_WHvX64RegisterRip,
375                 WHV_REGISTER_NAME_WHvX64RegisterRax,
376                 WHV_REGISTER_NAME_WHvX64RegisterRdx,
377             ];
378 
379             let values = vec![
380                 WHV_REGISTER_VALUE { Reg64: rip },
381                 // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
382                 WHV_REGISTER_VALUE {
383                     Reg64: (value & 0xffffffff),
384                 },
385                 WHV_REGISTER_VALUE {
386                     Reg64: (value >> 32),
387                 },
388             ];
389 
390             // safe because we have enough space for all the registers
391             check_whpx!(unsafe {
392                 WHvSetVirtualProcessorRegisters(
393                     self.vm_partition.partition,
394                     self.index,
395                     &REG_NAMES as *const WHV_REGISTER_NAME,
396                     REG_NAMES.len() as u32,
397                     values.as_ptr() as *const WHV_REGISTER_VALUE,
398                 )
399             })
400         } else {
401             self.inject_gp_fault()
402         }
403     }
404 
405     /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()>406     fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
407         // Verify that we're only being called in a situation where the last exit reason was
408         // ExitReasonX64MsrAccess
409         if self.last_exit_context.ExitReason
410             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
411         {
412             return Err(Error::new(EINVAL));
413         }
414 
415         // Do nothing, we assume TSC is always invariant
416         let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);
417 
418         if !success {
419             return self.inject_gp_fault();
420         }
421 
422         // Get the next rip from the exit context
423         let rip = self.last_exit_context.VpContext.Rip
424             + self.last_exit_context.VpContext.InstructionLength() as u64;
425 
426         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];
427 
428         let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];
429 
430         // safe because we have enough space for all the registers
431         check_whpx!(unsafe {
432             WHvSetVirtualProcessorRegisters(
433                 self.vm_partition.partition,
434                 self.index,
435                 &REG_NAMES as *const WHV_REGISTER_NAME,
436                 REG_NAMES.len() as u32,
437                 values.as_ptr() as *const WHV_REGISTER_VALUE,
438             )
439         })
440     }
441 
inject_gp_fault(&self) -> Result<()>442     fn inject_gp_fault(&self) -> Result<()> {
443         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];
444 
445         let mut event = WHV_REGISTER_VALUE {
446             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
447                 __bindgen_anon_1: Default::default(),
448             },
449         };
450         // safe because we have enough space for all the registers
451         check_whpx!(unsafe {
452             WHvGetVirtualProcessorRegisters(
453                 self.vm_partition.partition,
454                 self.index,
455                 &REG_NAMES as *const WHV_REGISTER_NAME,
456                 REG_NAMES.len() as u32,
457                 &mut event as *mut WHV_REGISTER_VALUE,
458             )
459         })?;
460 
461         if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
462             error!("Unable to inject gp fault because pending exception exists");
463             return Err(Error::new(EINVAL));
464         }
465 
466         let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };
467 
468         pending_exception.set_EventPending(1);
469         // GP faults set error code
470         pending_exception.set_DeliverErrorCode(1);
471         // GP fault error code is 0 unless the fault is segment related
472         pending_exception.ErrorCode = 0;
473         // This must be set to WHvX64PendingEventException
474         pending_exception
475             .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
476         // GP fault vector is 13
477         const GP_VECTOR: u32 = 13;
478         pending_exception.set_Vector(GP_VECTOR);
479 
480         let event = WHV_REGISTER_VALUE {
481             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
482                 __bindgen_anon_1: pending_exception,
483             },
484         };
485 
486         // safe because we have enough space for all the registers
487         check_whpx!(unsafe {
488             WHvSetVirtualProcessorRegisters(
489                 self.vm_partition.partition,
490                 self.index,
491                 &REG_NAMES as *const WHV_REGISTER_NAME,
492                 REG_NAMES.len() as u32,
493                 &event as *const WHV_REGISTER_VALUE,
494             )
495         })
496     }
497 }
498 
499 impl Vcpu for WhpxVcpu {
500     /// Makes a shallow clone of this `Vcpu`.
try_clone(&self) -> Result<Self>501     fn try_clone(&self) -> Result<Self> {
502         Ok(WhpxVcpu {
503             index: self.index,
504             safe_virtual_processor: self.safe_virtual_processor.clone(),
505             vm_partition: self.vm_partition.clone(),
506             last_exit_context: self.last_exit_context.clone(),
507             instruction_emulator: self.instruction_emulator.clone(),
508             tsc_frequency: self.tsc_frequency,
509             apic_frequency: self.apic_frequency,
510         })
511     }
512 
    /// Upcasts this concrete vcpu to a `&dyn Vcpu` trait object.
    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }
516 
517     /// Returns the vcpu id.
id(&self) -> usize518     fn id(&self) -> usize {
519         self.index.try_into().unwrap()
520     }
521 
522     /// Exits the vcpu immediately if exit is true
set_immediate_exit(&self, exit: bool)523     fn set_immediate_exit(&self, exit: bool) {
524         if exit {
525             // safe because we own this whpx virtual processor index, and assume the vm partition is
526             // still valid
527             unsafe {
528                 WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
529             }
530         }
531     }
532 
    /// Signals to the hypervisor that this guest is being paused by userspace. On some hypervisors,
    /// this is used to control the pvclock. On WHPX, we handle it separately with virtio-pvclock.
    /// So the correct implementation here is to do nothing.
    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }
539 
    /// Enables a hypervisor-specific extension on this Vcpu.  `cap` is a constant defined by the
    /// hypervisor API (e.g., kvm.h).  `args` are the arguments for enabling the feature, if any.
    ///
    /// # Safety
    /// This implementation performs no raw operation; it unconditionally
    /// returns `ENXIO`, so there is no additional contract for callers here.
    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        // Whpx does not support raw capability on the vcpu.
        Err(Error::new(ENXIO))
    }
546 
    /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
    ///
    /// Once called, it will determine whether a mmio read or mmio write was the reason for the mmio
    /// exit, call `handle_fn` with the respective IoOperation to perform the mmio read or
    /// write, and set the return data in the vcpu so that the vcpu can resume running.
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        // Context threaded through to the emulator callbacks; only the MMIO
        // handler is installed for this path.
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: Some(handle_fn),
            handle_io: None,
        };
        // SAFETY: safe as long as all callbacks occur before this fn returns,
        // since `ctx` (borrowed by the emulator) lives until then.
        check_whpx!(unsafe {
            WHvEmulatorTryMmioEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
                &mut status,
            )
        })?;
        // SAFETY: safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // SAFETY: safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }
579 
    /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
    ///
    /// Once called, it will determine whether an io in or io out was the reason for the io exit,
    /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
    /// and set the return data in the vcpu so that the vcpu can resume running.
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        // Context threaded through to the emulator callbacks; only the PIO
        // handler is installed for this path.
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: None,
            handle_io: Some(handle_fn),
        };
        // SAFETY: safe as long as all callbacks occur before this fn returns,
        // since `ctx` (borrowed by the emulator) lives until then.
        check_whpx!(unsafe {
            WHvEmulatorTryIoEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
                &mut status,
            )
        })?;
        // SAFETY: safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // SAFETY: safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }
611 
    /// This is unhandled currently since we don't emulate hypercall instructions for whpx;
    /// the callback is accepted and ignored.
    fn handle_hyperv_hypercall(&self, _func: &mut dyn FnMut(HypervHypercall) -> u64) -> Result<()> {
        Ok(())
    }
616 
617     /// This function should be called after `Vcpu::run` returns `VcpuExit::RdMsr`,
618     /// and in the same thread as run.
619     ///
620     /// It will put `data` into the user buffer and return.
handle_rdmsr(&self, _data: u64) -> Result<()>621     fn handle_rdmsr(&self, _data: u64) -> Result<()> {
622         // TODO(b/235691411): Implement.
623         Err(Error::new(libc::ENXIO))
624     }
625 
    /// This function should be called after `Vcpu::run` returns `VcpuExit::WrMsr`,
    /// and in the same thread as run. Currently a no-op.
    fn handle_wrmsr(&self) {
        // TODO(b/235691411): Implement.
    }
631 
    /// Runs the VCPU once, blocking until the next exit, and translates the WHPX exit
    /// context into a generic `VcpuExit`.
    ///
    /// WHPX writes the exit details into `self.last_exit_context`, which later handlers
    /// (e.g. `handle_cpuid`, the MMIO/PIO emulation paths) read to complete the exit.
    #[allow(non_upper_case_globals)]
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY: we own this whpx virtual processor index, and assume the vm partition is
        // still valid.
        // NOTE(review): this casts the Arc's shared pointer to *mut so the kernel can write
        // the exit context; presumably no clone of the Arc reads it concurrently — confirm.
        let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
        check_whpx!(unsafe {
            WHvRunVirtualProcessor(
                self.vm_partition.partition,
                self.index,
                exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
                size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
            )
        })?;

        match self.last_exit_context.ExitReason {
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonNone => Ok(VcpuExit::Unknown),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
                Ok(VcpuExit::UnrecoverableException)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
                Ok(VcpuExit::InvalidVpRegister)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
                Ok(VcpuExit::UnsupportedFeature)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
                Ok(VcpuExit::IrqWindowOpen)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
            // additional exits that are configurable
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
                // SAFETY: this was an APIC EOI exit, so we trust the kernel to have filled
                // in the ApicEoi union field properly.
                let vector = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .ApicEoi
                        .InterruptVector as u8
                };
                Ok(VcpuExit::IoapicEoi { vector })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
                // SAFETY: we know this was an MSR access exit, so the MsrAccess union field
                // is the valid one.
                let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };

                // SAFETY: we know this was an MSR access exit.
                let is_write = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .MsrAccess
                        .AccessInfo
                        .__bindgen_anon_1
                        .IsWrite()
                        == 1
                };
                if is_write {
                    // SAFETY: we know this was an MSR access exit.
                    let value = unsafe {
                        // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
                        // specific register, so reassemble the value from the two halves.
                        (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
                            | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
                    };
                    self.handle_msr_write(id, value)?;
                } else {
                    self.handle_msr_read(id)?;
                }
                Ok(VcpuExit::MsrAccess)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
                // SAFETY: we know this was a CPUID exit, so the CpuidAccess union field is
                // the valid one. The Default* fields hold the hypervisor's suggested result.
                let entry = unsafe {
                    CpuIdEntry {
                        function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
                        index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
                        flags: 0,
                        cpuid: CpuidResult {
                            eax: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRax as u32,
                            ebx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRbx as u32,
                            ecx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRcx as u32,
                            edx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRdx as u32,
                        },
                    }
                };
                Ok(VcpuExit::Cpuid { entry })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
            // undocumented exit calls from the header file, WinHvPlatformDefs.h.
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
                Ok(VcpuExit::ApicInitSipiTrap)
            }
            // exit caused by host cancellation through WHvCancelRunVirtualProcessor.
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
            r => panic!("unknown exit reason: {}", r),
        }
    }
748 }
749 
750 impl VcpuX86_64 for WhpxVcpu {
751     /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
752     /// interrupts into the guest.
set_interrupt_window_requested(&self, requested: bool)753     fn set_interrupt_window_requested(&self, requested: bool) {
754         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
755             [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
756         let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
757             Default::default();
758         notifications.set_InterruptNotification(if requested { 1 } else { 0 });
759         let notify_register = WHV_REGISTER_VALUE {
760             DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
761                 __bindgen_anon_1: notifications,
762             },
763         };
764         // safe because we have enough space for all the registers
765         check_whpx!(unsafe {
766             WHvSetVirtualProcessorRegisters(
767                 self.vm_partition.partition,
768                 self.index,
769                 &REG_NAMES as *const WHV_REGISTER_NAME,
770                 REG_NAMES.len() as u32,
771                 &notify_register as *const WHV_REGISTER_VALUE,
772             )
773         })
774         .unwrap();
775     }
776 
777     /// Checks if we can inject an interrupt into the VCPU.
ready_for_interrupt(&self) -> bool778     fn ready_for_interrupt(&self) -> bool {
779         // safe because InterruptionPending bit is always valid in ExecutionState struct
780         let pending = unsafe {
781             self.last_exit_context
782                 .VpContext
783                 .ExecutionState
784                 .__bindgen_anon_1
785                 .InterruptionPending()
786         };
787         // safe because InterruptShadow bit is always valid in ExecutionState struct
788         let shadow = unsafe {
789             self.last_exit_context
790                 .VpContext
791                 .ExecutionState
792                 .__bindgen_anon_1
793                 .InterruptShadow()
794         };
795 
796         let eflags = self.last_exit_context.VpContext.Rflags;
797         const IF_MASK: u64 = 0x00000200;
798 
799         // can't inject an interrupt if InterruptShadow or InterruptPending bits are set, or if
800         // the IF flag is clear
801         shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
802     }
803 
804     /// Injects interrupt vector `irq` into the VCPU.
interrupt(&self, irq: u32) -> Result<()>805     fn interrupt(&self, irq: u32) -> Result<()> {
806         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
807             [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
808         let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
809             Default::default();
810         pending_interrupt.set_InterruptionPending(1);
811         pending_interrupt
812             .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
813         pending_interrupt.set_InterruptionVector(irq);
814         let interrupt = WHV_REGISTER_VALUE {
815             PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
816                 __bindgen_anon_1: pending_interrupt,
817             },
818         };
819         // safe because we have enough space for all the registers
820         check_whpx!(unsafe {
821             WHvSetVirtualProcessorRegisters(
822                 self.vm_partition.partition,
823                 self.index,
824                 &REG_NAMES as *const WHV_REGISTER_NAME,
825                 REG_NAMES.len() as u32,
826                 &interrupt as *const WHV_REGISTER_VALUE,
827             )
828         })
829     }
830 
831     /// Injects a non-maskable interrupt into the VCPU.
inject_nmi(&self) -> Result<()>832     fn inject_nmi(&self) -> Result<()> {
833         const REG_NAMES: [WHV_REGISTER_NAME; 1] =
834             [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
835         let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
836             Default::default();
837         pending_interrupt.set_InterruptionPending(1);
838         pending_interrupt
839             .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
840         const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
841         pending_interrupt.set_InterruptionVector(NMI_VECTOR);
842         let interrupt = WHV_REGISTER_VALUE {
843             PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
844                 __bindgen_anon_1: pending_interrupt,
845             },
846         };
847         // safe because we have enough space for all the registers
848         check_whpx!(unsafe {
849             WHvSetVirtualProcessorRegisters(
850                 self.vm_partition.partition,
851                 self.index,
852                 &REG_NAMES as *const WHV_REGISTER_NAME,
853                 REG_NAMES.len() as u32,
854                 &interrupt as *const WHV_REGISTER_VALUE,
855             )
856         })
857     }
858 
859     /// Gets the VCPU general purpose registers.
get_regs(&self) -> Result<Regs>860     fn get_regs(&self) -> Result<Regs> {
861         let mut whpx_regs: WhpxRegs = Default::default();
862         let reg_names = WhpxRegs::get_register_names();
863         // safe because we have enough space for all the registers
864         check_whpx!(unsafe {
865             WHvGetVirtualProcessorRegisters(
866                 self.vm_partition.partition,
867                 self.index,
868                 reg_names as *const WHV_REGISTER_NAME,
869                 reg_names.len() as u32,
870                 whpx_regs.as_mut_ptr(),
871             )
872         })?;
873         Ok(Regs::from(&whpx_regs))
874     }
875 
876     /// Sets the VCPU general purpose registers.
set_regs(&self, regs: &Regs) -> Result<()>877     fn set_regs(&self, regs: &Regs) -> Result<()> {
878         let whpx_regs = WhpxRegs::from(regs);
879         let reg_names = WhpxRegs::get_register_names();
880         // safe because we have enough space for all the registers
881         check_whpx!(unsafe {
882             WHvSetVirtualProcessorRegisters(
883                 self.vm_partition.partition,
884                 self.index,
885                 reg_names as *const WHV_REGISTER_NAME,
886                 reg_names.len() as u32,
887                 whpx_regs.as_ptr(),
888             )
889         })
890     }
891 
892     /// Gets the VCPU special registers.
get_sregs(&self) -> Result<Sregs>893     fn get_sregs(&self) -> Result<Sregs> {
894         let mut whpx_sregs: WhpxSregs = Default::default();
895         let reg_names = WhpxSregs::get_register_names();
896         // safe because we have enough space for all the registers
897         check_whpx!(unsafe {
898             WHvGetVirtualProcessorRegisters(
899                 self.vm_partition.partition,
900                 self.index,
901                 reg_names as *const WHV_REGISTER_NAME,
902                 reg_names.len() as u32,
903                 whpx_sregs.as_mut_ptr(),
904             )
905         })?;
906         Ok(Sregs::from(&whpx_sregs))
907     }
908 
909     /// Sets the VCPU special registers.
set_sregs(&self, sregs: &Sregs) -> Result<()>910     fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
911         let whpx_sregs = WhpxSregs::from(sregs);
912         let reg_names = WhpxSregs::get_register_names();
913         // safe because we have enough space for all the registers
914         check_whpx!(unsafe {
915             WHvSetVirtualProcessorRegisters(
916                 self.vm_partition.partition,
917                 self.index,
918                 reg_names as *const WHV_REGISTER_NAME,
919                 reg_names.len() as u32,
920                 whpx_sregs.as_ptr(),
921             )
922         })
923     }
924 
925     /// Gets the VCPU FPU registers.
get_fpu(&self) -> Result<Fpu>926     fn get_fpu(&self) -> Result<Fpu> {
927         let mut whpx_fpu: WhpxFpu = Default::default();
928         let reg_names = WhpxFpu::get_register_names();
929         // safe because we have enough space for all the registers
930         check_whpx!(unsafe {
931             WHvGetVirtualProcessorRegisters(
932                 self.vm_partition.partition,
933                 self.index,
934                 reg_names as *const WHV_REGISTER_NAME,
935                 reg_names.len() as u32,
936                 whpx_fpu.as_mut_ptr(),
937             )
938         })?;
939         Ok(Fpu::from(&whpx_fpu))
940     }
941 
942     /// Sets the VCPU FPU registers.
set_fpu(&self, fpu: &Fpu) -> Result<()>943     fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
944         let whpx_fpu = WhpxFpu::from(fpu);
945         let reg_names = WhpxFpu::get_register_names();
946         // safe because we have enough space for all the registers
947         check_whpx!(unsafe {
948             WHvSetVirtualProcessorRegisters(
949                 self.vm_partition.partition,
950                 self.index,
951                 reg_names as *const WHV_REGISTER_NAME,
952                 reg_names.len() as u32,
953                 whpx_fpu.as_ptr(),
954             )
955         })
956     }
957 
958     /// Gets the VCPU XSAVE.
get_xsave(&self) -> Result<Xsave>959     fn get_xsave(&self) -> Result<Xsave> {
960         let mut empty_buffer = [0u8; 1];
961         let mut needed_buf_size: u32 = 0;
962 
963         // Find out how much space is needed for XSAVEs.
964         let res = unsafe {
965             WHvGetVirtualProcessorXsaveState(
966                 self.vm_partition.partition,
967                 self.index,
968                 empty_buffer.as_mut_ptr() as *mut _,
969                 0,
970                 &mut needed_buf_size,
971             )
972         };
973         if res != WHV_E_INSUFFICIENT_BUFFER.0 {
974             // This should always work, so if it doesn't, we'll return unsupported.
975             error!("failed to get size of vcpu xsave");
976             return Err(Error::new(EIO));
977         }
978 
979         let mut xsave = Xsave::new(needed_buf_size as usize);
980         // SAFETY: xsave_data is valid for the duration of the FFI call, and we pass its length in
981         // bytes so writes are bounded within the buffer.
982         check_whpx!(unsafe {
983             WHvGetVirtualProcessorXsaveState(
984                 self.vm_partition.partition,
985                 self.index,
986                 xsave.as_mut_ptr(),
987                 xsave.len() as u32,
988                 &mut needed_buf_size,
989             )
990         })?;
991         Ok(xsave)
992     }
993 
994     /// Sets the VCPU XSAVE.
set_xsave(&self, xsave: &Xsave) -> Result<()>995     fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
996         // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
997         // length in bytes so reads are bounded within the buffer.
998         check_whpx!(unsafe {
999             WHvSetVirtualProcessorXsaveState(
1000                 self.vm_partition.partition,
1001                 self.index,
1002                 xsave.as_ptr(),
1003                 xsave.len() as u32,
1004             )
1005         })
1006     }
1007 
get_interrupt_state(&self) -> Result<serde_json::Value>1008     fn get_interrupt_state(&self) -> Result<serde_json::Value> {
1009         let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
1010         let reg_names = WhpxInterruptRegs::get_register_names();
1011         // SAFETY: we have enough space for all the registers & the memory lives for the duration
1012         // of the FFI call.
1013         check_whpx!(unsafe {
1014             WHvGetVirtualProcessorRegisters(
1015                 self.vm_partition.partition,
1016                 self.index,
1017                 reg_names as *const WHV_REGISTER_NAME,
1018                 reg_names.len() as u32,
1019                 whpx_interrupt_regs.as_mut_ptr(),
1020             )
1021         })?;
1022 
1023         serde_json::to_value(whpx_interrupt_regs.into_serializable()).map_err(|e| {
1024             error!("failed to serialize interrupt state: {:?}", e);
1025             Error::new(EIO)
1026         })
1027     }
1028 
set_interrupt_state(&self, data: serde_json::Value) -> Result<()>1029     fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()> {
1030         let whpx_interrupt_regs =
1031             WhpxInterruptRegs::from_serializable(serde_json::from_value(data).map_err(|e| {
1032                 error!("failed to serialize interrupt state: {:?}", e);
1033                 Error::new(EIO)
1034             })?);
1035         let reg_names = WhpxInterruptRegs::get_register_names();
1036         // SAFETY: we have enough space for all the registers & the memory lives for the duration
1037         // of the FFI call.
1038         check_whpx!(unsafe {
1039             WHvSetVirtualProcessorRegisters(
1040                 self.vm_partition.partition,
1041                 self.index,
1042                 reg_names as *const WHV_REGISTER_NAME,
1043                 reg_names.len() as u32,
1044                 whpx_interrupt_regs.as_ptr(),
1045             )
1046         })
1047     }
1048 
1049     /// Gets the VCPU debug registers.
get_debugregs(&self) -> Result<DebugRegs>1050     fn get_debugregs(&self) -> Result<DebugRegs> {
1051         let mut whpx_debugregs: WhpxDebugRegs = Default::default();
1052         let reg_names = WhpxDebugRegs::get_register_names();
1053         // safe because we have enough space for all the registers
1054         check_whpx!(unsafe {
1055             WHvGetVirtualProcessorRegisters(
1056                 self.vm_partition.partition,
1057                 self.index,
1058                 reg_names as *const WHV_REGISTER_NAME,
1059                 reg_names.len() as u32,
1060                 whpx_debugregs.as_mut_ptr(),
1061             )
1062         })?;
1063         Ok(DebugRegs::from(&whpx_debugregs))
1064     }
1065 
1066     /// Sets the VCPU debug registers.
set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>1067     fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
1068         let whpx_debugregs = WhpxDebugRegs::from(debugregs);
1069         let reg_names = WhpxDebugRegs::get_register_names();
1070         // safe because we have enough space for all the registers
1071         check_whpx!(unsafe {
1072             WHvSetVirtualProcessorRegisters(
1073                 self.vm_partition.partition,
1074                 self.index,
1075                 reg_names as *const WHV_REGISTER_NAME,
1076                 reg_names.len() as u32,
1077                 whpx_debugregs.as_ptr(),
1078             )
1079         })
1080     }
1081 
1082     /// Gets the VCPU extended control registers.
get_xcrs(&self) -> Result<BTreeMap<u32, u64>>1083     fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
1084         const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
1085         let mut reg_value = WHV_REGISTER_VALUE::default();
1086         // safe because we have enough space for all the registers in whpx_regs
1087         check_whpx!(unsafe {
1088             WHvGetVirtualProcessorRegisters(
1089                 self.vm_partition.partition,
1090                 self.index,
1091                 &REG_NAME,
1092                 /* RegisterCount */ 1,
1093                 &mut reg_value,
1094             )
1095         })?;
1096 
1097         // safe because the union value, reg64, is safe to pull out assuming
1098         // kernel filled in the xcrs properly.
1099         let xcr0 = unsafe { reg_value.Reg64 };
1100 
1101         // whpx only supports xcr0
1102         let xcrs = BTreeMap::from([(0, xcr0)]);
1103         Ok(xcrs)
1104     }
1105 
1106     /// Sets a VCPU extended control register.
set_xcr(&self, xcr_index: u32, value: u64) -> Result<()>1107     fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
1108         if xcr_index != 0 {
1109             // invalid xcr register provided
1110             return Err(Error::new(EINVAL));
1111         }
1112 
1113         const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
1114         let reg_value = WHV_REGISTER_VALUE { Reg64: value };
1115         // safe because we have enough space for all the registers in whpx_xcrs
1116         check_whpx!(unsafe {
1117             WHvSetVirtualProcessorRegisters(
1118                 self.vm_partition.partition,
1119                 self.index,
1120                 &REG_NAME,
1121                 /* RegisterCount */ 1,
1122                 &reg_value,
1123             )
1124         })
1125     }
1126 
1127     /// Gets the value of a single model-specific register.
get_msr(&self, msr_index: u32) -> Result<u64>1128     fn get_msr(&self, msr_index: u32) -> Result<u64> {
1129         let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
1130         let mut msr_value = WHV_REGISTER_VALUE::default();
1131         // safe because we have enough space for all the registers in whpx_regs
1132         check_whpx!(unsafe {
1133             WHvGetVirtualProcessorRegisters(
1134                 self.vm_partition.partition,
1135                 self.index,
1136                 &msr_name,
1137                 /* RegisterCount */ 1,
1138                 &mut msr_value,
1139             )
1140         })?;
1141 
1142         // safe because Reg64 will be a valid union value
1143         let value = unsafe { msr_value.Reg64 };
1144         Ok(value)
1145     }
1146 
get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>1147     fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
1148         // Note that some members of VALID_MSRS cannot be fetched from WHPX with
1149         // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of
1150         // permitted MSRs here.
1151         //
1152         // We intentionally exclude WHvRegisterPendingInterruption and
1153         // WHvRegisterInterruptState because they are included in
1154         // get_interrupt_state.
1155         //
1156         // We intentionally exclude MSR_TSC because in snapshotting it is
1157         // handled by the generic x86_64 VCPU snapshot/restore. Non snapshot
1158         // consumers should use get/set_tsc_adjust to access the adjust register
1159         // if needed.
1160         const MSRS_TO_SAVE: &[u32] = &[
1161             MSR_EFER,
1162             MSR_KERNEL_GS_BASE,
1163             MSR_APIC_BASE,
1164             MSR_SYSENTER_CS,
1165             MSR_SYSENTER_EIP,
1166             MSR_SYSENTER_ESP,
1167             MSR_STAR,
1168             MSR_LSTAR,
1169             MSR_CSTAR,
1170             MSR_SFMASK,
1171         ];
1172 
1173         let registers = MSRS_TO_SAVE
1174             .iter()
1175             .map(|msr_index| {
1176                 let value = self.get_msr(*msr_index)?;
1177                 Ok((*msr_index, value))
1178             })
1179             .collect::<Result<BTreeMap<u32, u64>>>()?;
1180 
1181         Ok(registers)
1182     }
1183 
1184     /// Sets the value of a single model-specific register.
set_msr(&self, msr_index: u32, value: u64) -> Result<()>1185     fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
1186         let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
1187         let msr_value = WHV_REGISTER_VALUE { Reg64: value };
1188         check_whpx!(unsafe {
1189             WHvSetVirtualProcessorRegisters(
1190                 self.vm_partition.partition,
1191                 self.index,
1192                 &msr_name,
1193                 /* RegisterCount */ 1,
1194                 &msr_value,
1195             )
1196         })
1197     }
1198 
    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu and needs to be set up on the vm instead,
    /// so this unconditionally fails with ENXIO.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        Err(Error::new(ENXIO))
    }
1204 
1205     /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
1206     /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
1207     /// will then set the appropriate registers on the vcpu.
handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>1208     fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
1209         // Verify that we're only being called in a situation where the last exit reason was
1210         // ExitReasonX64Cpuid
1211         if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
1212             return Err(Error::new(EINVAL));
1213         }
1214 
1215         // Get the next rip from the exit context
1216         let rip = self.last_exit_context.VpContext.Rip
1217             + self.last_exit_context.VpContext.InstructionLength() as u64;
1218 
1219         const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
1220             WHV_REGISTER_NAME_WHvX64RegisterRip,
1221             WHV_REGISTER_NAME_WHvX64RegisterRax,
1222             WHV_REGISTER_NAME_WHvX64RegisterRbx,
1223             WHV_REGISTER_NAME_WHvX64RegisterRcx,
1224             WHV_REGISTER_NAME_WHvX64RegisterRdx,
1225         ];
1226 
1227         let values = vec![
1228             WHV_REGISTER_VALUE { Reg64: rip },
1229             WHV_REGISTER_VALUE {
1230                 Reg64: entry.cpuid.eax as u64,
1231             },
1232             WHV_REGISTER_VALUE {
1233                 Reg64: entry.cpuid.ebx as u64,
1234             },
1235             WHV_REGISTER_VALUE {
1236                 Reg64: entry.cpuid.ecx as u64,
1237             },
1238             WHV_REGISTER_VALUE {
1239                 Reg64: entry.cpuid.edx as u64,
1240             },
1241         ];
1242 
1243         // safe because we have enough space for all the registers
1244         check_whpx!(unsafe {
1245             WHvSetVirtualProcessorRegisters(
1246                 self.vm_partition.partition,
1247                 self.index,
1248                 &REG_NAMES as *const WHV_REGISTER_NAME,
1249                 REG_NAMES.len() as u32,
1250                 values.as_ptr() as *const WHV_REGISTER_VALUE,
1251             )
1252         })
1253     }
1254 
    /// Gets the system emulated hyper-v CPUID values.
    /// For WHPX, this is not valid on the vcpu and needs to be set up on the vm instead,
    /// so this unconditionally fails with ENXIO.
    fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        Err(Error::new(ENXIO))
    }
1260 
    /// Sets up debug registers and configures the vcpu for handling guest debug events.
    ///
    /// Not implemented for WHPX yet; always returns ENOENT.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }
1266 
    /// Restores guest timekeeping after a snapshot restore by reprogramming the guest TSC
    /// from the host reference moment and the saved offset.
    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
        // Set the guest TSC such that it has the same TSC_OFFSET as it did at
        // the moment it was snapshotted. This is required for virtio-pvclock
        // to function correctly. (virtio-pvclock assumes the offset is fixed,
        // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
        // from CLOCK_MONOTONIC by setting the timebase.)
        // wrapping_add mirrors the hardware's modular TSC arithmetic.
        self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
    }
1275 }
1276 
/// Maps an MSR index to the WHPX register name that backs it, or `None` if the MSR is not
/// in the `VALID_MSRS` table.
fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
    VALID_MSRS.get(&msr_index).copied()
}
1280 
1281 // run calls are tested with the integration tests since the full vcpu needs to be setup for it.
1282 #[cfg(test)]
1283 mod tests {
1284     use vm_memory::GuestAddress;
1285     use vm_memory::GuestMemory;
1286 
1287     use super::*;
1288     use crate::VmX86_64;
1289 
new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm1290     fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
1291         let whpx = Whpx::new().expect("failed to instantiate whpx");
1292         let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
1293             .expect("failed to get whpx features");
1294         WhpxVm::new(
1295             &whpx,
1296             cpu_count,
1297             mem,
1298             CpuId::new(0),
1299             local_apic_supported,
1300             None,
1301         )
1302         .expect("failed to create whpx vm")
1303     }
1304 
1305     #[test]
try_clone()1306     fn try_clone() {
1307         if !Whpx::is_enabled() {
1308             return;
1309         }
1310         let cpu_count = 1;
1311         let mem =
1312             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1313         let vm = new_vm(cpu_count, mem);
1314         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1315         let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1316         let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
1317     }
1318 
1319     #[test]
index()1320     fn index() {
1321         if !Whpx::is_enabled() {
1322             return;
1323         }
1324         let cpu_count = 2;
1325         let mem =
1326             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1327         let vm = new_vm(cpu_count, mem);
1328         let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1329         let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1330         assert_eq!(vcpu0.index, 0);
1331         vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
1332         let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1333         assert_eq!(vcpu1.index, 1);
1334     }
1335 
1336     #[test]
get_regs()1337     fn get_regs() {
1338         if !Whpx::is_enabled() {
1339             return;
1340         }
1341         let cpu_count = 1;
1342         let mem =
1343             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1344         let vm = new_vm(cpu_count, mem);
1345         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1346 
1347         vcpu.get_regs().expect("failed to get regs");
1348     }
1349 
1350     #[test]
set_regs()1351     fn set_regs() {
1352         if !Whpx::is_enabled() {
1353             return;
1354         }
1355         let cpu_count = 1;
1356         let mem =
1357             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1358         let vm = new_vm(cpu_count, mem);
1359         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1360 
1361         let mut regs = vcpu.get_regs().expect("failed to get regs");
1362         let new_val = regs.rax + 2;
1363         regs.rax = new_val;
1364 
1365         vcpu.set_regs(&regs).expect("failed to set regs");
1366         let new_regs = vcpu.get_regs().expect("failed to get regs");
1367         assert_eq!(new_regs.rax, new_val);
1368     }
1369 
1370     #[test]
debugregs()1371     fn debugregs() {
1372         if !Whpx::is_enabled() {
1373             return;
1374         }
1375         let cpu_count = 1;
1376         let mem =
1377             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1378         let vm = new_vm(cpu_count, mem);
1379         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1380 
1381         let mut dregs = vcpu.get_debugregs().unwrap();
1382         dregs.dr7 += 13;
1383         vcpu.set_debugregs(&dregs).unwrap();
1384         let dregs2 = vcpu.get_debugregs().unwrap();
1385         assert_eq!(dregs.dr7, dregs2.dr7);
1386     }
1387 
1388     #[test]
sregs()1389     fn sregs() {
1390         if !Whpx::is_enabled() {
1391             return;
1392         }
1393         let cpu_count = 1;
1394         let mem =
1395             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1396         let vm = new_vm(cpu_count, mem);
1397         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1398 
1399         let mut sregs = vcpu.get_sregs().unwrap();
1400         sregs.cs.base += 7;
1401         vcpu.set_sregs(&sregs).unwrap();
1402         let sregs2 = vcpu.get_sregs().unwrap();
1403         assert_eq!(sregs.cs.base, sregs2.cs.base);
1404     }
1405 
1406     #[test]
fpu()1407     fn fpu() {
1408         if !Whpx::is_enabled() {
1409             return;
1410         }
1411         let cpu_count = 1;
1412         let mem =
1413             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1414         let vm = new_vm(cpu_count, mem);
1415         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1416 
1417         let mut fpu = vcpu.get_fpu().unwrap();
1418         fpu.fpr[0][0] += 3;
1419         vcpu.set_fpu(&fpu).unwrap();
1420         let fpu2 = vcpu.get_fpu().unwrap();
1421         assert_eq!(fpu.fpr[0][0], fpu2.fpr[0][0]);
1422     }
1423 
1424     #[test]
xcrs()1425     fn xcrs() {
1426         if !Whpx::is_enabled() {
1427             return;
1428         }
1429         let whpx = Whpx::new().expect("failed to instantiate whpx");
1430         let cpu_count = 1;
1431         let mem =
1432             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1433         let vm = new_vm(cpu_count, mem);
1434         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1435         // check xsave support
1436         if !whpx.check_capability(HypervisorCap::Xcrs) {
1437             return;
1438         }
1439 
1440         vcpu.set_xcr(0, 1).unwrap();
1441         let xcrs = vcpu.get_xcrs().unwrap();
1442         let xcr0 = xcrs.get(&0).unwrap();
1443         assert_eq!(*xcr0, 1);
1444     }
1445 
1446     #[test]
set_msr()1447     fn set_msr() {
1448         if !Whpx::is_enabled() {
1449             return;
1450         }
1451         let cpu_count = 1;
1452         let mem =
1453             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1454         let vm = new_vm(cpu_count, mem);
1455         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1456 
1457         vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();
1458 
1459         let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
1460         assert_eq!(gs_base, 42);
1461     }
1462 
1463     #[test]
get_msr()1464     fn get_msr() {
1465         if !Whpx::is_enabled() {
1466             return;
1467         }
1468         let cpu_count = 1;
1469         let mem =
1470             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1471         let vm = new_vm(cpu_count, mem);
1472         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1473 
1474         // This one should succeed
1475         let _value = vcpu.get_msr(MSR_TSC).unwrap();
1476 
1477         // This one will fail to fetch
1478         vcpu.get_msr(MSR_TSC + 1)
1479             .expect_err("invalid MSR index should fail");
1480     }
1481 
1482     #[test]
set_efer()1483     fn set_efer() {
1484         if !Whpx::is_enabled() {
1485             return;
1486         }
1487         // EFER Bits
1488         const EFER_SCE: u64 = 0x00000001;
1489         const EFER_LME: u64 = 0x00000100;
1490         const EFER_LMA: u64 = 0x00000400;
1491         const X86_CR0_PE: u64 = 0x1;
1492         const X86_CR0_PG: u64 = 0x80000000;
1493         const X86_CR4_PAE: u64 = 0x20;
1494 
1495         let cpu_count = 1;
1496         let mem =
1497             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1498         let vm = new_vm(cpu_count, mem);
1499         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1500 
1501         let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
1502         // Initial value should be 0
1503         assert_eq!(sregs.efer, 0);
1504 
1505         // Enable and activate long mode
1506         sregs.cr0 |= X86_CR0_PE; // enable protected mode
1507         sregs.cr0 |= X86_CR0_PG; // enable paging
1508         sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
1509         sregs.efer = EFER_LMA | EFER_LME;
1510         vcpu.set_sregs(&sregs).expect("failed to set sregs");
1511 
1512         // Verify that setting stuck
1513         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1514         assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
1515         assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
1516         assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
1517         assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);
1518 
1519         let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1520         assert_eq!(efer, EFER_LMA | EFER_LME);
1521 
1522         // Enable SCE via set_msrs
1523         vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
1524             .expect("failed to set msr");
1525 
1526         // Verify that setting stuck
1527         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1528         assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
1529         let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1530         assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
1531     }
1532 
1533     #[test]
get_and_set_xsave_smoke()1534     fn get_and_set_xsave_smoke() {
1535         if !Whpx::is_enabled() {
1536             return;
1537         }
1538         let cpu_count = 1;
1539         let mem =
1540             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1541         let vm = new_vm(cpu_count, mem);
1542         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1543 
1544         // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
1545         // succeed.
1546         let xsave = vcpu.get_xsave().unwrap();
1547         vcpu.set_xsave(&xsave).unwrap();
1548     }
1549 
1550     #[test]
get_and_set_interrupt_state_smoke()1551     fn get_and_set_interrupt_state_smoke() {
1552         if !Whpx::is_enabled() {
1553             return;
1554         }
1555         let cpu_count = 1;
1556         let mem =
1557             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1558         let vm = new_vm(cpu_count, mem);
1559         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1560 
1561         // For the sake of snapshotting, interrupt state is essentially opaque. We just want to make
1562         // sure our syscalls succeed.
1563         let interrupt_state = vcpu.get_interrupt_state().unwrap();
1564         vcpu.set_interrupt_state(interrupt_state).unwrap();
1565     }
1566 
1567     #[test]
get_all_msrs()1568     fn get_all_msrs() {
1569         if !Whpx::is_enabled() {
1570             return;
1571         }
1572         let cpu_count = 1;
1573         let mem =
1574             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1575         let vm = new_vm(cpu_count, mem);
1576         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1577 
1578         let all_msrs = vcpu.get_all_msrs().unwrap();
1579 
1580         // Our MSR buffer is init'ed to zeros in the registers. The APIC base will be non-zero, so
1581         // by asserting that we know the MSR fetch actually did get us data.
1582         let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
1583         assert_ne!(*apic_base, 0);
1584     }
1585 }
1586