// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::arch::x86_64::CpuidResult;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::mem::size_of;
use std::mem::size_of_val;
use std::sync::Arc;

use base::Error;
use base::Result;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENXIO;
use snapshot::AnySnapshot;
use vm_memory::GuestAddress;
use winapi::shared::winerror::E_UNEXPECTED;
use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;

use super::types::*;
use super::*;
use crate::CpuId;
use crate::CpuIdEntry;
use crate::DebugRegs;
use crate::Fpu;
use crate::IoOperation;
use crate::IoParams;
use crate::Regs;
use crate::Sregs;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuX86_64;
use crate::Xsave;

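// Values for the `Direction` field of the WHV access info structs used below:
// 0 indicates a read (MMIO) / in (PIO) access, 1 indicates a write / out access.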
const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;

/// This is the whpx instruction emulator, useful for deconstructing IO port & memory
/// instructions. Whpx does not do this automatically.
struct SafeInstructionEmulator {
    handle: WHV_EMULATOR_HANDLE,
}

impl SafeInstructionEmulator {
    fn new() -> Result<SafeInstructionEmulator> {
        const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
            Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
            Reserved: 0,
            WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
            WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
            WHvEmulatorGetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::get_virtual_processor_registers_cb,
            ),
            WHvEmulatorSetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::set_virtual_processor_registers_cb,
            ),
            WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
        };
        let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
        // safe because we pass in valid callbacks and an emulator handle for the kernel to place
        // the allocated handle into.
        check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;

        Ok(SafeInstructionEmulator { handle })
    }
}

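// These callback signatures mirror the function pointer fields of WHV_EMULATOR_CALLBACKS above,
// so they must remain `extern "stdcall"` for the emulator to invoke them correctly.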
trait InstructionEmulatorCallbacks {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT;
}

/// Context passed into the instruction emulator when trying io or mmio emulation.
/// Since we need this for set/get registers and memory translation,
/// a single context is used that captures all necessary contextual information for the operation.
struct InstructionEmulatorContext<'a> {
    vm_partition: Arc<SafePartition>,
    index: u32,
    handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Result<()>>,
    handle_io: Option<&'a mut dyn FnMut(IoParams)>,
}

impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because Windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        let Some(handle_io) = &mut ctx.handle_io else {
            return E_UNEXPECTED;
        };

        // safe because we trust the kernel to fill in the io_access
        let io_access_info = unsafe { &mut *io_access };
        let address = io_access_info.Port.into();
        let size = io_access_info.AccessSize as usize;
        // SAFETY: We trust the kernel to fill in the io_access
        let data: &mut [u8] = unsafe {
            assert!(size <= size_of_val(&io_access_info.Data));
            std::slice::from_raw_parts_mut(&mut io_access_info.Data as *mut u32 as *mut u8, size)
        };
        match io_access_info.Direction {
            WHPX_EXIT_DIRECTION_PIO_IN => {
                handle_io(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
                S_OK
            }
            WHPX_EXIT_DIRECTION_PIO_OUT => {
                handle_io(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
                S_OK
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because Windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        let Some(handle_mmio) = &mut ctx.handle_mmio else {
            return E_UNEXPECTED;
        };

        // safe because we trust the kernel to fill in the memory_access
        let memory_access_info = unsafe { &mut *memory_access };
        let address = memory_access_info.GpaAddress;
        let size = memory_access_info.AccessSize as usize;
        let data = &mut memory_access_info.Data[..size];

        match memory_access_info.Direction {
            WHPX_EXIT_DIRECTION_MMIO_READ => {
                if let Err(e) = handle_mmio(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                }) {
                    error!("handle_mmio read failed with {e}");
                    E_UNEXPECTED
                } else {
                    S_OK
                }
            }
            WHPX_EXIT_DIRECTION_MMIO_WRITE => {
                if let Err(e) = handle_mmio(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                }) {
                    error!("handle_mmio write failed with {e}");
                    E_UNEXPECTED
                } else {
                    S_OK
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because Windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvGetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because Windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvSetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT {
        // unsafe because Windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        let ret = unsafe {
            WHvTranslateGva(
                ctx.vm_partition.partition,
                ctx.index,
                gva,
                translate_flags,
                &mut translation_result,
                gpa,
            )
        };
        if ret == S_OK {
            // safe assuming the kernel passed in a valid result_code ptr
            unsafe {
                *translation_result_code = translation_result.ResultCode;
            }
        }
        ret
    }
}

impl Drop for SafeInstructionEmulator {
    fn drop(&mut self) {
        // safe because we own the instruction emulator
        check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
    }
}

// SAFETY: we can send and share the instruction emulator across threads safely even though the
// handle it wraps is a raw void pointer.
unsafe impl Send for SafeInstructionEmulator {}
unsafe impl Sync for SafeInstructionEmulator {}

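// RAII wrapper around a WHPX virtual processor: construction registers the processor with the
// partition, and `Drop` deletes it again.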
struct SafeVirtualProcessor {
    vm_partition: Arc<SafePartition>,
    index: u32,
}

impl SafeVirtualProcessor {
    fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
        // safe since the vm partition should be valid.
        check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
        Ok(SafeVirtualProcessor {
            vm_partition,
            index,
        })
    }
}

impl Drop for SafeVirtualProcessor {
    fn drop(&mut self) {
        // safe because we are the owner of this windows virtual processor.
        check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index) })
            .unwrap();
    }
}

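/// A vcpu backed by a WHPX virtual processor. `try_clone` produces shallow clones that share the
/// underlying processor, exit context, and instruction emulator.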
pub struct WhpxVcpu {
    index: u32,
    safe_virtual_processor: Arc<SafeVirtualProcessor>,
    vm_partition: Arc<SafePartition>,
    last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
    // must be an Arc, since we cannot "dupe" an instruction emulator similar to a handle.
    instruction_emulator: Arc<SafeInstructionEmulator>,
    tsc_frequency: Option<u64>,
    apic_frequency: Option<u32>,
}

impl WhpxVcpu {
    /// The `SafePartition` passed in is shared with the VM, and must remain valid for as long as
    /// this VCPU is alive. `index` is the index for this vcpu.
    pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
        let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
        let instruction_emulator = SafeInstructionEmulator::new()?;
        Ok(WhpxVcpu {
            index,
            safe_virtual_processor: Arc::new(safe_virtual_processor),
            vm_partition,
            last_exit_context: Arc::new(Default::default()),
            instruction_emulator: Arc::new(instruction_emulator),
            tsc_frequency: None,
            apic_frequency: None,
        })
    }

    pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
        self.tsc_frequency = tsc_frequency;
        self.apic_frequency = Some(lapic_frequency);
    }

    /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
    fn handle_msr_read(&mut self, id: u32) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

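        // The guest reads these Hyper-V synthetic MSRs to discover the TSC and APIC frequencies;
        // report the values configured via `set_frequencies`, or 0 if they were never set.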
        let value = match id {
            HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
            HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
            _ => None,
        };

        if let Some(value) = value {
            // Get the next rip from the exit context
            let rip = self.last_exit_context.VpContext.Rip
                + self.last_exit_context.VpContext.InstructionLength() as u64;

            const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
                WHV_REGISTER_NAME_WHvX64RegisterRip,
                WHV_REGISTER_NAME_WHvX64RegisterRax,
                WHV_REGISTER_NAME_WHvX64RegisterRdx,
            ];

            let values = vec![
                WHV_REGISTER_VALUE { Reg64: rip },
                // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
                WHV_REGISTER_VALUE {
                    Reg64: (value & 0xffffffff),
                },
                WHV_REGISTER_VALUE {
                    Reg64: (value >> 32),
                },
            ];

            // safe because we have enough space for all the registers
            check_whpx!(unsafe {
                WHvSetVirtualProcessorRegisters(
                    self.vm_partition.partition,
                    self.index,
                    &REG_NAMES as *const WHV_REGISTER_NAME,
                    REG_NAMES.len() as u32,
                    values.as_ptr() as *const WHV_REGISTER_VALUE,
                )
            })
        } else {
            self.inject_gp_fault()
        }
    }

    /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
    fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

        // Do nothing, we assume TSC is always invariant
        let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);

        if !success {
            return self.inject_gp_fault();
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];

        let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    fn inject_gp_fault(&self) -> Result<()> {
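        // Read the pending-event register first: injecting over an already-pending exception
        // would silently drop that event.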
        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];

        let mut event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: Default::default(),
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &mut event as *mut WHV_REGISTER_VALUE,
            )
        })?;

        // safe because we trust the kernel to fill in the union field properly.
        if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
            error!("Unable to inject gp fault because pending exception exists");
            return Err(Error::new(EINVAL));
        }

        // safe because we trust the kernel to fill in the union field properly.
        let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };

        pending_exception.set_EventPending(1);
        // GP faults set error code
        pending_exception.set_DeliverErrorCode(1);
        // GP fault error code is 0 unless the fault is segment related
        pending_exception.ErrorCode = 0;
        // This must be set to WHvX64PendingEventException
        pending_exception
            .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
        // GP fault vector is 13
        const GP_VECTOR: u32 = 13;
        pending_exception.set_Vector(GP_VECTOR);

        let event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: pending_exception,
            },
        };

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &event as *const WHV_REGISTER_VALUE,
            )
        })
    }
}

impl Vcpu for WhpxVcpu {
    /// Makes a shallow clone of this `Vcpu`.
    fn try_clone(&self) -> Result<Self> {
        Ok(WhpxVcpu {
            index: self.index,
            safe_virtual_processor: self.safe_virtual_processor.clone(),
            vm_partition: self.vm_partition.clone(),
            last_exit_context: self.last_exit_context.clone(),
            instruction_emulator: self.instruction_emulator.clone(),
            tsc_frequency: self.tsc_frequency,
            apic_frequency: self.apic_frequency,
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    /// Returns the vcpu id.
    fn id(&self) -> usize {
        self.index.try_into().unwrap()
    }

    /// Exits the vcpu immediately if exit is true
    fn set_immediate_exit(&self, exit: bool) {
        if exit {
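            // Cancelling the run makes WHvRunVirtualProcessor return early, which `run()`
            // surfaces as `VcpuExit::Canceled`.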
            // safe because we own this whpx virtual processor index, and assume the vm partition
            // is still valid
            unsafe {
                WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
            }
        }
    }

    /// Signals to the hypervisor that this guest is being paused by userspace. On some
    /// hypervisors, this is used to control the pvclock. On WHPX, we handle it separately with
    /// virtio-pvclock. So the correct implementation here is to do nothing.
    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }

    /// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the
    /// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any.
    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        // Whpx does not support raw capability on the vcpu.
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
    ///
    /// Once called, it will determine whether an MMIO read or MMIO write was the reason for the
    /// MMIO exit, call `handle_fn` with the respective IoOperation to perform the MMIO read or
    /// write, and set the return data in the vcpu so that the vcpu can resume running.
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: Some(handle_fn),
            handle_io: None,
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryMmioEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            self.inject_gp_fault()?;
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
    ///
    /// Once called, it will determine whether an io in or io out was the reason for the io exit,
    /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
    /// and set the return data in the vcpu so that the vcpu can resume running.
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: None,
            handle_io: Some(handle_fn),
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryIoEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    #[allow(non_upper_case_globals)]
    fn run(&mut self) -> Result<VcpuExit> {
        // safe because we own this whpx virtual processor index, and assume the vm partition is
        // still valid
        let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
        check_whpx!(unsafe {
            WHvRunVirtualProcessor(
                self.vm_partition.partition,
                self.index,
                exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
                size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
            )
        })?;

        match self.last_exit_context.ExitReason {
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
                Ok(VcpuExit::UnrecoverableException)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
                Ok(VcpuExit::InvalidVpRegister)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
                Ok(VcpuExit::UnsupportedFeature)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
                Ok(VcpuExit::IrqWindowOpen)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
            // additional exits that are configurable
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
                // safe because we trust the kernel to fill in the union field properly.
                let vector = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .ApicEoi
                        .InterruptVector as u8
                };
                Ok(VcpuExit::IoapicEoi { vector })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
                // Safe because we know this was an MSR access exit.
                let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };

                // Safe because we know this was an MSR access exit
                let is_write = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .MsrAccess
                        .AccessInfo
                        .__bindgen_anon_1
                        .IsWrite()
                        == 1
                };
                if is_write {
                    // Safe because we know this was an MSR access exit
                    let value = unsafe {
                        // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
                        // specific register
                        (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
                            | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
                    };
                    self.handle_msr_write(id, value)?;
                } else {
                    self.handle_msr_read(id)?;
                }
                Ok(VcpuExit::MsrAccess)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
                // Safe because we know this was a CPUID exit.
                let entry = unsafe {
                    CpuIdEntry {
                        function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
                        index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
                        flags: 0,
                        cpuid: CpuidResult {
                            eax: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRax as u32,
                            ebx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRbx as u32,
                            ecx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRcx as u32,
                            edx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRdx as u32,
                        },
                    }
                };
                Ok(VcpuExit::Cpuid { entry })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
            // undocumented exit calls from the header file, WinHvPlatformDefs.h.
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
                Ok(VcpuExit::ApicInitSipiTrap)
            }
            // exit caused by host cancellation through WHvCancelRunVirtualProcessor
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
            r => panic!("unknown exit reason: {}", r),
        }
    }
}

impl VcpuX86_64 for WhpxVcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool) {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
        let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
            Default::default();
        notifications.set_InterruptNotification(if requested { 1 } else { 0 });
        let notify_register = WHV_REGISTER_VALUE {
            DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
                __bindgen_anon_1: notifications,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &notify_register as *const WHV_REGISTER_VALUE,
            )
        })
        .unwrap();
    }

    /// Checks if we can inject an interrupt into the VCPU.
    fn ready_for_interrupt(&self) -> bool {
        // safe because InterruptionPending bit is always valid in ExecutionState struct
        let pending = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptionPending()
        };
        // safe because InterruptShadow bit is always valid in ExecutionState struct
        let shadow = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptShadow()
        };

        let eflags = self.last_exit_context.VpContext.Rflags;
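        // RFLAGS.IF is bit 9; it must be set for maskable interrupts to be deliverable.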
        const IF_MASK: u64 = 0x00000200;

        // can't inject an interrupt if the InterruptShadow or InterruptionPending bits are set,
        // or if the IF flag is clear
        shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
    }

    /// Injects interrupt vector `irq` into the VCPU.
    fn interrupt(&self, irq: u8) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
        pending_interrupt.set_InterruptionVector(irq.into());
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
        const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
        pending_interrupt.set_InterruptionVector(NMI_VECTOR);
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs> {
        let mut whpx_regs: WhpxRegs = Default::default();
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_mut_ptr(),
            )
        })?;
        Ok(Regs::from(&whpx_regs))
    }

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()> {
        let whpx_regs = WhpxRegs::from(regs);
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs> {
        let mut whpx_sregs: WhpxSregs = Default::default();
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_mut_ptr(),
            )
        })?;
        Ok(Sregs::from(&whpx_sregs))
    }

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
        let whpx_sregs = WhpxSregs::from(sregs);
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu> {
        let mut whpx_fpu: WhpxFpu = Default::default();
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_mut_ptr(),
            )
        })?;
        Ok(Fpu::from(&whpx_fpu))
    }

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
        let whpx_fpu = WhpxFpu::from(fpu);
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_ptr(),
            )
        })
    }

    /// Gets the VCPU XSAVE.
    fn get_xsave(&self) -> Result<Xsave> {
        let mut empty_buffer = [0u8; 1];
        let mut needed_buf_size: u32 = 0;

        // Find out how much space is needed for XSAVEs.
        // SAFETY: we pass a valid buffer pointer with a zero length, so this call only writes the
        // required size into `needed_buf_size`.
        let res = unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                empty_buffer.as_mut_ptr() as *mut _,
                0,
                &mut needed_buf_size,
            )
        };
        if res != WHV_E_INSUFFICIENT_BUFFER.0 {
            // This should always work, so if it doesn't, we'll return unsupported.
            error!("failed to get size of vcpu xsave");
            return Err(Error::new(EIO));
        }

        let mut xsave = Xsave::new(needed_buf_size as usize);
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so writes are bounded within the buffer.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_mut_ptr(),
                xsave.len() as u32,
                &mut needed_buf_size,
            )
        })?;
        Ok(xsave)
    }

    /// Sets the VCPU XSAVE.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so reads are bounded within the buffer.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_ptr(),
                xsave.len() as u32,
            )
        })
    }

    fn get_interrupt_state(&self) -> Result<AnySnapshot> {
        let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_mut_ptr(),
            )
        })?;

        AnySnapshot::to_any(whpx_interrupt_regs.into_serializable()).map_err(|e| {
            error!("failed to serialize interrupt state: {:?}", e);
            Error::new(EIO)
        })
    }

    fn set_interrupt_state(&self, data: AnySnapshot) -> Result<()> {
        let whpx_interrupt_regs =
            WhpxInterruptRegs::from_serializable(AnySnapshot::from_any(data).map_err(|e| {
                error!("failed to deserialize interrupt state: {:?}", e);
                Error::new(EIO)
            })?);
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs> {
        let mut whpx_debugregs: WhpxDebugRegs = Default::default();
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_mut_ptr(),
            )
        })?;
        Ok(DebugRegs::from(&whpx_debugregs))
    }

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
        let whpx_debugregs = WhpxDebugRegs::from(debugregs);
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let mut reg_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for the single register value
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &mut reg_value,
            )
        })?;

        // safe because the union value, Reg64, is safe to pull out assuming
        // the kernel filled in the xcrs properly.
        let xcr0 = unsafe { reg_value.Reg64 };

        // whpx only supports xcr0
        let xcrs = BTreeMap::from([(0, xcr0)]);
        Ok(xcrs)
    }

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
        if xcr_index != 0 {
            // invalid xcr register provided
            return Err(Error::new(EINVAL));
        }

        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let reg_value = WHV_REGISTER_VALUE { Reg64: value };
        // safe because we have enough space for the single register value
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &reg_value,
            )
        })
    }

    /// Gets the value of a single model-specific register.
    fn get_msr(&self, msr_index: u32) -> Result<u64> {
        let msr_name = get_msr_name(msr_index).ok_or(Error::new(ENOENT))?;
        let mut msr_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for the single register value
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &msr_name,
                /* RegisterCount */ 1,
                &mut msr_value,
            )
        })?;

        // safe because Reg64 will be a valid union value
        let value = unsafe { msr_value.Reg64 };
        Ok(value)
    }

    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
        // Note that some members of VALID_MSRS cannot be fetched from WHPX with
        // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of the
        // permitted MSRs here.
        //
        // We intentionally exclude WHvRegisterPendingInterruption and
        // WHvRegisterInterruptState because they are included in
        // get_interrupt_state.
        //
        // We intentionally exclude MSR_TSC because in snapshotting it is
        // handled by the generic x86_64 VCPU snapshot/restore. Non snapshot
        // consumers should use get/set_tsc_adjust to access the adjust register
        // if needed.
        const MSRS_TO_SAVE: &[u32] = &[
            MSR_EFER,
            MSR_KERNEL_GS_BASE,
            MSR_APIC_BASE,
            MSR_SYSENTER_CS,
            MSR_SYSENTER_EIP,
            MSR_SYSENTER_ESP,
            MSR_STAR,
            MSR_LSTAR,
            MSR_CSTAR,
            MSR_SFMASK,
        ];

        let registers = MSRS_TO_SAVE
            .iter()
            .map(|msr_index| {
                let value = self.get_msr(*msr_index)?;
                Ok((*msr_index, value))
            })
            .collect::<Result<BTreeMap<u32, u64>>>()?;

        Ok(registers)
    }

    /// Sets the value of a single model-specific register.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
        match get_msr_name(msr_index) {
            Some(msr_name) => {
                let msr_value = WHV_REGISTER_VALUE { Reg64: value };
                // safe because we have enough space for the single register value
                check_whpx!(unsafe {
                    WHvSetVirtualProcessorRegisters(
                        self.vm_partition.partition,
                        self.index,
                        &msr_name,
                        /* RegisterCount */ 1,
                        &msr_value,
                    )
                })
            }
            None => {
                warn!("msr 0x{msr_index:X} write unsupported by WHPX, dropping");
                Ok(())
            }
        }
    }

    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu, and needs to be set up on the vm.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64Cpuid
        if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
            return Err(Error::new(EINVAL));
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
            WHV_REGISTER_NAME_WHvX64RegisterRip,
            WHV_REGISTER_NAME_WHvX64RegisterRax,
            WHV_REGISTER_NAME_WHvX64RegisterRbx,
            WHV_REGISTER_NAME_WHvX64RegisterRcx,
            WHV_REGISTER_NAME_WHvX64RegisterRdx,
        ];

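        // CPUID results are 32 bits wide; `as u64` zero-extends them, matching the hardware
        // behavior of CPUID clearing the upper halves of RAX/RBX/RCX/RDX in 64-bit mode.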
        let values = vec![
            WHV_REGISTER_VALUE { Reg64: rip },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.eax as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ebx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ecx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.edx as u64,
            },
        ];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Sets up debug registers and configures the vcpu for handling guest debug events.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }

    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
        // Set the guest TSC such that it has the same TSC_OFFSET as it did at
        // the moment it was snapshotted. This is required for virtio-pvclock
        // to function correctly. (virtio-pvclock assumes the offset is fixed,
        // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
        // from CLOCK_MONOTONIC by setting the timebase.)
        self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
    }
}

fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
    VALID_MSRS.get(&msr_index).copied()
}

// run calls are tested with the integration tests since the full vcpu needs to be set up for it.
#[cfg(test)]
mod tests {
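    // Every test bails out early when WHPX is unavailable, so the suite can still run on hosts
    // without the Windows Hypervisor Platform enabled.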
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;
    use crate::VmX86_64;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
    }

    #[test]
    fn index() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 2;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu0.index, 0);
        vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
        let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu1.index, 1);
    }

    #[test]
    fn get_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.get_regs().expect("failed to get regs");
    }

    #[test]
    fn set_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut regs = vcpu.get_regs().expect("failed to get regs");
        let new_val = regs.rax + 2;
        regs.rax = new_val;

        vcpu.set_regs(&regs).expect("failed to set regs");
        let new_regs = vcpu.get_regs().expect("failed to get regs");
        assert_eq!(new_regs.rax, new_val);
    }

    #[test]
    fn debugregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut dregs = vcpu.get_debugregs().unwrap();
        dregs.dr7 += 13;
        vcpu.set_debugregs(&dregs).unwrap();
        let dregs2 = vcpu.get_debugregs().unwrap();
        assert_eq!(dregs.dr7, dregs2.dr7);
    }

    #[test]
    fn sregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().unwrap();
        sregs.cs.base += 7;
        vcpu.set_sregs(&sregs).unwrap();
        let sregs2 = vcpu.get_sregs().unwrap();
        assert_eq!(sregs.cs.base, sregs2.cs.base);
    }

    #[test]
    fn fpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut fpu = vcpu.get_fpu().unwrap();
        fpu.fpr[0].significand += 3;
        vcpu.set_fpu(&fpu).unwrap();
        let fpu2 = vcpu.get_fpu().unwrap();
        assert_eq!(fpu.fpr, fpu2.fpr);
    }

    #[test]
    fn xcrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        // check xsave support
        if !whpx.check_capability(HypervisorCap::Xcrs) {
            return;
        }

        vcpu.set_xcr(0, 1).unwrap();
        let xcrs = vcpu.get_xcrs().unwrap();
        let xcr0 = xcrs.get(&0).unwrap();
        assert_eq!(*xcr0, 1);
    }

    #[test]
    fn set_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();

        let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
        assert_eq!(gs_base, 42);
    }

    #[test]
    fn get_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // This one should succeed
        let _value = vcpu.get_msr(MSR_TSC).unwrap();

        // This one will fail to fetch
        vcpu.get_msr(MSR_TSC + 1)
            .expect_err("invalid MSR index should fail");
    }

    #[test]
    fn set_efer() {
        if !Whpx::is_enabled() {
            return;
        }
        // EFER Bits
        const EFER_SCE: u64 = 0x00000001;
        const EFER_LME: u64 = 0x00000100;
        const EFER_LMA: u64 = 0x00000400;
        const X86_CR0_PE: u64 = 0x1;
        const X86_CR0_PG: u64 = 0x80000000;
        const X86_CR4_PAE: u64 = 0x20;

        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
        // Initial value should be 0
        assert_eq!(sregs.efer, 0);

        // Enable and activate long mode
        sregs.cr0 |= X86_CR0_PE; // enable protected mode
        sregs.cr0 |= X86_CR0_PG; // enable paging
        sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
        sregs.efer = EFER_LMA | EFER_LME;
        vcpu.set_sregs(&sregs).expect("failed to set sregs");

        // Verify that the setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
        assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
        assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
        assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);

        let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(efer, EFER_LMA | EFER_LME);

        // Enable SCE via set_msr
        vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
            .expect("failed to set msr");

        // Verify that the setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
        let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
    }

    #[test]
    fn get_and_set_xsave_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
        // succeed.
        let xsave = vcpu.get_xsave().unwrap();
        vcpu.set_xsave(&xsave).unwrap();
    }

    #[test]
    fn get_and_set_interrupt_state_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // For the sake of snapshotting, interrupt state is essentially opaque. We just want to
        // make sure our syscalls succeed.
        let interrupt_state = vcpu.get_interrupt_state().unwrap();
        vcpu.set_interrupt_state(interrupt_state).unwrap();
    }

    #[test]
    fn get_all_msrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let all_msrs = vcpu.get_all_msrs().unwrap();

        // The MSR register buffer is zero-initialized, and the APIC base MSR is never zero on a
        // live vcpu, so a non-zero value shows the MSR fetch actually returned data.
        let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
        assert_ne!(*apic_base, 0);
    }
}