// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::arch::x86_64::CpuidResult;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::mem::size_of;
use std::sync::Arc;

use base::Error;
use base::Result;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENXIO;
use vm_memory::GuestAddress;
use winapi::shared::winerror::E_UNEXPECTED;
use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;

use super::types::*;
use super::*;
use crate::CpuId;
use crate::CpuIdEntry;
use crate::DebugRegs;
use crate::Fpu;
use crate::HypervHypercall;
use crate::IoOperation;
use crate::IoParams;
use crate::Regs;
use crate::Sregs;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuX86_64;
use crate::Xsave;

const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;

/// This is the whpx instruction emulator, useful for deconstructing IO port & memory
/// instructions. Whpx does not do this automatically.
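///
/// A minimal lifecycle sketch (hypothetical caller; the real users are `handle_mmio` and
/// `handle_io` below):
///
/// ```ignore
/// let emu = SafeInstructionEmulator::new()?;
/// // emu.handle is passed to WHvEmulatorTryMmioEmulation/WHvEmulatorTryIoEmulation,
/// // which drive the WHV_EMULATOR_CALLBACKS registered in new().
/// drop(emu); // calls WHvEmulatorDestroyEmulator
/// ```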
struct SafeInstructionEmulator {
    handle: WHV_EMULATOR_HANDLE,
}

impl SafeInstructionEmulator {
    fn new() -> Result<SafeInstructionEmulator> {
        const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
            Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
            Reserved: 0,
            WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
            WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
            WHvEmulatorGetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::get_virtual_processor_registers_cb,
            ),
            WHvEmulatorSetVirtualProcessorRegisters: Some(
                SafeInstructionEmulator::set_virtual_processor_registers_cb,
            ),
            WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
        };
        let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
        // safe because we pass in valid callbacks and an emulator handle into which the
        // kernel will place the allocated handle.
        check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;

        Ok(SafeInstructionEmulator { handle })
    }
}

trait InstructionEmulatorCallbacks {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT;
}

/// Context passed into the instruction emulator when trying io or mmio emulation.
/// Since we need this for set/get registers and memory translation,
/// a single context is used that captures all necessary contextual information for the operation.
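///
/// A sketch of how it is built (this mirrors what `handle_mmio`/`handle_io` below do):
///
/// ```ignore
/// let mut ctx = InstructionEmulatorContext {
///     vm_partition: vcpu.vm_partition.clone(),
///     index: vcpu.index,
///     handle_mmio: Some(handle_fn),
///     handle_io: None,
/// };
/// // Passed as *mut c_void to WHvEmulatorTryMmioEmulation; the callbacks cast it back.
/// ```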
struct InstructionEmulatorContext<'a> {
    vm_partition: Arc<SafePartition>,
    index: u32,
    handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Option<[u8; 8]>>,
    handle_io: Option<&'a mut dyn FnMut(IoParams) -> Option<[u8; 8]>>,
}

impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        // safe because we trust the kernel to fill in the io_access
        let io_access_info = unsafe { &mut *io_access };
        let address = io_access_info.Port.into();
        let size = io_access_info.AccessSize as usize;
        match io_access_info.Direction {
            WHPX_EXIT_DIRECTION_PIO_IN => {
                if let Some(handle_io) = &mut ctx.handle_io {
                    if let Some(data) = handle_io(IoParams {
                        address,
                        size,
                        operation: IoOperation::Read,
                    }) {
                        // Safe because we know this is an io_access_info field of u32,
                        // so casting as a &mut [u8] of len 4 is safe.
                        let buffer = unsafe {
                            std::slice::from_raw_parts_mut(
                                &mut io_access_info.Data as *mut u32 as *mut u8,
                                4,
                            )
                        };
                        buffer[..size].copy_from_slice(&data[..size]);
                    }
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            WHPX_EXIT_DIRECTION_PIO_OUT => {
                if let Some(handle_io) = &mut ctx.handle_io {
                    handle_io(IoParams {
                        address,
                        size,
                        operation: IoOperation::Write {
                            data: (io_access_info.Data as u64).to_ne_bytes(),
                        },
                    });
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        // safe because we trust the kernel to fill in the memory_access
        let memory_access_info = unsafe { &mut *memory_access };
        let address = memory_access_info.GpaAddress;
        let size = memory_access_info.AccessSize as usize;
        match memory_access_info.Direction {
            WHPX_EXIT_DIRECTION_MMIO_READ => {
                if let Some(handle_mmio) = &mut ctx.handle_mmio {
                    if let Some(data) = handle_mmio(IoParams {
                        address,
                        size,
                        operation: IoOperation::Read,
                    }) {
                        memory_access_info.Data = data;
                    }
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            WHPX_EXIT_DIRECTION_MMIO_WRITE => {
                if let Some(handle_mmio) = &mut ctx.handle_mmio {
                    handle_mmio(IoParams {
                        address,
                        size,
                        operation: IoOperation::Write {
                            data: memory_access_info.Data,
                        },
                    });
                    S_OK
                } else {
                    E_UNEXPECTED
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx has a weak reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvGetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx has a weak reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvSetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
        // safe because the ctx has a weak reference to the vm partition, which should be
        // alive longer than the ctx
        let ret = unsafe {
            WHvTranslateGva(
                ctx.vm_partition.partition,
                ctx.index,
                gva,
                translate_flags,
                &mut translation_result,
                gpa,
            )
        };
        if ret == S_OK {
            // safe assuming the kernel passed in a valid result_code ptr
            unsafe {
                *translation_result_code = translation_result.ResultCode;
            }
        }
        ret
    }
}

impl Drop for SafeInstructionEmulator {
    fn drop(&mut self) {
        // safe because we own the instruction emulator
        check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
    }
}

// we can send and share the instruction emulator across threads safely even though its
// handle is a void*.
unsafe impl Send for SafeInstructionEmulator {}
unsafe impl Sync for SafeInstructionEmulator {}

struct SafeVirtualProcessor {
    vm_partition: Arc<SafePartition>,
    index: u32,
}

impl SafeVirtualProcessor {
    fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
        // safe since the vm partition should be valid.
        check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
        Ok(SafeVirtualProcessor {
            vm_partition,
            index,
        })
    }
}

impl Drop for SafeVirtualProcessor {
    fn drop(&mut self) {
        // safe because we are the owner of this windows virtual processor.
        check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index) })
            .unwrap();
    }
}

pub struct WhpxVcpu {
    index: u32,
    safe_virtual_processor: Arc<SafeVirtualProcessor>,
    vm_partition: Arc<SafePartition>,
    last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
    // must be an Arc, since we cannot "dupe" an instruction emulator the way we can a handle.
    instruction_emulator: Arc<SafeInstructionEmulator>,
    tsc_frequency: Option<u64>,
    apic_frequency: Option<u32>,
}

impl WhpxVcpu {
    /// The SafePartition passed in is weak, so that there are no circular references.
    /// However, the SafePartition should be valid as long as this VCPU is alive. The index
    /// is the index for this vcpu.
    pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
        let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
        let instruction_emulator = SafeInstructionEmulator::new()?;
        Ok(WhpxVcpu {
            index,
            safe_virtual_processor: Arc::new(safe_virtual_processor),
            vm_partition,
            last_exit_context: Arc::new(Default::default()),
            instruction_emulator: Arc::new(instruction_emulator),
            tsc_frequency: None,
            apic_frequency: None,
        })
    }

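    /// Records the TSC and local APIC frequencies that will be reported when the guest
    /// reads HV_X64_MSR_TSC_FREQUENCY or HV_X64_MSR_APIC_FREQUENCY.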
    pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
        self.tsc_frequency = tsc_frequency;
        self.apic_frequency = Some(lapic_frequency);
    }

    /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
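    ///
    /// On success the result is delivered per RDMSR semantics: the low 32 bits of the value
    /// land in RAX and the high 32 bits in RDX, and RIP is advanced past the instruction.
    /// E.g. a value of 0x0000_0001_0000_0002 sets RAX = 0x2 and RDX = 0x1.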
    fn handle_msr_read(&mut self, id: u32) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

        let value = match id {
            HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
            HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
            _ => None,
        };

        if let Some(value) = value {
            // Get the next rip from the exit context
            let rip = self.last_exit_context.VpContext.Rip
                + self.last_exit_context.VpContext.InstructionLength() as u64;

            const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
                WHV_REGISTER_NAME_WHvX64RegisterRip,
                WHV_REGISTER_NAME_WHvX64RegisterRax,
                WHV_REGISTER_NAME_WHvX64RegisterRdx,
            ];

            let values = vec![
                WHV_REGISTER_VALUE { Reg64: rip },
                // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
                WHV_REGISTER_VALUE {
                    Reg64: (value & 0xffffffff),
                },
                WHV_REGISTER_VALUE {
                    Reg64: (value >> 32),
                },
            ];

            // safe because we have enough space for all the registers
            check_whpx!(unsafe {
                WHvSetVirtualProcessorRegisters(
                    self.vm_partition.partition,
                    self.index,
                    &REG_NAMES as *const WHV_REGISTER_NAME,
                    REG_NAMES.len() as u32,
                    values.as_ptr() as *const WHV_REGISTER_VALUE,
                )
            })
        } else {
            self.inject_gp_fault()
        }
    }

    /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
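    ///
    /// Only HV_X64_MSR_TSC_INVARIANT_CONTROL is accepted, and the written value is dropped;
    /// the handler just advances RIP past the WRMSR instruction.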
    fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

        // Do nothing, we assume TSC is always invariant
        let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);

        if !success {
            return self.inject_gp_fault();
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];

        let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

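    /// Queues a general protection fault (#GP, vector 13, error code 0) as a pending
    /// exception event, failing with EINVAL if another exception is already pending.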
    fn inject_gp_fault(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];

        let mut event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: Default::default(),
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &mut event as *mut WHV_REGISTER_VALUE,
            )
        })?;

        // safe because we trust the kernel to have filled in the pending event union
        if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
            error!("Unable to inject gp fault because pending exception exists");
            return Err(Error::new(EINVAL));
        }

        // safe because we trust the kernel to have filled in the pending event union
        let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };

        pending_exception.set_EventPending(1);
        // GP faults set error code
        pending_exception.set_DeliverErrorCode(1);
        // GP fault error code is 0 unless the fault is segment related
        pending_exception.ErrorCode = 0;
        // This must be set to WHvX64PendingEventException
        pending_exception
            .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
        // GP fault vector is 13
        const GP_VECTOR: u32 = 13;
        pending_exception.set_Vector(GP_VECTOR);

        let event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: pending_exception,
            },
        };

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &event as *const WHV_REGISTER_VALUE,
            )
        })
    }
}

impl Vcpu for WhpxVcpu {
    /// Makes a shallow clone of this `Vcpu`.
    fn try_clone(&self) -> Result<Self> {
        Ok(WhpxVcpu {
            index: self.index,
            safe_virtual_processor: self.safe_virtual_processor.clone(),
            vm_partition: self.vm_partition.clone(),
            last_exit_context: self.last_exit_context.clone(),
            instruction_emulator: self.instruction_emulator.clone(),
            tsc_frequency: self.tsc_frequency,
            apic_frequency: self.apic_frequency,
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    /// Returns the vcpu id.
    fn id(&self) -> usize {
        self.index.try_into().unwrap()
    }

    /// Exits the vcpu immediately if exit is true
    fn set_immediate_exit(&self, exit: bool) {
        if exit {
            // safe because we own this whpx virtual processor index, and assume the vm
            // partition is still valid
            unsafe {
                WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
            }
        }
    }

    /// Signals to the hypervisor that this guest is being paused by userspace. On some
    /// hypervisors, this is used to control the pvclock. On WHPX, we handle it separately with
    /// virtio-pvclock. So the correct implementation here is to do nothing.
    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }

    /// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the
    /// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any.
    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        // Whpx does not support raw capability on the vcpu.
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
    ///
    /// Once called, it will determine whether an MMIO read or an MMIO write was the reason for
    /// the MMIO exit, call `handle_fn` with the respective `IoOperation` to perform the read or
    /// write, and set the return data in the vcpu so that the vcpu can resume running.
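    ///
    /// A hypothetical `handle_fn`, assuming some `bus` that services guest memory accesses
    /// (the `Some`/`None` contract is what matters: reads return the bytes, writes return
    /// `None`):
    ///
    /// ```ignore
    /// vcpu.handle_mmio(&mut |IoParams { address, size, operation }| match operation {
    ///     IoOperation::Read => {
    ///         let mut data = [0u8; 8];
    ///         bus.read(address, &mut data[..size]);
    ///         Some(data)
    ///     }
    ///     IoOperation::Write { data } => {
    ///         bus.write(address, &data[..size]);
    ///         None
    ///     }
    /// })?;
    /// ```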
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: Some(handle_fn),
            handle_io: None,
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryMmioEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
    ///
    /// Once called, it will determine whether an io in or io out was the reason for the io exit,
    /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
    /// and set the return data in the vcpu so that the vcpu can resume running.
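    ///
    /// The `handle_fn` contract mirrors `handle_mmio` above: port reads return `Some(data)`,
    /// port writes return `None`. A hypothetical sketch, assuming an `io_bus`:
    ///
    /// ```ignore
    /// vcpu.handle_io(&mut |IoParams { address, size, operation }| match operation {
    ///     IoOperation::Read => {
    ///         let mut data = [0u8; 8];
    ///         io_bus.read(address, &mut data[..size]);
    ///         Some(data)
    ///     }
    ///     IoOperation::Write { data } => {
    ///         io_bus.write(address, &data[..size]);
    ///         None
    ///     }
    /// })?;
    /// ```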
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: None,
            handle_io: Some(handle_fn),
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryIoEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    /// This is currently unhandled, since we don't emulate hypercall instructions for whpx.
    fn handle_hyperv_hypercall(&self, _func: &mut dyn FnMut(HypervHypercall) -> u64) -> Result<()> {
        Ok(())
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::RdMsr`,
    /// and in the same thread as run.
    ///
    /// It will put `data` into the user buffer and return.
    fn handle_rdmsr(&self, _data: u64) -> Result<()> {
        // TODO(b/235691411): Implement.
        Err(Error::new(libc::ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::WrMsr`,
    /// and in the same thread as run.
    fn handle_wrmsr(&self) {
        // TODO(b/235691411): Implement.
    }

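    /// Runs the vcpu until it exits, filling in `last_exit_context` and mapping the WHPX exit
    /// reason onto the hypervisor-agnostic `VcpuExit` enum. MSR accesses are handled inline
    /// (see `handle_msr_read`/`handle_msr_write`) before `VcpuExit::MsrAccess` is returned.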
    #[allow(non_upper_case_globals)]
    fn run(&mut self) -> Result<VcpuExit> {
        // safe because we own this whpx virtual processor index, and assume the vm partition is
        // still valid
        let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
        check_whpx!(unsafe {
            WHvRunVirtualProcessor(
                self.vm_partition.partition,
                self.index,
                exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
                size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
            )
        })?;

        match self.last_exit_context.ExitReason {
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonNone => Ok(VcpuExit::Unknown),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
                Ok(VcpuExit::UnrecoverableException)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
                Ok(VcpuExit::InvalidVpRegister)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
                Ok(VcpuExit::UnsupportedFeature)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
                Ok(VcpuExit::IrqWindowOpen)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
            // additional exits that are configurable
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
                // safe because we trust the kernel to fill in the union field properly.
                let vector = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .ApicEoi
                        .InterruptVector as u8
                };
                Ok(VcpuExit::IoapicEoi { vector })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
                // Safe because we know this was an MSR access exit.
                let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };

                // Safe because we know this was an MSR access exit
                let is_write = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .MsrAccess
                        .AccessInfo
                        .__bindgen_anon_1
                        .IsWrite()
                        == 1
                };
                if is_write {
                    // Safe because we know this was an MSR access exit
                    let value = unsafe {
                        // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
                        // specific register
                        (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
                            | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
                    };
                    self.handle_msr_write(id, value)?;
                } else {
                    self.handle_msr_read(id)?;
                }
                Ok(VcpuExit::MsrAccess)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
                // Safe because we know this was a CPUID exit.
                let entry = unsafe {
                    CpuIdEntry {
                        function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
                        index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
                        flags: 0,
                        cpuid: CpuidResult {
                            eax: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRax as u32,
                            ebx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRbx as u32,
                            ecx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRcx as u32,
                            edx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRdx as u32,
                        },
                    }
                };
                Ok(VcpuExit::Cpuid { entry })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
            // undocumented exit calls from the header file, WinHvPlatformDefs.h.
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
                Ok(VcpuExit::ApicInitSipiTrap)
            }
            // exit caused by host cancellation through WHvCancelRunVirtualProcessor
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
            r => panic!("unknown exit reason: {}", r),
        }
    }
}

impl VcpuX86_64 for WhpxVcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool) {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
        let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
            Default::default();
        notifications.set_InterruptNotification(if requested { 1 } else { 0 });
        let notify_register = WHV_REGISTER_VALUE {
            DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
                __bindgen_anon_1: notifications,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &notify_register as *const WHV_REGISTER_VALUE,
            )
        })
        .unwrap();
    }

    /// Checks if we can inject an interrupt into the VCPU.
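    ///
    /// Injection is possible only when all three of these hold: no interruption is already
    /// pending, the vcpu is not in an interrupt shadow (e.g. right after STI or MOV SS), and
    /// RFLAGS.IF is set.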
    fn ready_for_interrupt(&self) -> bool {
        // safe because InterruptionPending bit is always valid in ExecutionState struct
        let pending = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptionPending()
        };
        // safe because InterruptShadow bit is always valid in ExecutionState struct
        let shadow = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptShadow()
        };

        let eflags = self.last_exit_context.VpContext.Rflags;
        const IF_MASK: u64 = 0x00000200;

        // can't inject an interrupt if InterruptShadow or InterruptPending bits are set, or if
        // the IF flag is clear
        shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
    }

    /// Injects interrupt vector `irq` into the VCPU.
    fn interrupt(&self, irq: u32) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
        pending_interrupt.set_InterruptionVector(irq);
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
        const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
        pending_interrupt.set_InterruptionVector(NMI_VECTOR);
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs> {
        let mut whpx_regs: WhpxRegs = Default::default();
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_mut_ptr(),
            )
        })?;
        Ok(Regs::from(&whpx_regs))
    }

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()> {
        let whpx_regs = WhpxRegs::from(regs);
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs> {
        let mut whpx_sregs: WhpxSregs = Default::default();
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_mut_ptr(),
            )
        })?;
        Ok(Sregs::from(&whpx_sregs))
    }

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
        let whpx_sregs = WhpxSregs::from(sregs);
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu> {
        let mut whpx_fpu: WhpxFpu = Default::default();
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_mut_ptr(),
            )
        })?;
        Ok(Fpu::from(&whpx_fpu))
    }

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
        let whpx_fpu = WhpxFpu::from(fpu);
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_ptr(),
            )
        })
    }

    /// Gets the VCPU XSAVE.
    fn get_xsave(&self) -> Result<Xsave> {
        let mut empty_buffer = [0u8; 1];
        let mut needed_buf_size: u32 = 0;

        // Find out how much space is needed for XSAVEs.
        let res = unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                empty_buffer.as_mut_ptr() as *mut _,
                0,
                &mut needed_buf_size,
            )
        };
        if res != WHV_E_INSUFFICIENT_BUFFER.0 {
            // This should always work, so if it doesn't, we'll return an error.
            error!("failed to get size of vcpu xsave");
            return Err(Error::new(EIO));
        }

        let mut xsave = Xsave::new(needed_buf_size as usize);
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so writes are bounded within the buffer.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_mut_ptr(),
                xsave.len() as u32,
                &mut needed_buf_size,
            )
        })?;
        Ok(xsave)
    }

    /// Sets the VCPU XSAVE.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so reads are bounded within the buffer.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_ptr(),
                xsave.len() as u32,
            )
        })
    }

    fn get_interrupt_state(&self) -> Result<serde_json::Value> {
        let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_mut_ptr(),
            )
        })?;

        serde_json::to_value(whpx_interrupt_regs.into_serializable()).map_err(|e| {
            error!("failed to serialize interrupt state: {:?}", e);
            Error::new(EIO)
        })
    }

    fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()> {
        let whpx_interrupt_regs =
            WhpxInterruptRegs::from_serializable(serde_json::from_value(data).map_err(|e| {
                error!("failed to deserialize interrupt state: {:?}", e);
                Error::new(EIO)
            })?);
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs> {
        let mut whpx_debugregs: WhpxDebugRegs = Default::default();
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_mut_ptr(),
            )
        })?;
        Ok(DebugRegs::from(&whpx_debugregs))
    }

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
        let whpx_debugregs = WhpxDebugRegs::from(debugregs);
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let mut reg_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for all the registers in whpx_regs
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &mut reg_value,
            )
        })?;

        // safe because the union value, reg64, is safe to pull out assuming
        // kernel filled in the xcrs properly.
        let xcr0 = unsafe { reg_value.Reg64 };

        // whpx only supports xcr0
        let xcrs = BTreeMap::from([(0, xcr0)]);
        Ok(xcrs)
    }

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
        if xcr_index != 0 {
            // invalid xcr register provided
            return Err(Error::new(EINVAL));
        }

        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let reg_value = WHV_REGISTER_VALUE { Reg64: value };
        // safe because we have enough space for all the registers in whpx_xcrs
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &reg_value,
            )
        })
    }

    /// Gets the value of a single model-specific register.
    fn get_msr(&self, msr_index: u32) -> Result<u64> {
        let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
        let mut msr_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for all the registers in whpx_regs
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &msr_name,
                /* RegisterCount */ 1,
                &mut msr_value,
            )
        })?;

        // safe because Reg64 will be a valid union value
        let value = unsafe { msr_value.Reg64 };
        Ok(value)
    }

    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
        // Note that some members of VALID_MSRS cannot be fetched from WHPX with
        // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of the
        // permitted MSRs here.
        //
        // We intentionally exclude WHvRegisterPendingInterruption and
        // WHvRegisterInterruptState because they are included in
        // get_interrupt_state.
        //
        // We intentionally exclude MSR_TSC because in snapshotting it is
        // handled by the generic x86_64 VCPU snapshot/restore. Non snapshot
        // consumers should use get/set_tsc_adjust to access the adjust register
        // if needed.
        const MSRS_TO_SAVE: &[u32] = &[
            MSR_EFER,
            MSR_KERNEL_GS_BASE,
            MSR_APIC_BASE,
            MSR_SYSENTER_CS,
            MSR_SYSENTER_EIP,
            MSR_SYSENTER_ESP,
            MSR_STAR,
            MSR_LSTAR,
            MSR_CSTAR,
            MSR_SFMASK,
        ];

        let registers = MSRS_TO_SAVE
            .iter()
            .map(|msr_index| {
                let value = self.get_msr(*msr_index)?;
                Ok((*msr_index, value))
            })
            .collect::<Result<BTreeMap<u32, u64>>>()?;

        Ok(registers)
    }

    /// Sets the value of a single model-specific register.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
        let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
        let msr_value = WHV_REGISTER_VALUE { Reg64: value };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &msr_name,
                /* RegisterCount */ 1,
                &msr_value,
            )
        })
    }

    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu, and needs to be setup on the vm.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
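    ///
    /// A hypothetical flow after a CPUID exit (illustrative only; the mask is made up):
    ///
    /// ```ignore
    /// if let VcpuExit::Cpuid { mut entry } = vcpu.run()? {
    ///     entry.cpuid.ecx &= !HYPOTHETICAL_FEATURE_MASK; // hide a feature from the guest
    ///     vcpu.handle_cpuid(&entry)?;
    /// }
    /// ```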
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64Cpuid
        if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
            return Err(Error::new(EINVAL));
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
            WHV_REGISTER_NAME_WHvX64RegisterRip,
            WHV_REGISTER_NAME_WHvX64RegisterRax,
            WHV_REGISTER_NAME_WHvX64RegisterRbx,
            WHV_REGISTER_NAME_WHvX64RegisterRcx,
            WHV_REGISTER_NAME_WHvX64RegisterRdx,
        ];

        let values = vec![
            WHV_REGISTER_VALUE { Reg64: rip },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.eax as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ebx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ecx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.edx as u64,
            },
        ];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Gets the system emulated hyper-v CPUID values.
    /// For WHPX, this is not valid on the vcpu, and needs to be setup on the vm.
    fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        Err(Error::new(ENXIO))
    }

    /// Sets up debug registers and configures the vcpu for handling guest debug events.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }

    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
        // Set the guest TSC such that it has the same TSC_OFFSET as it did at
        // the moment it was snapshotted. This is required for virtio-pvclock
        // to function correctly. (virtio-pvclock assumes the offset is fixed,
        // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
        // from CLOCK_MONOTONIC by setting the timebase.)
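        //
        // E.g. (hypothetical numbers): if the host TSC reads H at restore time and the
        // snapshotted offset was O, the guest TSC becomes H + O, so the guest-visible
        // offset is O again.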
        self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
    }
}

fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
    VALID_MSRS.get(&msr_index).copied()
}

// run calls are tested with the integration tests since the full vcpu needs to be set up for it.
#[cfg(test)]
mod tests {
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;
    use crate::VmX86_64;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
    }

    #[test]
    fn index() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 2;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu0.index, 0);
        vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
        let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu1.index, 1);
    }

    #[test]
    fn get_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.get_regs().expect("failed to get regs");
    }

    #[test]
    fn set_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut regs = vcpu.get_regs().expect("failed to get regs");
        let new_val = regs.rax + 2;
        regs.rax = new_val;

        vcpu.set_regs(&regs).expect("failed to set regs");
        let new_regs = vcpu.get_regs().expect("failed to get regs");
        assert_eq!(new_regs.rax, new_val);
    }

    #[test]
    fn debugregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut dregs = vcpu.get_debugregs().unwrap();
        dregs.dr7 += 13;
        vcpu.set_debugregs(&dregs).unwrap();
        let dregs2 = vcpu.get_debugregs().unwrap();
        assert_eq!(dregs.dr7, dregs2.dr7);
    }

    #[test]
    fn sregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().unwrap();
        sregs.cs.base += 7;
        vcpu.set_sregs(&sregs).unwrap();
        let sregs2 = vcpu.get_sregs().unwrap();
        assert_eq!(sregs.cs.base, sregs2.cs.base);
    }

    #[test]
    fn fpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut fpu = vcpu.get_fpu().unwrap();
        fpu.fpr[0][0] += 3;
        vcpu.set_fpu(&fpu).unwrap();
        let fpu2 = vcpu.get_fpu().unwrap();
        assert_eq!(fpu.fpr[0][0], fpu2.fpr[0][0]);
    }

    #[test]
    fn xcrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        // check xsave support
        if !whpx.check_capability(HypervisorCap::Xcrs) {
            return;
        }

        vcpu.set_xcr(0, 1).unwrap();
        let xcrs = vcpu.get_xcrs().unwrap();
        let xcr0 = xcrs.get(&0).unwrap();
        assert_eq!(*xcr0, 1);
    }

    #[test]
    fn set_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();

        let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
        assert_eq!(gs_base, 42);
    }

    #[test]
    fn get_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // This one should succeed
        let _value = vcpu.get_msr(MSR_TSC).unwrap();

        // This one will fail to fetch
        vcpu.get_msr(MSR_TSC + 1)
            .expect_err("invalid MSR index should fail");
    }

    #[test]
    fn set_efer() {
        if !Whpx::is_enabled() {
            return;
        }
        // EFER Bits
        const EFER_SCE: u64 = 0x00000001;
        const EFER_LME: u64 = 0x00000100;
        const EFER_LMA: u64 = 0x00000400;
        const X86_CR0_PE: u64 = 0x1;
        const X86_CR0_PG: u64 = 0x80000000;
        const X86_CR4_PAE: u64 = 0x20;

        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
        // Initial value should be 0
        assert_eq!(sregs.efer, 0);

        // Enable and activate long mode
        sregs.cr0 |= X86_CR0_PE; // enable protected mode
        sregs.cr0 |= X86_CR0_PG; // enable paging
        sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
        sregs.efer = EFER_LMA | EFER_LME;
        vcpu.set_sregs(&sregs).expect("failed to set sregs");

        // Verify that setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
        assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
        assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
        assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);

        let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(efer, EFER_LMA | EFER_LME);

        // Enable SCE via set_msrs
        vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
            .expect("failed to set msr");

        // Verify that setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
        let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
    }

    #[test]
    fn get_and_set_xsave_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
        // succeed.
        let xsave = vcpu.get_xsave().unwrap();
        vcpu.set_xsave(&xsave).unwrap();
    }

    #[test]
    fn get_and_set_interrupt_state_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // For the sake of snapshotting, interrupt state is essentially opaque. We just want to
        // make sure our syscalls succeed.
        let interrupt_state = vcpu.get_interrupt_state().unwrap();
        vcpu.set_interrupt_state(interrupt_state).unwrap();
    }

    #[test]
    fn get_all_msrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let all_msrs = vcpu.get_all_msrs().unwrap();

        // Our MSR buffer is initialized to zeros. The APIC base will be non-zero, so by
        // asserting that it is, we know the MSR fetch actually returned data.
        let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
        assert_ne!(*apic_base, 0);
    }
}