// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryInto;
use std::sync::Arc;

use base::error;
use base::info;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::MappedRegion;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use base::SendTube;
use fnv::FnvHashMap;
use libc::EEXIST;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENODEV;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use winapi::shared::winerror::ERROR_BUSY;
use winapi::shared::winerror::ERROR_SUCCESS;
use winapi::um::memoryapi::OfferVirtualMemory;
use winapi::um::memoryapi::ReclaimVirtualMemory;
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
use winapi::um::winnt::RtlZeroMemory;

use super::types::*;
use super::*;
use crate::host_phys_addr_bits;
use crate::whpx::whpx_sys::*;
use crate::BalloonEvent;
use crate::ClockState;
use crate::Datamatch;
use crate::DeliveryMode;
use crate::DestinationMode;
use crate::DeviceKind;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::LapicState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::TriggerMode;
use crate::VcpuX86_64;
use crate::Vm;
use crate::VmCap;
use crate::VmX86_64;

pub struct WhpxVm {
    whpx: Whpx,
    // Reference counted, since we need to implement try_clone or some variation. There is only
    // ever one partition create/delete, unlike the dup/close-handle variations.
    vm_partition: Arc<SafePartition>,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
    /// A min-heap of MemSlot numbers that were used and then removed and can now be re-used.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    // WHPX's implementation of ioevents makes several assumptions about how crosvm uses
    // ioevents:
    // 1. All ioevents are registered during device setup, and thus can be cloned when the vm is
    //    cloned instead of locked in an Arc<Mutex<>>. This makes handling ioevents in each vcpu
    //    thread easier because no locks need to be acquired.
    // 2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
    //    makes this faster.
    // 3. We only ever register one eventfd to each address. This simplifies our data structure.
    ioevents: FnvHashMap<IoEventAddress, Event>,
    // Tube to send events to control.
    vm_evt_wrtube: Option<SendTube>,
}

impl WhpxVm {
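    /// Creates a new WHPX VM on its own partition.
    ///
    /// A minimal construction sketch, mirroring `new_vm` in the tests at the bottom of this
    /// file; the example is marked `ignore` because it can only run on a host with WHPX
    /// enabled:
    ///
    /// ```ignore
    /// let whpx = Whpx::new().expect("failed to instantiate whpx");
    /// let mem =
    ///     GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    /// // One vcpu, no APIC emulation, and no event tube.
    /// let vm = WhpxVm::new(&whpx, 1, mem, CpuId::new(0), false, None)
    ///     .expect("failed to create whpx vm");
    /// ```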
    pub fn new(
        whpx: &Whpx,
        cpu_count: usize,
        guest_mem: GuestMemory,
        cpuid: CpuId,
        apic_emulation: bool,
        vm_evt_wrtube: Option<SendTube>,
    ) -> WhpxResult<WhpxVm> {
        let partition = SafePartition::new()?;
        // Set up partition defaults.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.ProcessorCount = cpu_count as u32;
        // Safe because we own this partition, and the partition property is allocated on the
        // stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetProcessorCount)?;

        // Pre-set any cpuid results in cpuid.
        let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
            .cpu_id_entries
            .iter()
            .map(WHV_X64_CPUID_RESULT::from)
            .collect();

        // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells Linux that it's running under
        // Hyper-V.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
            Reserved: [0u32; 3],
            // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the
            // guest.
            Eax: HYPERV_CPUID_MIN,
            Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
            Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
            Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
        });

        // The HYPERV_CPUID_FEATURES leaf tells Linux which Hyper-V features we support.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_FEATURES,
            Reserved: [0u32; 3],
            // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which
            // means TSC scaling/offsetting is handled in hardware, not the guest.
            Eax: HV_ACCESS_FREQUENCY_MSRS
                | HV_ACCESS_TSC_INVARIANT
                | HV_MSR_REFERENCE_TSC_AVAILABLE,
            Ebx: 0,
            Ecx: 0,
            Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
        });

        // Safe because we own this partition, and the cpuid_results vec is local to this
        // function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
                cpuid_results.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidResultList)?;

        // Set up exiting for cpuid leaves that we want crosvm to adjust, but that we can't
        // pre-set. We can't pre-set leaves that rely on irqchip information, and we cannot
        // pre-set leaves that return different results per-cpu.
        let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
        // Safe because we own this partition, and the exit_list vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
                exit_list.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidExitList)?;

        // Set up exits for the CPUID instruction.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        // Safe because we own this partition, and the partition property is allocated on the
        // stack.
        unsafe {
            property
                .ExtendedVmExits
                .__bindgen_anon_1
                .set_X64CpuidExit(1);
            // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail
            // an MSR access and inject a GP fault. Crosvm, in turn, handles select MSR accesses
            // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
            // fault for any unhandled MSR accesses.
            property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
        }
        // Safe because we own this partition, and the partition property is allocated on the
        // stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetExtendedVmExits)?;

        if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
            return Err(WhpxError::LocalApicEmulationNotSupported);
        }

        // Set up the APIC emulation mode.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.LocalApicEmulationMode = if apic_emulation {
            // TODO(b/180966070): figure out if x2apic emulation mode is available on the host
            // and enable it if it is.
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
        } else {
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
        };

        // Safe because we own this partition, and the partition property is allocated on the
        // stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetLocalApicEmulationMode)?;

        // Safe because we own this partition.
        check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
            .map_err(WhpxError::SetupPartition)?;

        for region in guest_mem.regions() {
            // Safe because the guest regions are guaranteed not to overlap.
            unsafe {
                set_user_memory_region(
                    &partition,
                    false, // read_only
                    false, // track dirty pages
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }
            .map_err(WhpxError::MapGpaRange)?;
        }

        Ok(WhpxVm {
            whpx: whpx.clone(),
            vm_partition: Arc::new(partition),
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            ioevents: FnvHashMap::default(),
            vm_evt_wrtube,
        })
    }

    /// Get the current state of the specified VCPU's local APIC.
    pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
        let mut buffer = WhpxLapicState { regs: [0u32; 1024] };
        let mut written_size = 0u32;
        let size = std::mem::size_of::<WhpxLapicState>();

        // Safe because we own this partition, and the buffer is local to this function and at
        // least `size` bytes long.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_mut_ptr() as *mut c_void,
                size as u32,
                &mut written_size,
            )
        })?;

        Ok(LapicState::from(&buffer))
    }

    /// Set the current state of the specified VCPU's local APIC.
    pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
        let buffer = WhpxLapicState::from(state);
        // Safe because we own this partition, and the buffer is local to this function.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                std::mem::size_of::<WhpxLapicState>() as u32,
            )
        })?;
        Ok(())
    }

    /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
    /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
    pub fn request_interrupt(
        &self,
        vector: u8,
        dest_id: u8,
        dest_mode: DestinationMode,
        trigger: TriggerMode,
        delivery: DeliveryMode,
    ) -> Result<()> {
        // WHV_INTERRUPT_CONTROL does not appear to support destination shorthands.
        let mut interrupt = WHV_INTERRUPT_CONTROL {
            Destination: dest_id as u32,
            Vector: vector as u32,
            ..Default::default()
        };
        interrupt.set_DestinationMode(match dest_mode {
            DestinationMode::Physical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
            }
            DestinationMode::Logical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
            }
        } as u64);
        interrupt.set_TriggerMode(match trigger {
            TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
            TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
        } as u64);
        interrupt.set_Type(match delivery {
            DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
            DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
            DeliveryMode::SMI => {
                error!("WHPX does not support requesting an SMI");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::RemoteRead => {
                // Remote Read is also no longer supported by Intel hardware.
                error!("Remote Read interrupts are not supported by WHPX");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
            DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
            DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
            DeliveryMode::External => {
                error!("WHPX does not support requesting an external interrupt");
                return Err(Error::new(ENOTSUP));
            }
        } as u64);

        // Safe because we own this partition, and the interrupt control struct is local to this
        // function.
        check_whpx!(unsafe {
            WHvRequestInterrupt(
                self.vm_partition.partition,
                &interrupt,
                std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
            )
        })
    }

    /// In order to fully unmap a memory range such that the host can reclaim the memory,
    /// we unmap it from the hypervisor partition, and then mark crosvm's process as
    /// uninterested in the memory.
    ///
    /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for
    /// other uses when memory is in demand.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );
        // Safe because WHPX does proper error checking, even if an out-of-bounds address is
        // provided.
        unsafe {
            check_whpx!(WHvUnmapGpaRange(
                self.vm_partition.partition,
                guest_address.offset(),
                size,
            ))?;
        }

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *mut c_void;

        // Safe because we have just successfully unmapped this range from the guest partition,
        // so we know it's unused.
        let result =
            unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

        if result != ERROR_SUCCESS {
            let err = Error::new(result);
            error!("Freeing memory failed with error: {}", err);
            return Err(err);
        }
        Ok(())
    }

    /// Remap memory that has previously been unmapped with `handle_inflate`. Note that attempts
    /// to remap pages that were not previously unmapped, or addresses that are not page-aligned,
    /// will result in failure.
    ///
    /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
    /// partition. Remapped memory has no guarantee of content, and the guest should not expect
    /// it to.
    fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX remap of addr: {:?}, size: {:?}",
            guest_address, size
        );

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *const c_void;

        // Note that we aren't doing any validation here that this range was previously
        // unmapped. We can avoid that expensive validation by relying on Windows error checking
        // for ReclaimVirtualMemory. The call will fail if:
        // - The range is not currently "offered".
        // - The range is outside of current guest mem (GuestMemory will fail to convert the
        //   address).
        // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
        // hadn't previously forfeited (and even then, the contents will be zeroed).
        //
        // Safe because the memory ranges in question are managed by Windows, not Rust.
        // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
        let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

        if result == ERROR_BUSY || result == ERROR_SUCCESS {
            // In either of these cases, the contents of the reclaimed memory are preserved or
            // undefined. Regardless, zero the memory to ensure no unintentional memory contents
            // are shared.
            //
            // Safe because we just reclaimed the region in question and haven't yet remapped it
            // to the guest partition, so we know it's unused.
            unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
        } else {
            let err = Error::new(result);
            error!("Reclaiming memory failed with error: {}", err);
            return Err(err);
        }

        // Safe because no-overlap is guaranteed by the success of ReclaimVirtualMemory, which
        // would fail if it was called on areas that were not unmapped.
        unsafe {
            set_user_memory_region(
                &self.vm_partition,
                false, // read_only
                false, // track dirty pages
                guest_address.offset(),
                size,
                host_address as *mut u8,
            )
        }
    }
}

// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping from guest
// physical to host user pages.
//
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
    if !read_only {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite;
    }
    if track_dirty_pages {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }

    let ret = WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        flags,
    );
    check_whpx!(ret)
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory
/// region size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
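///
/// For example, with 4 KiB pages, a 100-page (409600-byte) region needs ceil(100 / 8) = 13
/// bytes of bitmap (one bit per page), matching the `dirty_log_size` test below:
///
/// ```ignore
/// assert_eq!(dirty_log_bitmap_size(pagesize() * 100), 13);
/// ```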
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

impl Vm for WhpxVm {
    /// Makes a shallow clone of this `Vm`.
    fn try_clone(&self) -> Result<Self> {
        let mut ioevents = FnvHashMap::default();
        for (addr, evt) in self.ioevents.iter() {
            ioevents.insert(*addr, evt.try_clone()?);
        }
        Ok(WhpxVm {
            whpx: self.whpx.try_clone()?,
            vm_partition: self.vm_partition.clone(),
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            ioevents,
            vm_evt_wrtube: self
                .vm_evt_wrtube
                .as_ref()
                .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        Err(Error::new(ENOTSUP))
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Whpx
    }

    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
                .unwrap_or_else(|e| {
                    error!(
                        "failed to check whpx feature {:?}: {}",
                        WhpxFeature::DirtyPageTracking,
                        e
                    );
                    false
                }),
            // There is a pvclock-like mechanism in Hyper-V already, but we can't get its state.
            VmCap::PvClock => false,
            VmCap::Protected => false,
            // WHPX initializes cpuid early during VM creation.
            VmCap::EarlyInitCpuid => true,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => true,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let size = mem.size() as u64;
        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
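        // Reuse the smallest freed slot number if one exists; `mem_slot_gaps` stores `Reverse`
        // values, so the `BinaryHeap` behaves as a min-heap. Otherwise, allocate a new slot
        // number after the boot regions already registered in `guest_mem`.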
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We
        // also know that the pointer and size are correct because the MemoryMapping interface
        // ensures this. We take ownership of the memory mapping so that it won't be unmapped
        // until the slot is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm_partition,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, (guest_addr, mem));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Safe because the slot is checked against the list of memory slots.
            unsafe {
                check_whpx!(WHvUnmapGpaRange(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                ))?;
            }
            self.mem_slot_gaps.lock().push(Reverse(slot));
            Ok(regions.remove(&slot).unwrap().1)
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // WHPX does not support in-kernel devices.
        Err(Error::new(libc::ENXIO))
    }

    fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Ensure that there are as many bytes in dirty_log as there are pages in the mmap.
            if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                return Err(Error::new(EINVAL));
            }
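            // WHvQueryGpaRangeDirtyBitmap fills the bitmap in u64-sized words, so round the
            // caller's byte-sized buffer up to a whole number of u64s for the temporary bitmap.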
            let bitmap_size = if dirty_log.len() % 8 == 0 {
                dirty_log.len() / 8
            } else {
                dirty_log.len() / 8 + 1
            };
            let mut bitmap = vec![0u64; bitmap_size];
            check_whpx!(unsafe {
                WHvQueryGpaRangeDirtyBitmap(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                    bitmap.as_mut_ptr(),
                    (bitmap.len() * 8) as u32,
                )
            })?;
            // Safe because we have allocated a vec of u64, which we can view as a u8 slice.
            let buffer = unsafe {
                std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
            };
            dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
            Ok(())
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX currently only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        if self.ioevents.contains_key(&addr) {
            error!("WHPX does not support multiple ioevents for the same address");
            return Err(Error::new(EEXIST));
        }

        self.ioevents.insert(addr, evt.try_clone()?);

        Ok(())
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        match self.ioevents.get(&addr) {
            Some(existing_evt) => {
                // evt should match the existing evt associated with addr.
                if evt != existing_evt {
                    return Err(Error::new(ENOENT));
                }
                self.ioevents.remove(&addr);
            }
            None => {
                return Err(Error::new(ENOENT));
            }
        };
        Ok(())
    }

    /// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
    /// in-kernel IO event delivery, this is a no-op.
    fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        if let Some(evt) = self.ioevents.get(&addr) {
            evt.signal()?;
        }
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        Err(Error::new(ENODEV))
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENODEV))
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host's.
        host_phys_addr_bits()
    }
}

impl VmX86_64 for WhpxVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.whpx
    }

    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        Ok(Box::new(WhpxVcpu::new(
            self.vm_partition.clone(),
            id.try_into().unwrap(),
        )?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support
    /// for WHPX.
    fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    /// Sets the address of a one-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support
    /// for WHPX.
    fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    fn load_protected_vm_firmware(
        &mut self,
        _fw_addr: GuestAddress,
        _fw_max_size: u64,
    ) -> Result<()> {
        // WHPX does not support protected VMs.
        Err(Error::new(libc::ENXIO))
    }
}

// NOTE: WHPX tests need to be run serially; otherwise they fail unless we map new regions of
// guest memory for each test.
#[cfg(test)]
mod tests {
    use std::thread;
    use std::time::Duration;

    use base::EventWaitResult;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;

    use super::*;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn create_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        new_vm(cpu_count, mem);
    }

    #[test]
    fn create_vcpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        vm.create_vcpu(0).expect("failed to create vcpu");
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
    }

    #[test]
    fn send_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        thread::spawn(move || {
            let _vm = vm;
        })
        .join()
        .unwrap();
    }

    #[test]
    fn check_vm_capability() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        assert!(vm.check_capability(VmCap::DirtyLog));
        assert!(!vm.check_capability(VmCap::PvClock));
    }

    #[test]
    fn dirty_log_size() {
        let page_size = pagesize();
        assert_eq!(dirty_log_bitmap_size(0), 0);
        assert_eq!(dirty_log_bitmap_size(page_size), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
        assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
    }

    #[test]
    fn register_ioevent() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let otherevt = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::AnyLength,
        )
        .expect_err("WHPX should not allow you to register two events for the same address");

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U8(None),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U32(Some(0xf6)),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown event should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown MMIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
    }

    #[test]
    fn handle_io_events() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let evt2 = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt2, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        // Check a PIO address.
        vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_ne!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        // Check an MMIO address.
        vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_ne!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );

        // Check an address that does not match any registered ioevents.
        vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    }

    #[test]
    fn add_memory_ro() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        vm.add_memory_region(
            GuestAddress(0x1000),
            Box::new(mem),
            true,
            false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();
    }

    #[test]
    fn remove_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let mem_ptr = mem.as_ptr();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x1000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        let removed_mem = vm.remove_memory_region(slot).unwrap();
        assert_eq!(removed_mem.size(), mem_size);
        assert_eq!(removed_mem.as_ptr(), mem_ptr);
    }

    #[test]
    fn remove_invalid_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        assert!(vm.remove_memory_region(0).is_err());
    }

    #[test]
    fn overlap_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x2000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        assert!(vm
            .add_memory_region(
                GuestAddress(0x2000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent
            )
            .is_err());
    }

    #[test]
    fn sync_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x10000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
        vm.msync_memory_region(slot, 0, mem_size).unwrap();
        assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
        assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
    }
}