// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryInto;
use std::sync::Arc;

use base::error;
use base::info;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::MappedRegion;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use base::SendTube;
use fnv::FnvHashMap;
use libc::EEXIST;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENODEV;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use winapi::shared::winerror::ERROR_BUSY;
use winapi::shared::winerror::ERROR_SUCCESS;
use winapi::um::memoryapi::OfferVirtualMemory;
use winapi::um::memoryapi::ReclaimVirtualMemory;
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
use winapi::um::winnt::RtlZeroMemory;

use super::types::*;
use super::*;
use crate::host_phys_addr_bits;
use crate::whpx::whpx_sys::*;
use crate::BalloonEvent;
use crate::ClockState;
use crate::Datamatch;
use crate::DeliveryMode;
use crate::DestinationMode;
use crate::DeviceKind;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::LapicState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::TriggerMode;
use crate::VcpuX86_64;
use crate::Vm;
use crate::VmCap;
use crate::VmX86_64;

pub struct WhpxVm {
    whpx: Whpx,
    // Reference counted, since we need to implement try_clone or some variation. A partition is
    // only ever created and deleted once, unlike handles, which can be duplicated and closed.
    vm_partition: Arc<SafePartition>,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    // WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
    //   1. All ioevents are registered during device setup, and thus can be cloned when the vm is
    //      cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
    //      vcpu thread easier because no locks will need to be acquired.
    //   2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
    //      will make this faster.
    //   3. We only ever register one eventfd to each address. This simplifies our data structure.
    ioevents: FnvHashMap<IoEventAddress, Event>,
    // Tube to send events to control.
    vm_evt_wrtube: Option<SendTube>,
}

impl WhpxVm {
    pub fn new(
        whpx: &Whpx,
        cpu_count: usize,
        guest_mem: GuestMemory,
        cpuid: CpuId,
        apic_emulation: bool,
        vm_evt_wrtube: Option<SendTube>,
    ) -> WhpxResult<WhpxVm> {
        let partition = SafePartition::new()?;
        // Set up partition defaults.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.ProcessorCount = cpu_count as u32;
        // Safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetProcessorCount)?;

        // Pre-set any cpuid results in cpuid.
        let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
            .cpu_id_entries
            .iter()
            .map(WHV_X64_CPUID_RESULT::from)
            .collect();

        // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells Linux that it's running under Hyper-V.
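        // The 12-byte hypervisor vendor ID string "Microsoft Hv" is returned four bytes at a
        // time in EBX, ECX, and EDX, as spelled out by the byte literals below.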
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
            Reserved: [0u32; 3],
            // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest.
            Eax: HYPERV_CPUID_MIN,
            Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
            Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
            Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
        });

        // The HYPERV_CPUID_FEATURES leaf tells Linux which Hyper-V features we support.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_FEATURES,
            Reserved: [0u32; 3],
            // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which means
            // TSC scaling/offsetting is handled in hardware, not the guest.
            Eax: HV_ACCESS_FREQUENCY_MSRS
                | HV_ACCESS_TSC_INVARIANT
                | HV_MSR_REFERENCE_TSC_AVAILABLE,
            Ebx: 0,
            Ecx: 0,
            Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
        });

        // Safe because we own this partition, and the cpuid_results vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
                cpuid_results.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidResultList)?;

        // Set up exiting for cpuid leaves that we want crosvm to adjust, but that we can't
        // pre-set. We can't pre-set leaves that rely on irqchip information, and we cannot
        // pre-set leaves that return different results per-cpu.
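        // For reference: leaf 0x1 is basic feature information, 0x4 is deterministic cache
        // parameters, 0xB and 0x1F are (extended) topology enumeration, and 0x15 is TSC/core
        // crystal clock information.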
        let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
        // Safe because we own this partition, and the exit_list vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
                exit_list.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidExitList)?;

        // Set up extended VM exits for the CPUID instruction and for MSR accesses.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        // Safe because we own this partition, and the partition property is allocated on the stack.
        unsafe {
            property
                .ExtendedVmExits
                .__bindgen_anon_1
                .set_X64CpuidExit(1);
            // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
            // MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
            // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
            // fault for any unhandled MSR accesses.
            property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
        }
        // Safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetExtendedVmExits)?;

        if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
            return Err(WhpxError::LocalApicEmulationNotSupported);
        }

        // Set up the APIC emulation mode.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.LocalApicEmulationMode = if apic_emulation {
            // TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
            // enable it if it is.
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
        } else {
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
        };

        // Safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetLocalApicEmulationMode)?;

        // Safe because we own this partition.
        check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
            .map_err(WhpxError::SetupPartition)?;

        for region in guest_mem.regions() {
            unsafe {
                // Safe because the guest regions are guaranteed not to overlap.
                set_user_memory_region(
                    &partition,
                    false, // read_only
                    false, // track dirty pages
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }
            .map_err(WhpxError::MapGpaRange)?;
        }

        Ok(WhpxVm {
            whpx: whpx.clone(),
            vm_partition: Arc::new(partition),
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            ioevents: FnvHashMap::default(),
            vm_evt_wrtube,
        })
    }

    /// Get the current state of the specified VCPU's local APIC.
    pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
        let buffer = WhpxLapicState { regs: [0u32; 1024] };
        let mut written_size = 0u32;
        let size = std::mem::size_of::<WhpxLapicState>();

        check_whpx!(unsafe {
            WHvGetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                size as u32,
                &mut written_size,
            )
        })?;

        Ok(LapicState::from(&buffer))
    }

    /// Set the current state of the specified VCPU's local APIC.
    pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
        let buffer = WhpxLapicState::from(state);
        check_whpx!(unsafe {
            WHvSetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                std::mem::size_of::<WhpxLapicState>() as u32,
            )
        })?;
        Ok(())
    }

    /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
    /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
    pub fn request_interrupt(
        &self,
        vector: u8,
        dest_id: u8,
        dest_mode: DestinationMode,
        trigger: TriggerMode,
        delivery: DeliveryMode,
    ) -> Result<()> {
        // WHV_INTERRUPT_CONTROL does not appear to support destination shorthands.
        let mut interrupt = WHV_INTERRUPT_CONTROL {
            Destination: dest_id as u32,
            Vector: vector as u32,
            ..Default::default()
        };
        interrupt.set_DestinationMode(match dest_mode {
            DestinationMode::Physical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
            }
            DestinationMode::Logical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
            }
        } as u64);
        interrupt.set_TriggerMode(match trigger {
            TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
            TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
        } as u64);
        interrupt.set_Type(match delivery {
            DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
            DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
            DeliveryMode::SMI => {
                error!("WHPX does not support requesting an SMI");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::RemoteRead => {
                // This is also no longer supported by Intel hardware.
                error!("Remote Read interrupts are not supported by WHPX");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
            DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
            DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
            DeliveryMode::External => {
                error!("WHPX does not support requesting an external interrupt");
                return Err(Error::new(ENOTSUP));
            }
        } as u64);

        check_whpx!(unsafe {
            WHvRequestInterrupt(
                self.vm_partition.partition,
                &interrupt,
                std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
            )
        })
    }

    /// In order to fully unmap a memory range such that the host can reclaim the memory,
    /// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
    /// in the memory.
    ///
    /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for
    /// other uses when memory is in demand.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );
        // Safe because WHPX does proper error checking, even if an out-of-bounds address is
        // provided.
        unsafe {
            check_whpx!(WHvUnmapGpaRange(
                self.vm_partition.partition,
                guest_address.offset(),
                size,
            ))?;
        }

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *mut c_void;

        // Safe because we have just successfully unmapped this range from the
        // guest partition, so we know it's unused.
        let result =
            unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

        if result != ERROR_SUCCESS {
            let err = Error::new(result);
            error!("Freeing memory failed with error: {}", err);
            return Err(err);
        }
        Ok(())
    }

    /// Remap memory that has previously been unmapped with `handle_inflate`. Note
    /// that attempts to remap pages that were not previously unmapped, or addresses that are not
    /// page-aligned, will result in failure.
    ///
    /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
    /// partition. Remapped memory has no guarantee of content, and the guest should not expect
    /// it to.
    fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX remap of addr: {:?}, size: {:?}",
            guest_address, size
        );

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *const c_void;

        // Note that we aren't doing any validation here that this range was previously unmapped.
        // However, we can avoid that expensive validation by relying on Windows error checking for
        // ReclaimVirtualMemory. The call will fail if:
        // - The range is not currently "offered"
        // - The range is outside of current guest mem (GuestMemory will fail to convert the
        //   address)
        // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
        // hadn't previously forfeited (and even then, the contents will be zeroed).
        //
        // Safe because the memory ranges in question are managed by Windows, not Rust.
        // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
        let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

        if result == ERROR_BUSY || result == ERROR_SUCCESS {
            // In either of these cases, the contents of the reclaimed memory
            // are preserved or undefined. Regardless, zero the memory
            // to ensure no unintentional memory contents are shared.
            //
            // Safe because we just reclaimed the region in question and haven't yet remapped
            // it to the guest partition, so we know it's unused.
            unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
        } else {
            let err = Error::new(result);
            error!("Reclaiming memory failed with error: {}", err);
            return Err(err);
        }

        // Safe because non-overlap is guaranteed by the success of ReclaimVirtualMemory,
        // which would fail if it were called on areas that were not unmapped.
        unsafe {
            set_user_memory_region(
                &self.vm_partition,
                false, // read_only
                false, // track dirty pages
                guest_address.offset(),
                size,
                host_address as *mut u8,
            )
        }
    }
}

// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
// from guest physical to host user pages.
//
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
    if !read_only {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite;
    }
    if track_dirty_pages {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }

    let ret = WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        flags,
    );
    check_whpx!(ret)
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
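///
/// For example, with a 4096-byte page size, a 16-page (65536-byte) region needs 2 bytes of
/// bitmap: one bit per page, rounded up to a whole number of bytes.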
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

impl Vm for WhpxVm {
    /// Makes a shallow clone of this `Vm`.
    fn try_clone(&self) -> Result<Self> {
        let mut ioevents = FnvHashMap::default();
        for (addr, evt) in self.ioevents.iter() {
            ioevents.insert(*addr, evt.try_clone()?);
        }
        Ok(WhpxVm {
            whpx: self.whpx.try_clone()?,
            vm_partition: self.vm_partition.clone(),
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            ioevents,
            vm_evt_wrtube: self
                .vm_evt_wrtube
                .as_ref()
                .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        Err(Error::new(ENOTSUP))
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Whpx
    }

    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
                .unwrap_or_else(|e| {
                    error!(
                        "failed to check whpx feature {:?}: {}",
                        WhpxFeature::DirtyPageTracking,
                        e
                    );
                    false
                }),
            // Hyper-V provides a pvclock-like mechanism, but we can't get its state.
            VmCap::PvClock => false,
            VmCap::Protected => false,
            // WHPX initializes cpuid early during VM creation.
            VmCap::EarlyInitCpuid => true,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => true,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let size = mem.size() as u64;
        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
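        // Reuse the lowest previously-freed slot number if one exists (the heap of gaps is a
        // min-heap); otherwise allocate the next slot past those used by GuestMemory's regions.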
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm_partition,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, (guest_addr, mem));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Safe because the slot is checked against the list of memory slots.
            unsafe {
                check_whpx!(WHvUnmapGpaRange(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                ))?;
            }
            self.mem_slot_gaps.lock().push(Reverse(slot));
            Ok(regions.remove(&slot).unwrap().1)
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // WHPX does not support in-kernel devices.
        Err(Error::new(libc::ENXIO))
    }

    fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
            if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                return Err(Error::new(EINVAL));
            }
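            // The WHPX API fills a bitmap of u64 words, so allocate enough u64s to cover every
            // byte of dirty_log, rounding up.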
            let bitmap_size = if dirty_log.len() % 8 == 0 {
                dirty_log.len() / 8
            } else {
                dirty_log.len() / 8 + 1
            };
            let mut bitmap = vec![0u64; bitmap_size];
            check_whpx!(unsafe {
                WHvQueryGpaRangeDirtyBitmap(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                    bitmap.as_mut_ptr() as *mut u64,
                    (bitmap.len() * 8) as u32,
                )
            })?;
            // Safe because we have allocated a vec of u64, which we can cast to a u8 slice.
            let buffer = unsafe {
                std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
            };
            dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
            Ok(())
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX currently only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        if self.ioevents.contains_key(&addr) {
            error!("WHPX does not support multiple ioevents for the same address");
            return Err(Error::new(EEXIST));
        }

        self.ioevents.insert(addr, evt.try_clone()?);

        Ok(())
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        match self.ioevents.get(&addr) {
            Some(existing_evt) => {
                // evt should match the existing evt associated with addr.
                if evt != existing_evt {
                    return Err(Error::new(ENOENT));
                }
                self.ioevents.remove(&addr);
            }

            None => {
                return Err(Error::new(ENOENT));
            }
        };
        Ok(())
    }

    /// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
    /// in-kernel IO event delivery, this is a no-op.
    fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        match self.ioevents.get(&addr) {
            None => {}
            Some(evt) => {
                evt.signal()?;
            }
        };
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        Err(Error::new(ENODEV))
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENODEV))
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host.
        host_phys_addr_bits()
    }
}

impl VmX86_64 for WhpxVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.whpx
    }

    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        Ok(Box::new(WhpxVcpu::new(
            self.vm_partition.clone(),
            id.try_into().unwrap(),
        )?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    /// Sets the address of a one-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    fn load_protected_vm_firmware(
        &mut self,
        _fw_addr: GuestAddress,
        _fw_max_size: u64,
    ) -> Result<()> {
        // WHPX does not support protected VMs.
        Err(Error::new(libc::ENXIO))
    }
}

// NOTE: WHPX tests need to run serially, as they otherwise fail unless we map new regions of
// guest memory.
#[cfg(test)]
mod tests {
    use std::thread;
    use std::time::Duration;

    use base::EventWaitResult;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;

    use super::*;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn create_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        new_vm(cpu_count, mem);
    }

    #[test]
    fn create_vcpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        vm.create_vcpu(0).expect("failed to create vcpu");
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
    }

    #[test]
    fn send_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        thread::spawn(move || {
            let _vm = vm;
        })
        .join()
        .unwrap();
    }

    #[test]
    fn check_vm_capability() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        assert!(vm.check_capability(VmCap::DirtyLog));
        assert!(!vm.check_capability(VmCap::PvClock));
    }

    #[test]
    fn dirty_log_size() {
        let page_size = pagesize();
        assert_eq!(dirty_log_bitmap_size(0), 0);
        assert_eq!(dirty_log_bitmap_size(page_size), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
        assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
    }

    #[test]
    fn register_ioevent() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let otherevt = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::AnyLength,
        )
        .expect_err("WHPX should not allow you to register two events for the same address");

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U8(None),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U32(Some(0xf6)),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown event should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown MMIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
    }

    #[test]
    fn handle_io_events() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let evt2 = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt2, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        // Check a pio address
        vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_ne!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        // Check an mmio address
        vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_ne!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );

        // Check an address that does not match any registered ioevents
        vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    }

    #[test]
    fn add_memory_ro() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        vm.add_memory_region(
            GuestAddress(0x1000),
            Box::new(mem),
            true,
            false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();
    }

    #[test]
    fn remove_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let mem_ptr = mem.as_ptr();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x1000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        let removed_mem = vm.remove_memory_region(slot).unwrap();
        assert_eq!(removed_mem.size(), mem_size);
        assert_eq!(removed_mem.as_ptr(), mem_ptr);
    }

    #[test]
    fn remove_invalid_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        assert!(vm.remove_memory_region(0).is_err());
    }

    #[test]
    fn overlap_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x2000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        assert!(vm
            .add_memory_region(
                GuestAddress(0x2000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent
            )
            .is_err());
    }

    #[test]
    fn sync_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x10000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
        vm.msync_memory_region(slot, 0, mem_size).unwrap();
        assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
        assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
    }
}